author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/xfs/linux-2.6
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'fs/xfs/linux-2.6'): 29 files changed, 2331 insertions, 2052 deletions
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 666c9db48eb6..a907de565db3 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -23,6 +23,7 @@
23 | #include <linux/backing-dev.h> | 23 | #include <linux/backing-dev.h> |
24 | #include "time.h" | 24 | #include "time.h" |
25 | #include "kmem.h" | 25 | #include "kmem.h" |
26 | #include "xfs_message.h" | ||
26 | 27 | ||
27 | /* | 28 | /* |
28 | * Greedy allocation. May fail and may return vmalloced memory. | 29 | * Greedy allocation. May fail and may return vmalloced memory. |
@@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
56 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) | 57 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) |
57 | return ptr; | 58 | return ptr; |
58 | if (!(++retries % 100)) | 59 | if (!(++retries % 100)) |
59 | printk(KERN_ERR "XFS: possible memory allocation " | 60 | xfs_err(NULL, |
60 | "deadlock in %s (mode:0x%x)\n", | 61 | "possible memory allocation deadlock in %s (mode:0x%x)", |
61 | __func__, lflags); | 62 | __func__, lflags); |
62 | congestion_wait(BLK_RW_ASYNC, HZ/50); | 63 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
63 | } while (1); | 64 | } while (1); |
@@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
112 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) | 113 | if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) |
113 | return ptr; | 114 | return ptr; |
114 | if (!(++retries % 100)) | 115 | if (!(++retries % 100)) |
115 | printk(KERN_ERR "XFS: possible memory allocation " | 116 | xfs_err(NULL, |
116 | "deadlock in %s (mode:0x%x)\n", | 117 | "possible memory allocation deadlock in %s (mode:0x%x)", |
117 | __func__, lflags); | 118 | __func__, lflags); |
118 | congestion_wait(BLK_RW_ASYNC, HZ/50); | 119 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
119 | } while (1); | 120 | } while (1); |
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c370819..000000000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_SUPPORT_SV_H__ | ||
19 | #define __XFS_SUPPORT_SV_H__ | ||
20 | |||
21 | #include <linux/wait.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/spinlock.h> | ||
24 | |||
25 | /* | ||
26 | * Synchronisation variables. | ||
27 | * | ||
28 | * (Parameters "pri", "svf" and "rts" are not implemented) | ||
29 | */ | ||
30 | |||
31 | typedef struct sv_s { | ||
32 | wait_queue_head_t waiters; | ||
33 | } sv_t; | ||
34 | |||
35 | static inline void _sv_wait(sv_t *sv, spinlock_t *lock) | ||
36 | { | ||
37 | DECLARE_WAITQUEUE(wait, current); | ||
38 | |||
39 | add_wait_queue_exclusive(&sv->waiters, &wait); | ||
40 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
41 | spin_unlock(lock); | ||
42 | |||
43 | schedule(); | ||
44 | |||
45 | remove_wait_queue(&sv->waiters, &wait); | ||
46 | } | ||
47 | |||
48 | #define sv_init(sv,flag,name) \ | ||
49 | init_waitqueue_head(&(sv)->waiters) | ||
50 | #define sv_destroy(sv) \ | ||
51 | /*NOTHING*/ | ||
52 | #define sv_wait(sv, pri, lock, s) \ | ||
53 | _sv_wait(sv, lock) | ||
54 | #define sv_signal(sv) \ | ||
55 | wake_up(&(sv)->waiters) | ||
56 | #define sv_broadcast(sv) \ | ||
57 | wake_up_all(&(sv)->waiters) | ||
58 | |||
59 | #endif /* __XFS_SUPPORT_SV_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3d..39f4f809bb68 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219 | } | 219 | } |
220 | 220 | ||
221 | int | 221 | int |
222 | xfs_check_acl(struct inode *inode, int mask) | 222 | xfs_check_acl(struct inode *inode, int mask, unsigned int flags) |
223 | { | 223 | { |
224 | struct xfs_inode *ip = XFS_I(inode); | 224 | struct xfs_inode *ip; |
225 | struct posix_acl *acl; | 225 | struct posix_acl *acl; |
226 | int error = -EAGAIN; | 226 | int error = -EAGAIN; |
227 | 227 | ||
228 | ip = XFS_I(inode); | ||
228 | trace_xfs_check_acl(ip); | 229 | trace_xfs_check_acl(ip); |
229 | 230 | ||
230 | /* | 231 | /* |
@@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask)
234 | if (!XFS_IFORK_Q(ip)) | 235 | if (!XFS_IFORK_Q(ip)) |
235 | return -EAGAIN; | 236 | return -EAGAIN; |
236 | 237 | ||
238 | if (flags & IPERM_FLAG_RCU) { | ||
239 | if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) | ||
240 | return -ECHILD; | ||
241 | return -EAGAIN; | ||
242 | } | ||
243 | |||
237 | acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); | 244 | acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); |
238 | if (IS_ERR(acl)) | 245 | if (IS_ERR(acl)) |
239 | return PTR_ERR(acl); | 246 | return PTR_ERR(acl); |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b552f816de15..79ce38be15a1 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,15 +38,6 @@
38 | #include <linux/pagevec.h> | 38 | #include <linux/pagevec.h> |
39 | #include <linux/writeback.h> | 39 | #include <linux/writeback.h> |
40 | 40 | ||
41 | /* | ||
42 | * Types of I/O for bmap clustering and I/O completion tracking. | ||
43 | */ | ||
44 | enum { | ||
45 | IO_READ, /* mapping for a read */ | ||
46 | IO_DELAY, /* mapping covers delalloc region */ | ||
47 | IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ | ||
48 | IO_NEW /* just allocated */ | ||
49 | }; | ||
50 | 41 | ||
51 | /* | 42 | /* |
52 | * Prime number of hash buckets since address is used as the key. | 43 | * Prime number of hash buckets since address is used as the key. |
@@ -182,9 +173,6 @@ xfs_setfilesize(
182 | xfs_inode_t *ip = XFS_I(ioend->io_inode); | 173 | xfs_inode_t *ip = XFS_I(ioend->io_inode); |
183 | xfs_fsize_t isize; | 174 | xfs_fsize_t isize; |
184 | 175 | ||
185 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | ||
186 | ASSERT(ioend->io_type != IO_READ); | ||
187 | |||
188 | if (unlikely(ioend->io_error)) | 176 | if (unlikely(ioend->io_error)) |
189 | return 0; | 177 | return 0; |
190 | 178 | ||
@@ -244,10 +232,8 @@ xfs_end_io(
244 | * We might have to update the on-disk file size after extending | 232 | * We might have to update the on-disk file size after extending |
245 | * writes. | 233 | * writes. |
246 | */ | 234 | */ |
247 | if (ioend->io_type != IO_READ) { | 235 | error = xfs_setfilesize(ioend); |
248 | error = xfs_setfilesize(ioend); | 236 | ASSERT(!error || error == EAGAIN); |
249 | ASSERT(!error || error == EAGAIN); | ||
250 | } | ||
251 | 237 | ||
252 | /* | 238 | /* |
253 | * If we didn't complete processing of the ioend, requeue it to the | 239 | * If we didn't complete processing of the ioend, requeue it to the |
@@ -318,14 +304,63 @@ STATIC int
318 | xfs_map_blocks( | 304 | xfs_map_blocks( |
319 | struct inode *inode, | 305 | struct inode *inode, |
320 | loff_t offset, | 306 | loff_t offset, |
321 | ssize_t count, | ||
322 | struct xfs_bmbt_irec *imap, | 307 | struct xfs_bmbt_irec *imap, |
323 | int flags) | 308 | int type, |
309 | int nonblocking) | ||
324 | { | 310 | { |
325 | int nmaps = 1; | 311 | struct xfs_inode *ip = XFS_I(inode); |
326 | int new = 0; | 312 | struct xfs_mount *mp = ip->i_mount; |
313 | ssize_t count = 1 << inode->i_blkbits; | ||
314 | xfs_fileoff_t offset_fsb, end_fsb; | ||
315 | int error = 0; | ||
316 | int bmapi_flags = XFS_BMAPI_ENTIRE; | ||
317 | int nimaps = 1; | ||
318 | |||
319 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
320 | return -XFS_ERROR(EIO); | ||
321 | |||
322 | if (type == IO_UNWRITTEN) | ||
323 | bmapi_flags |= XFS_BMAPI_IGSTATE; | ||
324 | |||
325 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { | ||
326 | if (nonblocking) | ||
327 | return -XFS_ERROR(EAGAIN); | ||
328 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
329 | } | ||
330 | |||
331 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | ||
332 | (ip->i_df.if_flags & XFS_IFEXTENTS)); | ||
333 | ASSERT(offset <= mp->m_maxioffset); | ||
334 | |||
335 | if (offset + count > mp->m_maxioffset) | ||
336 | count = mp->m_maxioffset - offset; | ||
337 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | ||
338 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
339 | error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, | ||
340 | bmapi_flags, NULL, 0, imap, &nimaps, NULL); | ||
341 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
327 | 342 | ||
328 | return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new); | 343 | if (error) |
344 | return -XFS_ERROR(error); | ||
345 | |||
346 | if (type == IO_DELALLOC && | ||
347 | (!nimaps || isnullstartblock(imap->br_startblock))) { | ||
348 | error = xfs_iomap_write_allocate(ip, offset, count, imap); | ||
349 | if (!error) | ||
350 | trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); | ||
351 | return -XFS_ERROR(error); | ||
352 | } | ||
353 | |||
354 | #ifdef DEBUG | ||
355 | if (type == IO_UNWRITTEN) { | ||
356 | ASSERT(nimaps); | ||
357 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | ||
358 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | ||
359 | } | ||
360 | #endif | ||
361 | if (nimaps) | ||
362 | trace_xfs_map_blocks_found(ip, offset, count, type, imap); | ||
363 | return 0; | ||
329 | } | 364 | } |
330 | 365 | ||
331 | STATIC int | 366 | STATIC int |
@@ -378,28 +413,19 @@ xfs_submit_ioend_bio(
378 | if (xfs_ioend_new_eof(ioend)) | 413 | if (xfs_ioend_new_eof(ioend)) |
379 | xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); | 414 | xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); |
380 | 415 | ||
381 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? | 416 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); |
382 | WRITE_SYNC_PLUG : WRITE, bio); | ||
383 | ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); | ||
384 | bio_put(bio); | ||
385 | } | 417 | } |
386 | 418 | ||
387 | STATIC struct bio * | 419 | STATIC struct bio * |
388 | xfs_alloc_ioend_bio( | 420 | xfs_alloc_ioend_bio( |
389 | struct buffer_head *bh) | 421 | struct buffer_head *bh) |
390 | { | 422 | { |
391 | struct bio *bio; | ||
392 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 423 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
393 | 424 | struct bio *bio = bio_alloc(GFP_NOIO, nvecs); | |
394 | do { | ||
395 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
396 | nvecs >>= 1; | ||
397 | } while (!bio); | ||
398 | 425 | ||
399 | ASSERT(bio->bi_private == NULL); | 426 | ASSERT(bio->bi_private == NULL); |
400 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 427 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
401 | bio->bi_bdev = bh->b_bdev; | 428 | bio->bi_bdev = bh->b_bdev; |
402 | bio_get(bio); | ||
403 | return bio; | 429 | return bio; |
404 | } | 430 | } |
405 | 431 | ||
@@ -470,9 +496,8 @@ xfs_submit_ioend(
470 | /* Pass 1 - start writeback */ | 496 | /* Pass 1 - start writeback */ |
471 | do { | 497 | do { |
472 | next = ioend->io_list; | 498 | next = ioend->io_list; |
473 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { | 499 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) |
474 | xfs_start_buffer_writeback(bh); | 500 | xfs_start_buffer_writeback(bh); |
475 | } | ||
476 | } while ((ioend = next) != NULL); | 501 | } while ((ioend = next) != NULL); |
477 | 502 | ||
478 | /* Pass 2 - submit I/O */ | 503 | /* Pass 2 - submit I/O */ |
@@ -600,117 +625,13 @@ xfs_map_at_offset(
600 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 625 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); |
601 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 626 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); |
602 | 627 | ||
603 | lock_buffer(bh); | ||
604 | xfs_map_buffer(inode, bh, imap, offset); | 628 | xfs_map_buffer(inode, bh, imap, offset); |
605 | bh->b_bdev = xfs_find_bdev_for_inode(inode); | ||
606 | set_buffer_mapped(bh); | 629 | set_buffer_mapped(bh); |
607 | clear_buffer_delay(bh); | 630 | clear_buffer_delay(bh); |
608 | clear_buffer_unwritten(bh); | 631 | clear_buffer_unwritten(bh); |
609 | } | 632 | } |
610 | 633 | ||
611 | /* | 634 | /* |
612 | * Look for a page at index that is suitable for clustering. | ||
613 | */ | ||
614 | STATIC unsigned int | ||
615 | xfs_probe_page( | ||
616 | struct page *page, | ||
617 | unsigned int pg_offset) | ||
618 | { | ||
619 | struct buffer_head *bh, *head; | ||
620 | int ret = 0; | ||
621 | |||
622 | if (PageWriteback(page)) | ||
623 | return 0; | ||
624 | if (!PageDirty(page)) | ||
625 | return 0; | ||
626 | if (!page->mapping) | ||
627 | return 0; | ||
628 | if (!page_has_buffers(page)) | ||
629 | return 0; | ||
630 | |||
631 | bh = head = page_buffers(page); | ||
632 | do { | ||
633 | if (!buffer_uptodate(bh)) | ||
634 | break; | ||
635 | if (!buffer_mapped(bh)) | ||
636 | break; | ||
637 | ret += bh->b_size; | ||
638 | if (ret >= pg_offset) | ||
639 | break; | ||
640 | } while ((bh = bh->b_this_page) != head); | ||
641 | |||
642 | return ret; | ||
643 | } | ||
644 | |||
645 | STATIC size_t | ||
646 | xfs_probe_cluster( | ||
647 | struct inode *inode, | ||
648 | struct page *startpage, | ||
649 | struct buffer_head *bh, | ||
650 | struct buffer_head *head) | ||
651 | { | ||
652 | struct pagevec pvec; | ||
653 | pgoff_t tindex, tlast, tloff; | ||
654 | size_t total = 0; | ||
655 | int done = 0, i; | ||
656 | |||
657 | /* First sum forwards in this page */ | ||
658 | do { | ||
659 | if (!buffer_uptodate(bh) || !buffer_mapped(bh)) | ||
660 | return total; | ||
661 | total += bh->b_size; | ||
662 | } while ((bh = bh->b_this_page) != head); | ||
663 | |||
664 | /* if we reached the end of the page, sum forwards in following pages */ | ||
665 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
666 | tindex = startpage->index + 1; | ||
667 | |||
668 | /* Prune this back to avoid pathological behavior */ | ||
669 | tloff = min(tlast, startpage->index + 64); | ||
670 | |||
671 | pagevec_init(&pvec, 0); | ||
672 | while (!done && tindex <= tloff) { | ||
673 | unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); | ||
674 | |||
675 | if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) | ||
676 | break; | ||
677 | |||
678 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
679 | struct page *page = pvec.pages[i]; | ||
680 | size_t pg_offset, pg_len = 0; | ||
681 | |||
682 | if (tindex == tlast) { | ||
683 | pg_offset = | ||
684 | i_size_read(inode) & (PAGE_CACHE_SIZE - 1); | ||
685 | if (!pg_offset) { | ||
686 | done = 1; | ||
687 | break; | ||
688 | } | ||
689 | } else | ||
690 | pg_offset = PAGE_CACHE_SIZE; | ||
691 | |||
692 | if (page->index == tindex && trylock_page(page)) { | ||
693 | pg_len = xfs_probe_page(page, pg_offset); | ||
694 | unlock_page(page); | ||
695 | } | ||
696 | |||
697 | if (!pg_len) { | ||
698 | done = 1; | ||
699 | break; | ||
700 | } | ||
701 | |||
702 | total += pg_len; | ||
703 | tindex++; | ||
704 | } | ||
705 | |||
706 | pagevec_release(&pvec); | ||
707 | cond_resched(); | ||
708 | } | ||
709 | |||
710 | return total; | ||
711 | } | ||
712 | |||
713 | /* | ||
714 | * Test if a given page is suitable for writing as part of an unwritten | 635 | * Test if a given page is suitable for writing as part of an unwritten |
715 | * or delayed allocate extent. | 636 | * or delayed allocate extent. |
716 | */ | 637 | */ |
@@ -731,9 +652,9 @@ xfs_is_delayed_page(
731 | if (buffer_unwritten(bh)) | 652 | if (buffer_unwritten(bh)) |
732 | acceptable = (type == IO_UNWRITTEN); | 653 | acceptable = (type == IO_UNWRITTEN); |
733 | else if (buffer_delay(bh)) | 654 | else if (buffer_delay(bh)) |
734 | acceptable = (type == IO_DELAY); | 655 | acceptable = (type == IO_DELALLOC); |
735 | else if (buffer_dirty(bh) && buffer_mapped(bh)) | 656 | else if (buffer_dirty(bh) && buffer_mapped(bh)) |
736 | acceptable = (type == IO_NEW); | 657 | acceptable = (type == IO_OVERWRITE); |
737 | else | 658 | else |
738 | break; | 659 | break; |
739 | } while ((bh = bh->b_this_page) != head); | 660 | } while ((bh = bh->b_this_page) != head); |
@@ -758,8 +679,7 @@ xfs_convert_page(
758 | loff_t tindex, | 679 | loff_t tindex, |
759 | struct xfs_bmbt_irec *imap, | 680 | struct xfs_bmbt_irec *imap, |
760 | xfs_ioend_t **ioendp, | 681 | xfs_ioend_t **ioendp, |
761 | struct writeback_control *wbc, | 682 | struct writeback_control *wbc) |
762 | int all_bh) | ||
763 | { | 683 | { |
764 | struct buffer_head *bh, *head; | 684 | struct buffer_head *bh, *head; |
765 | xfs_off_t end_offset; | 685 | xfs_off_t end_offset; |
@@ -814,37 +734,30 @@ xfs_convert_page(
814 | continue; | 734 | continue; |
815 | } | 735 | } |
816 | 736 | ||
817 | if (buffer_unwritten(bh) || buffer_delay(bh)) { | 737 | if (buffer_unwritten(bh) || buffer_delay(bh) || |
738 | buffer_mapped(bh)) { | ||
818 | if (buffer_unwritten(bh)) | 739 | if (buffer_unwritten(bh)) |
819 | type = IO_UNWRITTEN; | 740 | type = IO_UNWRITTEN; |
741 | else if (buffer_delay(bh)) | ||
742 | type = IO_DELALLOC; | ||
820 | else | 743 | else |
821 | type = IO_DELAY; | 744 | type = IO_OVERWRITE; |
822 | 745 | ||
823 | if (!xfs_imap_valid(inode, imap, offset)) { | 746 | if (!xfs_imap_valid(inode, imap, offset)) { |
824 | done = 1; | 747 | done = 1; |
825 | continue; | 748 | continue; |
826 | } | 749 | } |
827 | 750 | ||
828 | ASSERT(imap->br_startblock != HOLESTARTBLOCK); | 751 | lock_buffer(bh); |
829 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 752 | if (type != IO_OVERWRITE) |
830 | 753 | xfs_map_at_offset(inode, bh, imap, offset); | |
831 | xfs_map_at_offset(inode, bh, imap, offset); | ||
832 | xfs_add_to_ioend(inode, bh, offset, type, | 754 | xfs_add_to_ioend(inode, bh, offset, type, |
833 | ioendp, done); | 755 | ioendp, done); |
834 | 756 | ||
835 | page_dirty--; | 757 | page_dirty--; |
836 | count++; | 758 | count++; |
837 | } else { | 759 | } else { |
838 | type = IO_NEW; | 760 | done = 1; |
839 | if (buffer_mapped(bh) && all_bh) { | ||
840 | lock_buffer(bh); | ||
841 | xfs_add_to_ioend(inode, bh, offset, | ||
842 | type, ioendp, done); | ||
843 | count++; | ||
844 | page_dirty--; | ||
845 | } else { | ||
846 | done = 1; | ||
847 | } | ||
848 | } | 761 | } |
849 | } while (offset += len, (bh = bh->b_this_page) != head); | 762 | } while (offset += len, (bh = bh->b_this_page) != head); |
850 | 763 | ||
@@ -876,7 +789,6 @@ xfs_cluster_write(
876 | struct xfs_bmbt_irec *imap, | 789 | struct xfs_bmbt_irec *imap, |
877 | xfs_ioend_t **ioendp, | 790 | xfs_ioend_t **ioendp, |
878 | struct writeback_control *wbc, | 791 | struct writeback_control *wbc, |
879 | int all_bh, | ||
880 | pgoff_t tlast) | 792 | pgoff_t tlast) |
881 | { | 793 | { |
882 | struct pagevec pvec; | 794 | struct pagevec pvec; |
@@ -891,7 +803,7 @@ xfs_cluster_write(
891 | 803 | ||
892 | for (i = 0; i < pagevec_count(&pvec); i++) { | 804 | for (i = 0; i < pagevec_count(&pvec); i++) { |
893 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, | 805 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, |
894 | imap, ioendp, wbc, all_bh); | 806 | imap, ioendp, wbc); |
895 | if (done) | 807 | if (done) |
896 | break; | 808 | break; |
897 | } | 809 | } |
@@ -934,83 +846,38 @@ xfs_aops_discard_page(
934 | struct xfs_inode *ip = XFS_I(inode); | 846 | struct xfs_inode *ip = XFS_I(inode); |
935 | struct buffer_head *bh, *head; | 847 | struct buffer_head *bh, *head; |
936 | loff_t offset = page_offset(page); | 848 | loff_t offset = page_offset(page); |
937 | ssize_t len = 1 << inode->i_blkbits; | ||
938 | 849 | ||
939 | if (!xfs_is_delayed_page(page, IO_DELAY)) | 850 | if (!xfs_is_delayed_page(page, IO_DELALLOC)) |
940 | goto out_invalidate; | 851 | goto out_invalidate; |
941 | 852 | ||
942 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 853 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
943 | goto out_invalidate; | 854 | goto out_invalidate; |
944 | 855 | ||
945 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | 856 | xfs_alert(ip->i_mount, |
946 | "page discard on page %p, inode 0x%llx, offset %llu.", | 857 | "page discard on page %p, inode 0x%llx, offset %llu.", |
947 | page, ip->i_ino, offset); | 858 | page, ip->i_ino, offset); |
948 | 859 | ||
949 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 860 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
950 | bh = head = page_buffers(page); | 861 | bh = head = page_buffers(page); |
951 | do { | 862 | do { |
952 | int done; | ||
953 | xfs_fileoff_t offset_fsb; | ||
954 | xfs_bmbt_irec_t imap; | ||
955 | int nimaps = 1; | ||
956 | int error; | 863 | int error; |
957 | xfs_fsblock_t firstblock; | 864 | xfs_fileoff_t start_fsb; |
958 | xfs_bmap_free_t flist; | ||
959 | 865 | ||
960 | if (!buffer_delay(bh)) | 866 | if (!buffer_delay(bh)) |
961 | goto next_buffer; | 867 | goto next_buffer; |
962 | 868 | ||
963 | offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); | 869 | start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); |
964 | 870 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); | |
965 | /* | ||
966 | * Map the range first and check that it is a delalloc extent | ||
967 | * before trying to unmap the range. Otherwise we will be | ||
968 | * trying to remove a real extent (which requires a | ||
969 | * transaction) or a hole, which is probably a bad idea... | ||
970 | */ | ||
971 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, | ||
972 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
973 | &nimaps, NULL); | ||
974 | |||
975 | if (error) { | ||
976 | /* something screwed, just bail */ | ||
977 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
978 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
979 | "page discard failed delalloc mapping lookup."); | ||
980 | } | ||
981 | break; | ||
982 | } | ||
983 | if (!nimaps) { | ||
984 | /* nothing there */ | ||
985 | goto next_buffer; | ||
986 | } | ||
987 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
988 | /* been converted, ignore */ | ||
989 | goto next_buffer; | ||
990 | } | ||
991 | WARN_ON(imap.br_blockcount == 0); | ||
992 | |||
993 | /* | ||
994 | * Note: while we initialise the firstblock/flist pair, they | ||
995 | * should never be used because blocks should never be | ||
996 | * allocated or freed for a delalloc extent and hence we need | ||
997 | * don't cancel or finish them after the xfs_bunmapi() call. | ||
998 | */ | ||
999 | xfs_bmap_init(&flist, &firstblock); | ||
1000 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, | ||
1001 | &flist, &done); | ||
1002 | |||
1003 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
1004 | if (error) { | 871 | if (error) { |
1005 | /* something screwed, just bail */ | 872 | /* something screwed, just bail */ |
1006 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 873 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
1007 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | 874 | xfs_alert(ip->i_mount, |
1008 | "page discard unable to remove delalloc mapping."); | 875 | "page discard unable to remove delalloc mapping."); |
1009 | } | 876 | } |
1010 | break; | 877 | break; |
1011 | } | 878 | } |
1012 | next_buffer: | 879 | next_buffer: |
1013 | offset += len; | 880 | offset += 1 << inode->i_blkbits; |
1014 | 881 | ||
1015 | } while ((bh = bh->b_this_page) != head); | 882 | } while ((bh = bh->b_this_page) != head); |
1016 | 883 | ||
@@ -1047,10 +914,10 @@ xfs_vm_writepage(
1047 | unsigned int type; | 914 | unsigned int type; |
1048 | __uint64_t end_offset; | 915 | __uint64_t end_offset; |
1049 | pgoff_t end_index, last_index; | 916 | pgoff_t end_index, last_index; |
1050 | ssize_t size, len; | 917 | ssize_t len; |
1051 | int flags, err, imap_valid = 0, uptodate = 1; | 918 | int err, imap_valid = 0, uptodate = 1; |
1052 | int count = 0; | 919 | int count = 0; |
1053 | int all_bh = 0; | 920 | int nonblocking = 0; |
1054 | 921 | ||
1055 | trace_xfs_writepage(inode, page, 0); | 922 | trace_xfs_writepage(inode, page, 0); |
1056 | 923 | ||
@@ -1101,110 +968,78 @@ xfs_vm_writepage(
1101 | 968 | ||
1102 | bh = head = page_buffers(page); | 969 | bh = head = page_buffers(page); |
1103 | offset = page_offset(page); | 970 | offset = page_offset(page); |
1104 | flags = BMAPI_READ; | 971 | type = IO_OVERWRITE; |
1105 | type = IO_NEW; | 972 | |
973 | if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) | ||
974 | nonblocking = 1; | ||
1106 | 975 | ||
1107 | do { | 976 | do { |
977 | int new_ioend = 0; | ||
978 | |||
1108 | if (offset >= end_offset) | 979 | if (offset >= end_offset) |
1109 | break; | 980 | break; |
1110 | if (!buffer_uptodate(bh)) | 981 | if (!buffer_uptodate(bh)) |
1111 | uptodate = 0; | 982 | uptodate = 0; |
1112 | 983 | ||
1113 | /* | 984 | /* |
1114 | * A hole may still be marked uptodate because discard_buffer | 985 | * set_page_dirty dirties all buffers in a page, independent |
1115 | * leaves the flag set. | 986 | * of their state. The dirty state however is entirely |
987 | * meaningless for holes (!mapped && uptodate), so skip | ||
988 | * buffers covering holes here. | ||
1116 | */ | 989 | */ |
1117 | if (!buffer_mapped(bh) && buffer_uptodate(bh)) { | 990 | if (!buffer_mapped(bh) && buffer_uptodate(bh)) { |
1118 | ASSERT(!buffer_dirty(bh)); | ||
1119 | imap_valid = 0; | 991 | imap_valid = 0; |
1120 | continue; | 992 | continue; |
1121 | } | 993 | } |
1122 | 994 | ||
1123 | if (imap_valid) | 995 | if (buffer_unwritten(bh)) { |
1124 | imap_valid = xfs_imap_valid(inode, &imap, offset); | 996 | if (type != IO_UNWRITTEN) { |
1125 | |||
1126 | if (buffer_unwritten(bh) || buffer_delay(bh)) { | ||
1127 | int new_ioend = 0; | ||
1128 | |||
1129 | /* | ||
1130 | * Make sure we don't use a read-only iomap | ||
1131 | */ | ||
1132 | if (flags == BMAPI_READ) | ||
1133 | imap_valid = 0; | ||
1134 | |||
1135 | if (buffer_unwritten(bh)) { | ||
1136 | type = IO_UNWRITTEN; | 997 | type = IO_UNWRITTEN; |
1137 | flags = BMAPI_WRITE | BMAPI_IGNSTATE; | 998 | imap_valid = 0; |
1138 | } else if (buffer_delay(bh)) { | ||
1139 | type = IO_DELAY; | ||
1140 | flags = BMAPI_ALLOCATE; | ||
1141 | |||
1142 | if (wbc->sync_mode == WB_SYNC_NONE && | ||
1143 | wbc->nonblocking) | ||
1144 | flags |= BMAPI_TRYLOCK; | ||
1145 | } | ||
1146 | |||
1147 | if (!imap_valid) { | ||
1148 | /* | ||
1149 | * If we didn't have a valid mapping then we | ||
1150 | * need to ensure that we put the new mapping | ||
1151 | * in a new ioend structure. This needs to be | ||
1152 | * done to ensure that the ioends correctly | ||
1153 | * reflect the block mappings at io completion | ||
1154 | * for unwritten extent conversion. | ||
1155 | */ | ||
1156 | new_ioend = 1; | ||
1157 | err = xfs_map_blocks(inode, offset, len, | ||
1158 | &imap, flags); | ||
1159 | if (err) | ||
1160 | goto error; | ||
1161 | imap_valid = xfs_imap_valid(inode, &imap, | ||
1162 | offset); | ||
1163 | } | 999 | } |
1164 | if (imap_valid) { | 1000 | } else if (buffer_delay(bh)) { |
1165 | xfs_map_at_offset(inode, bh, &imap, offset); | 1001 | if (type != IO_DELALLOC) { |
1166 | xfs_add_to_ioend(inode, bh, offset, type, | 1002 | type = IO_DELALLOC; |
1167 | &ioend, new_ioend); | 1003 | imap_valid = 0; |
1168 | count++; | ||
1169 | } | 1004 | } |
1170 | } else if (buffer_uptodate(bh)) { | 1005 | } else if (buffer_uptodate(bh)) { |
1171 | /* | 1006 | if (type != IO_OVERWRITE) { |
1172 | * we got here because the buffer is already mapped. | 1007 | type = IO_OVERWRITE; |
1173 | * That means it must already have extents allocated | 1008 | imap_valid = 0; |
1174 | * underneath it. Map the extent by reading it. | 1009 | } |
1175 | */ | 1010 | } else { |
1176 | if (!imap_valid || flags != BMAPI_READ) { | 1011 | if (PageUptodate(page)) { |
1177 | flags = BMAPI_READ; | 1012 | ASSERT(buffer_mapped(bh)); |
1178 | size = xfs_probe_cluster(inode, page, bh, head); | 1013 | imap_valid = 0; |
1179 | err = xfs_map_blocks(inode, offset, size, | ||
1180 | &imap, flags); | ||
1181 | if (err) | ||
1182 | goto error; | ||
1183 | imap_valid = xfs_imap_valid(inode, &imap, | ||
1184 | offset); | ||
1185 | } | 1014 | } |
1015 | continue; | ||
1016 | } | ||
1186 | 1017 | ||
1018 | if (imap_valid) | ||
1019 | imap_valid = xfs_imap_valid(inode, &imap, offset); | ||
1020 | if (!imap_valid) { | ||
1187 | /* | 1021 | /* |
1188 | * We set the type to IO_NEW in case we are doing a | 1022 | * If we didn't have a valid mapping then we need to |
1189 | * small write at EOF that is extending the file but | 1023 | * put the new mapping into a separate ioend structure. |
1190 | * without needing an allocation. We need to update the | 1024 | * This ensures non-contiguous extents always have |
1191 | * file size on I/O completion in this case so it is | 1025 | * separate ioends, which is particularly important |
1192 | * the same case as having just allocated a new extent | 1026 | * for unwritten extent conversion at I/O completion |
1193 | * that we are writing into for the first time. | 1027 | * time. |
1194 | */ | 1028 | */ |
1195 | type = IO_NEW; | 1029 | new_ioend = 1; |
1196 | if (trylock_buffer(bh)) { | 1030 | err = xfs_map_blocks(inode, offset, &imap, type, |
1197 | if (imap_valid) | 1031 | nonblocking); |
1198 | all_bh = 1; | 1032 | if (err) |
1199 | xfs_add_to_ioend(inode, bh, offset, type, | 1033 | goto error; |
1200 | &ioend, !imap_valid); | 1034 | imap_valid = xfs_imap_valid(inode, &imap, offset); |
1201 | count++; | 1035 | } |
1202 | } else { | 1036 | if (imap_valid) { |
1203 | imap_valid = 0; | 1037 | lock_buffer(bh); |
1204 | } | 1038 | if (type != IO_OVERWRITE) |
1205 | } else if (PageUptodate(page)) { | 1039 | xfs_map_at_offset(inode, bh, &imap, offset); |
1206 | ASSERT(buffer_mapped(bh)); | 1040 | xfs_add_to_ioend(inode, bh, offset, type, &ioend, |
1207 | imap_valid = 0; | 1041 | new_ioend); |
1042 | count++; | ||
1208 | } | 1043 | } |
1209 | 1044 | ||
1210 | if (!iohead) | 1045 | if (!iohead) |
@@ -1233,7 +1068,7 @@ xfs_vm_writepage(
1233 | end_index = last_index; | 1068 | end_index = last_index; |
1234 | 1069 | ||
1235 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, | 1070 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, |
1236 | wbc, all_bh, end_index); | 1071 | wbc, end_index); |
1237 | } | 1072 | } |
1238 | 1073 | ||
1239 | if (iohead) | 1074 | if (iohead) |
@@ -1302,13 +1137,19 @@ __xfs_get_blocks(
1302 | int create, | 1137 | int create, |
1303 | int direct) | 1138 | int direct) |
1304 | { | 1139 | { |
1305 | int flags = create ? BMAPI_WRITE : BMAPI_READ; | 1140 | struct xfs_inode *ip = XFS_I(inode); |
1141 | struct xfs_mount *mp = ip->i_mount; | ||
1142 | xfs_fileoff_t offset_fsb, end_fsb; | ||
1143 | int error = 0; | ||
1144 | int lockmode = 0; | ||
1306 | struct xfs_bmbt_irec imap; | 1145 | struct xfs_bmbt_irec imap; |
1146 | int nimaps = 1; | ||
1307 | xfs_off_t offset; | 1147 | xfs_off_t offset; |
1308 | ssize_t size; | 1148 | ssize_t size; |
1309 | int nimap = 1; | ||
1310 | int new = 0; | 1149 | int new = 0; |
1311 | int error; | 1150 | |
1151 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1152 | return -XFS_ERROR(EIO); | ||
1312 | 1153 | ||
1313 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1154 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
1314 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1155 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
@@ -1317,15 +1158,45 @@ __xfs_get_blocks(
1317 | if (!create && direct && offset >= i_size_read(inode)) | 1158 | if (!create && direct && offset >= i_size_read(inode)) |
1318 | return 0; | 1159 | return 0; |
1319 | 1160 | ||
1320 | if (direct && create) | 1161 | if (create) { |
1321 | flags |= BMAPI_DIRECT; | 1162 | lockmode = XFS_ILOCK_EXCL; |
1163 | xfs_ilock(ip, lockmode); | ||
1164 | } else { | ||
1165 | lockmode = xfs_ilock_map_shared(ip); | ||
1166 | } | ||
1322 | 1167 | ||
1323 | error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, | 1168 | ASSERT(offset <= mp->m_maxioffset); |
1324 | &new); | 1169 | if (offset + size > mp->m_maxioffset) |
1170 | size = mp->m_maxioffset - offset; | ||
1171 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); | ||
1172 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
1173 | |||
1174 | error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, | ||
1175 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); | ||
1325 | if (error) | 1176 | if (error) |
1326 | return -error; | 1177 | goto out_unlock; |
1327 | if (nimap == 0) | 1178 | |
1328 | return 0; | 1179 | if (create && |
1180 | (!nimaps || | ||
1181 | (imap.br_startblock == HOLESTARTBLOCK || | ||
1182 | imap.br_startblock == DELAYSTARTBLOCK))) { | ||
1183 | if (direct) { | ||
1184 | error = xfs_iomap_write_direct(ip, offset, size, | ||
1185 | &imap, nimaps); | ||
1186 | } else { | ||
1187 | error = xfs_iomap_write_delay(ip, offset, size, &imap); | ||
1188 | } | ||
1189 | if (error) | ||
1190 | goto out_unlock; | ||
1191 | |||
1192 | trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); | ||
1193 | } else if (nimaps) { | ||
1194 | trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); | ||
1195 | } else { | ||
1196 | trace_xfs_get_blocks_notfound(ip, offset, size); | ||
1197 | goto out_unlock; | ||
1198 | } | ||
1199 | xfs_iunlock(ip, lockmode); | ||
1329 | 1200 | ||
1330 | if (imap.br_startblock != HOLESTARTBLOCK && | 1201 | if (imap.br_startblock != HOLESTARTBLOCK && |
1331 | imap.br_startblock != DELAYSTARTBLOCK) { | 1202 | imap.br_startblock != DELAYSTARTBLOCK) { |
@@ -1392,6 +1263,10 @@ __xfs_get_blocks(
1392 | } | 1263 | } |
1393 | 1264 | ||
1394 | return 0; | 1265 | return 0; |
1266 | |||
1267 | out_unlock: | ||
1268 | xfs_iunlock(ip, lockmode); | ||
1269 | return -error; | ||
1395 | } | 1270 | } |
1396 | 1271 | ||
1397 | int | 1272 | int |
@@ -1420,7 +1295,7 @@ xfs_get_blocks_direct(
1420 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | 1295 | * If the private argument is non-NULL __xfs_get_blocks signals us that we |
1421 | * need to issue a transaction to convert the range from unwritten to written | 1296 | * need to issue a transaction to convert the range from unwritten to written |
1422 | * extents. In case this is regular synchronous I/O we just call xfs_end_io | 1297 | * extents. In case this is regular synchronous I/O we just call xfs_end_io |
1423 | * to do this and we are done. But in case this was a successfull AIO | 1298 | * to do this and we are done. But in case this was a successful AIO |
1424 | * request this handler is called from interrupt context, from which we | 1299 | * request this handler is called from interrupt context, from which we |
1425 | * can't start transactions. In that case offload the I/O completion to | 1300 | * can't start transactions. In that case offload the I/O completion to |
1426 | * the workqueues we also use for buffered I/O completion. | 1301 | * the workqueues we also use for buffered I/O completion. |
@@ -1479,7 +1354,7 @@ xfs_vm_direct_IO(
1479 | ssize_t ret; | 1354 | ssize_t ret; |
1480 | 1355 | ||
1481 | if (rw & WRITE) { | 1356 | if (rw & WRITE) { |
1482 | iocb->private = xfs_alloc_ioend(inode, IO_NEW); | 1357 | iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); |
1483 | 1358 | ||
1484 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1359 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1485 | offset, nr_segs, | 1360 | offset, nr_segs, |
@@ -1505,11 +1380,42 @@ xfs_vm_write_failed(
1505 | struct inode *inode = mapping->host; | 1380 | struct inode *inode = mapping->host; |
1506 | 1381 | ||
1507 | if (to > inode->i_size) { | 1382 | if (to > inode->i_size) { |
1508 | struct iattr ia = { | 1383 | /* |
1509 | .ia_valid = ATTR_SIZE | ATTR_FORCE, | 1384 | * punch out the delalloc blocks we have already allocated. We |
1510 | .ia_size = inode->i_size, | 1385 | * don't call xfs_setattr() to do this as we may be in the |
1511 | }; | 1386 | * middle of a multi-iovec write and so the vfs inode->i_size |
1512 | xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); | 1387 | * will not match the xfs ip->i_size and so it will zero too |
1388 | * much. Hence we jus truncate the page cache to zero what is | ||
1389 | * necessary and punch the delalloc blocks directly. | ||
1390 | */ | ||
1391 | struct xfs_inode *ip = XFS_I(inode); | ||
1392 | xfs_fileoff_t start_fsb; | ||
1393 | xfs_fileoff_t end_fsb; | ||
1394 | int error; | ||
1395 | |||
1396 | truncate_pagecache(inode, to, inode->i_size); | ||
1397 | |||
1398 | /* | ||
1399 | * Check if there are any blocks that are outside of i_size | ||
1400 | * that need to be trimmed back. | ||
1401 | */ | ||
1402 | start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; | ||
1403 | end_fsb = XFS_B_TO_FSB(ip->i_mount, to); | ||
1404 | if (end_fsb <= start_fsb) | ||
1405 | return; | ||
1406 | |||
1407 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1408 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, | ||
1409 | end_fsb - start_fsb); | ||
1410 | if (error) { | ||
1411 | /* something screwed, just bail */ | ||
1412 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
1413 | xfs_alert(ip->i_mount, | ||
1414 | "xfs_vm_write_failed: unable to clean up ino %lld", | ||
1415 | ip->i_ino); | ||
1416 | } | ||
1417 | } | ||
1418 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1513 | } | 1419 | } |
1514 | } | 1420 | } |
1515 | 1421 | ||
@@ -1588,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
1588 | .readpages = xfs_vm_readpages, | 1494 | .readpages = xfs_vm_readpages, |
1589 | .writepage = xfs_vm_writepage, | 1495 | .writepage = xfs_vm_writepage, |
1590 | .writepages = xfs_vm_writepages, | 1496 | .writepages = xfs_vm_writepages, |
1591 | .sync_page = block_sync_page, | ||
1592 | .releasepage = xfs_vm_releasepage, | 1497 | .releasepage = xfs_vm_releasepage, |
1593 | .invalidatepage = xfs_vm_invalidatepage, | 1498 | .invalidatepage = xfs_vm_invalidatepage, |
1594 | .write_begin = xfs_vm_write_begin, | 1499 | .write_begin = xfs_vm_write_begin, |
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index c5057fb6237a..71f721e1a71f 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue;
23 | extern mempool_t *xfs_ioend_pool; | 23 | extern mempool_t *xfs_ioend_pool; |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * Types of I/O for bmap clustering and I/O completion tracking. | ||
27 | */ | ||
28 | enum { | ||
29 | IO_DIRECT = 0, /* special case for direct I/O ioends */ | ||
30 | IO_DELALLOC, /* mapping covers delalloc region */ | ||
31 | IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ | ||
32 | IO_OVERWRITE, /* mapping covers already allocated extent */ | ||
33 | }; | ||
34 | |||
35 | #define XFS_IO_TYPES \ | ||
36 | { 0, "" }, \ | ||
37 | { IO_DELALLOC, "delalloc" }, \ | ||
38 | { IO_UNWRITTEN, "unwritten" }, \ | ||
39 | { IO_OVERWRITE, "overwrite" } | ||
40 | |||
41 | /* | ||
26 | * xfs_ioend struct manages large extent writes for XFS. | 42 | * xfs_ioend struct manages large extent writes for XFS. |
27 | * It can manage several multi-page bio's at once. | 43 | * It can manage several multi-page bio's at once. |
28 | */ | 44 | */ |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 286e36e21dae..5e68099db2a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
33 | #include <linux/migrate.h> | 33 | #include <linux/migrate.h> |
34 | #include <linux/backing-dev.h> | 34 | #include <linux/backing-dev.h> |
35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
36 | #include <linux/list_sort.h> | ||
37 | 36 | ||
38 | #include "xfs_sb.h" | 37 | #include "xfs_sb.h" |
39 | #include "xfs_inum.h" | 38 | #include "xfs_inum.h" |
@@ -44,12 +43,7 @@
44 | 43 | ||
45 | static kmem_zone_t *xfs_buf_zone; | 44 | static kmem_zone_t *xfs_buf_zone; |
46 | STATIC int xfsbufd(void *); | 45 | STATIC int xfsbufd(void *); |
47 | STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t); | ||
48 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); | 46 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); |
49 | static struct shrinker xfs_buf_shake = { | ||
50 | .shrink = xfsbufd_wakeup, | ||
51 | .seeks = DEFAULT_SEEKS, | ||
52 | }; | ||
53 | 47 | ||
54 | static struct workqueue_struct *xfslogd_workqueue; | 48 | static struct workqueue_struct *xfslogd_workqueue; |
55 | struct workqueue_struct *xfsdatad_workqueue; | 49 | struct workqueue_struct *xfsdatad_workqueue; |
@@ -99,77 +93,79 @@ xfs_buf_vmap_len(
99 | } | 93 | } |
100 | 94 | ||
101 | /* | 95 | /* |
102 | * Page Region interfaces. | 96 | * xfs_buf_lru_add - add a buffer to the LRU. |
103 | * | 97 | * |
104 | * For pages in filesystems where the blocksize is smaller than the | 98 | * The LRU takes a new reference to the buffer so that it will only be freed |
105 | * pagesize, we use the page->private field (long) to hold a bitmap | 99 | * once the shrinker takes the buffer off the LRU. |
106 | * of uptodate regions within the page. | ||
107 | * | ||
108 | * Each such region is "bytes per page / bits per long" bytes long. | ||
109 | * | ||
110 | * NBPPR == number-of-bytes-per-page-region | ||
111 | * BTOPR == bytes-to-page-region (rounded up) | ||
112 | * BTOPRT == bytes-to-page-region-truncated (rounded down) | ||
113 | */ | 100 | */ |
114 | #if (BITS_PER_LONG == 32) | 101 | STATIC void |
115 | #define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ | 102 | xfs_buf_lru_add( |
116 | #elif (BITS_PER_LONG == 64) | 103 | struct xfs_buf *bp) |
117 | #define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */ | ||
118 | #else | ||
119 | #error BITS_PER_LONG must be 32 or 64 | ||
120 | #endif | ||
121 | #define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG) | ||
122 | #define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT) | ||
123 | #define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT)) | ||
124 | |||
125 | STATIC unsigned long | ||
126 | page_region_mask( | ||
127 | size_t offset, | ||
128 | size_t length) | ||
129 | { | 104 | { |
130 | unsigned long mask; | 105 | struct xfs_buftarg *btp = bp->b_target; |
131 | int first, final; | ||
132 | |||
133 | first = BTOPR(offset); | ||
134 | final = BTOPRT(offset + length - 1); | ||
135 | first = min(first, final); | ||
136 | |||
137 | mask = ~0UL; | ||
138 | mask <<= BITS_PER_LONG - (final - first); | ||
139 | mask >>= BITS_PER_LONG - (final); | ||
140 | |||
141 | ASSERT(offset + length <= PAGE_CACHE_SIZE); | ||
142 | ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0); | ||
143 | 106 | ||
144 | return mask; | 107 | spin_lock(&btp->bt_lru_lock); |
108 | if (list_empty(&bp->b_lru)) { | ||
109 | atomic_inc(&bp->b_hold); | ||
110 | list_add_tail(&bp->b_lru, &btp->bt_lru); | ||
111 | btp->bt_lru_nr++; | ||
112 | } | ||
113 | spin_unlock(&btp->bt_lru_lock); | ||
145 | } | 114 | } |
146 | 115 | ||
116 | /* | ||
117 | * xfs_buf_lru_del - remove a buffer from the LRU | ||
118 | * | ||
119 | * The unlocked check is safe here because it only occurs when there are not | ||
120 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there | ||
121 | * to optimise the shrinker removing the buffer from the LRU and calling | ||
122 | * xfs_buf_free(). i.e. it removes an unnecessary round trip on the | ||
123 | * bt_lru_lock. | ||
124 | */ | ||
147 | STATIC void | 125 | STATIC void |
148 | set_page_region( | 126 | xfs_buf_lru_del( |
149 | struct page *page, | 127 | struct xfs_buf *bp) |
150 | size_t offset, | ||
151 | size_t length) | ||
152 | { | 128 | { |
153 | set_page_private(page, | 129 | struct xfs_buftarg *btp = bp->b_target; |
154 | page_private(page) | page_region_mask(offset, length)); | ||
155 | if (page_private(page) == ~0UL) | ||
156 | SetPageUptodate(page); | ||
157 | } | ||
158 | 130 | ||
159 | STATIC int | 131 | if (list_empty(&bp->b_lru)) |
160 | test_page_region( | 132 | return; |
161 | struct page *page, | ||
162 | size_t offset, | ||
163 | size_t length) | ||
164 | { | ||
165 | unsigned long mask = page_region_mask(offset, length); | ||
166 | 133 | ||
167 | return (mask && (page_private(page) & mask) == mask); | 134 | spin_lock(&btp->bt_lru_lock); |
135 | if (!list_empty(&bp->b_lru)) { | ||
136 | list_del_init(&bp->b_lru); | ||
137 | btp->bt_lru_nr--; | ||
138 | } | ||
139 | spin_unlock(&btp->bt_lru_lock); | ||
168 | } | 140 | } |
169 | 141 | ||
170 | /* | 142 | /* |
171 | * Internal xfs_buf_t object manipulation | 143 | * When we mark a buffer stale, we remove the buffer from the LRU and clear the |
144 | * b_lru_ref count so that the buffer is freed immediately when the buffer | ||
145 | * reference count falls to zero. If the buffer is already on the LRU, we need | ||
146 | * to remove the reference that LRU holds on the buffer. | ||
147 | * | ||
148 | * This prevents build-up of stale buffers on the LRU. | ||
172 | */ | 149 | */ |
150 | void | ||
151 | xfs_buf_stale( | ||
152 | struct xfs_buf *bp) | ||
153 | { | ||
154 | bp->b_flags |= XBF_STALE; | ||
155 | atomic_set(&(bp)->b_lru_ref, 0); | ||
156 | if (!list_empty(&bp->b_lru)) { | ||
157 | struct xfs_buftarg *btp = bp->b_target; | ||
158 | |||
159 | spin_lock(&btp->bt_lru_lock); | ||
160 | if (!list_empty(&bp->b_lru)) { | ||
161 | list_del_init(&bp->b_lru); | ||
162 | btp->bt_lru_nr--; | ||
163 | atomic_dec(&bp->b_hold); | ||
164 | } | ||
165 | spin_unlock(&btp->bt_lru_lock); | ||
166 | } | ||
167 | ASSERT(atomic_read(&bp->b_hold) >= 1); | ||
168 | } | ||
173 | 169 | ||
174 | STATIC void | 170 | STATIC void |
175 | _xfs_buf_initialize( | 171 | _xfs_buf_initialize( |
@@ -186,10 +182,12 @@ _xfs_buf_initialize(
186 | 182 | ||
187 | memset(bp, 0, sizeof(xfs_buf_t)); | 183 | memset(bp, 0, sizeof(xfs_buf_t)); |
188 | atomic_set(&bp->b_hold, 1); | 184 | atomic_set(&bp->b_hold, 1); |
185 | atomic_set(&bp->b_lru_ref, 1); | ||
189 | init_completion(&bp->b_iowait); | 186 | init_completion(&bp->b_iowait); |
187 | INIT_LIST_HEAD(&bp->b_lru); | ||
190 | INIT_LIST_HEAD(&bp->b_list); | 188 | INIT_LIST_HEAD(&bp->b_list); |
191 | INIT_LIST_HEAD(&bp->b_hash_list); | 189 | RB_CLEAR_NODE(&bp->b_rbnode); |
192 | init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ | 190 | sema_init(&bp->b_sema, 0); /* held, no waiters */ |
193 | XB_SET_OWNER(bp); | 191 | XB_SET_OWNER(bp); |
194 | bp->b_target = target; | 192 | bp->b_target = target; |
195 | bp->b_file_offset = range_base; | 193 | bp->b_file_offset = range_base; |
@@ -262,9 +260,9 @@ xfs_buf_free(
262 | { | 260 | { |
263 | trace_xfs_buf_free(bp, _RET_IP_); | 261 | trace_xfs_buf_free(bp, _RET_IP_); |
264 | 262 | ||
265 | ASSERT(list_empty(&bp->b_hash_list)); | 263 | ASSERT(list_empty(&bp->b_lru)); |
266 | 264 | ||
267 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { | 265 | if (bp->b_flags & _XBF_PAGES) { |
268 | uint i; | 266 | uint i; |
269 | 267 | ||
270 | if (xfs_buf_is_vmapped(bp)) | 268 | if (xfs_buf_is_vmapped(bp)) |
@@ -274,56 +272,77 @@ xfs_buf_free(
274 | for (i = 0; i < bp->b_page_count; i++) { | 272 | for (i = 0; i < bp->b_page_count; i++) { |
275 | struct page *page = bp->b_pages[i]; | 273 | struct page *page = bp->b_pages[i]; |
276 | 274 | ||
277 | if (bp->b_flags & _XBF_PAGE_CACHE) | 275 | __free_page(page); |
278 | ASSERT(!PagePrivate(page)); | ||
279 | page_cache_release(page); | ||
280 | } | 276 | } |
281 | } | 277 | } else if (bp->b_flags & _XBF_KMEM) |
278 | kmem_free(bp->b_addr); | ||
282 | _xfs_buf_free_pages(bp); | 279 | _xfs_buf_free_pages(bp); |
283 | xfs_buf_deallocate(bp); | 280 | xfs_buf_deallocate(bp); |
284 | } | 281 | } |
285 | 282 | ||
286 | /* | 283 | /* |
287 | * Finds all pages for buffer in question and builds it's page list. | 284 | * Allocates all the pages for buffer in question and builds it's page list. |
288 | */ | 285 | */ |
289 | STATIC int | 286 | STATIC int |
290 | _xfs_buf_lookup_pages( | 287 | xfs_buf_allocate_memory( |
291 | xfs_buf_t *bp, | 288 | xfs_buf_t *bp, |
292 | uint flags) | 289 | uint flags) |
293 | { | 290 | { |
294 | struct address_space *mapping = bp->b_target->bt_mapping; | ||
295 | size_t blocksize = bp->b_target->bt_bsize; | ||
296 | size_t size = bp->b_count_desired; | 291 | size_t size = bp->b_count_desired; |
297 | size_t nbytes, offset; | 292 | size_t nbytes, offset; |
298 | gfp_t gfp_mask = xb_to_gfp(flags); | 293 | gfp_t gfp_mask = xb_to_gfp(flags); |
299 | unsigned short page_count, i; | 294 | unsigned short page_count, i; |
300 | pgoff_t first; | ||
301 | xfs_off_t end; | 295 | xfs_off_t end; |
302 | int error; | 296 | int error; |
303 | 297 | ||
298 | /* | ||
299 | * for buffers that are contained within a single page, just allocate | ||
300 | * the memory from the heap - there's no need for the complexity of | ||
301 | * page arrays to keep allocation down to order 0. | ||
302 | */ | ||
303 | if (bp->b_buffer_length < PAGE_SIZE) { | ||
304 | bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); | ||
305 | if (!bp->b_addr) { | ||
306 | /* low memory - use alloc_page loop instead */ | ||
307 | goto use_alloc_page; | ||
308 | } | ||
309 | |||
310 | if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & | ||
311 | PAGE_MASK) != | ||
312 | ((unsigned long)bp->b_addr & PAGE_MASK)) { | ||
313 | /* b_addr spans two pages - use alloc_page instead */ | ||
314 | kmem_free(bp->b_addr); | ||
315 | bp->b_addr = NULL; | ||
316 | goto use_alloc_page; | ||
317 | } | ||
318 | bp->b_offset = offset_in_page(bp->b_addr); | ||
319 | bp->b_pages = bp->b_page_array; | ||
320 | bp->b_pages[0] = virt_to_page(bp->b_addr); | ||
321 | bp->b_page_count = 1; | ||
322 | bp->b_flags |= XBF_MAPPED | _XBF_KMEM; | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | use_alloc_page: | ||
304 | end = bp->b_file_offset + bp->b_buffer_length; | 327 | end = bp->b_file_offset + bp->b_buffer_length; |
305 | page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); | 328 | page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); |
306 | |||
307 | error = _xfs_buf_get_pages(bp, page_count, flags); | 329 | error = _xfs_buf_get_pages(bp, page_count, flags); |
308 | if (unlikely(error)) | 330 | if (unlikely(error)) |
309 | return error; | 331 | return error; |
310 | bp->b_flags |= _XBF_PAGE_CACHE; | ||
311 | 332 | ||
312 | offset = bp->b_offset; | 333 | offset = bp->b_offset; |
313 | first = bp->b_file_offset >> PAGE_CACHE_SHIFT; | 334 | bp->b_flags |= _XBF_PAGES; |
314 | 335 | ||
315 | for (i = 0; i < bp->b_page_count; i++) { | 336 | for (i = 0; i < bp->b_page_count; i++) { |
316 | struct page *page; | 337 | struct page *page; |
317 | uint retries = 0; | 338 | uint retries = 0; |
318 | 339 | retry: | |
319 | retry: | 340 | page = alloc_page(gfp_mask); |
320 | page = find_or_create_page(mapping, first + i, gfp_mask); | ||
321 | if (unlikely(page == NULL)) { | 341 | if (unlikely(page == NULL)) { |
322 | if (flags & XBF_READ_AHEAD) { | 342 | if (flags & XBF_READ_AHEAD) { |
323 | bp->b_page_count = i; | 343 | bp->b_page_count = i; |
324 | for (i = 0; i < bp->b_page_count; i++) | 344 | error = ENOMEM; |
325 | unlock_page(bp->b_pages[i]); | 345 | goto out_free_pages; |
326 | return -ENOMEM; | ||
327 | } | 346 | } |
328 | 347 | ||
329 | /* | 348 | /* |
@@ -333,65 +352,55 @@ _xfs_buf_lookup_pages(
333 | * handle buffer allocation failures we can't do much. | 352 | * handle buffer allocation failures we can't do much. |
334 | */ | 353 | */ |
335 | if (!(++retries % 100)) | 354 | if (!(++retries % 100)) |
336 | printk(KERN_ERR | 355 | xfs_err(NULL, |
337 | "XFS: possible memory allocation " | 356 | "possible memory allocation deadlock in %s (mode:0x%x)", |
338 | "deadlock in %s (mode:0x%x)\n", | ||
339 | __func__, gfp_mask); | 357 | __func__, gfp_mask); |
340 | 358 | ||
341 | XFS_STATS_INC(xb_page_retries); | 359 | XFS_STATS_INC(xb_page_retries); |
342 | xfsbufd_wakeup(NULL, 0, gfp_mask); | ||
343 | congestion_wait(BLK_RW_ASYNC, HZ/50); | 360 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
344 | goto retry; | 361 | goto retry; |
345 | } | 362 | } |
346 | 363 | ||
347 | XFS_STATS_INC(xb_page_found); | 364 | XFS_STATS_INC(xb_page_found); |
348 | 365 | ||
349 | nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); | 366 | nbytes = min_t(size_t, size, PAGE_SIZE - offset); |
350 | size -= nbytes; | 367 | size -= nbytes; |
351 | |||
352 | ASSERT(!PagePrivate(page)); | ||
353 | if (!PageUptodate(page)) { | ||
354 | page_count--; | ||
355 | if (blocksize >= PAGE_CACHE_SIZE) { | ||
356 | if (flags & XBF_READ) | ||
357 | bp->b_flags |= _XBF_PAGE_LOCKED; | ||
358 | } else if (!PagePrivate(page)) { | ||
359 | if (test_page_region(page, offset, nbytes)) | ||
360 | page_count++; | ||
361 | } | ||
362 | } | ||
363 | |||
364 | bp->b_pages[i] = page; | 368 | bp->b_pages[i] = page; |
365 | offset = 0; | 369 | offset = 0; |
366 | } | 370 | } |
371 | return 0; | ||
367 | 372 | ||
368 | if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { | 373 | out_free_pages: |
369 | for (i = 0; i < bp->b_page_count; i++) | 374 | for (i = 0; i < bp->b_page_count; i++) |
370 | unlock_page(bp->b_pages[i]); | 375 | __free_page(bp->b_pages[i]); |
371 | } | ||
372 | |||
373 | if (page_count == bp->b_page_count) | ||
374 | bp->b_flags |= XBF_DONE; | ||
375 | |||
376 | return error; | 376 | return error; |
377 | } | 377 | } |
378 | 378 | ||
379 | /* | 379 | /* |
380 | * Map buffer into kernel address-space if nessecary. | 380 | * Map buffer into kernel address-space if necessary. |
381 | */ | 381 | */ |
382 | STATIC int | 382 | STATIC int |
383 | _xfs_buf_map_pages( | 383 | _xfs_buf_map_pages( |
384 | xfs_buf_t *bp, | 384 | xfs_buf_t *bp, |
385 | uint flags) | 385 | uint flags) |
386 | { | 386 | { |
387 | /* A single page buffer is always mappable */ | 387 | ASSERT(bp->b_flags & _XBF_PAGES); |
388 | if (bp->b_page_count == 1) { | 388 | if (bp->b_page_count == 1) { |
389 | /* A single page buffer is always mappable */ | ||
389 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; | 390 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; |
390 | bp->b_flags |= XBF_MAPPED; | 391 | bp->b_flags |= XBF_MAPPED; |
391 | } else if (flags & XBF_MAPPED) { | 392 | } else if (flags & XBF_MAPPED) { |
392 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, | 393 | int retried = 0; |
393 | -1, PAGE_KERNEL); | 394 | |
394 | if (unlikely(bp->b_addr == NULL)) | 395 | do { |
396 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, | ||
397 | -1, PAGE_KERNEL); | ||
398 | if (bp->b_addr) | ||
399 | break; | ||
400 | vm_unmap_aliases(); | ||
401 | } while (retried++ <= 1); | ||
402 | |||
403 | if (!bp->b_addr) | ||
395 | return -ENOMEM; | 404 | return -ENOMEM; |
396 | bp->b_addr += bp->b_offset; | 405 | bp->b_addr += bp->b_offset; |
397 | bp->b_flags |= XBF_MAPPED; | 406 | bp->b_flags |= XBF_MAPPED; |
@@ -422,8 +431,10 @@ _xfs_buf_find(
422 | { | 431 | { |
423 | xfs_off_t range_base; | 432 | xfs_off_t range_base; |
424 | size_t range_length; | 433 | size_t range_length; |
425 | xfs_bufhash_t *hash; | 434 | struct xfs_perag *pag; |
426 | xfs_buf_t *bp, *n; | 435 | struct rb_node **rbp; |
436 | struct rb_node *parent; | ||
437 | xfs_buf_t *bp; | ||
427 | 438 | ||
428 | range_base = (ioff << BBSHIFT); | 439 | range_base = (ioff << BBSHIFT); |
429 | range_length = (isize << BBSHIFT); | 440 | range_length = (isize << BBSHIFT); |
@@ -432,14 +443,37 @@ _xfs_buf_find(
432 | ASSERT(!(range_length < (1 << btp->bt_sshift))); | 443 | ASSERT(!(range_length < (1 << btp->bt_sshift))); |
433 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); | 444 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); |
434 | 445 | ||
435 | hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; | 446 | /* get tree root */ |
436 | 447 | pag = xfs_perag_get(btp->bt_mount, | |
437 | spin_lock(&hash->bh_lock); | 448 | xfs_daddr_to_agno(btp->bt_mount, ioff)); |
438 | 449 | ||
439 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | 450 | /* walk tree */ |
440 | ASSERT(btp == bp->b_target); | 451 | spin_lock(&pag->pag_buf_lock); |
441 | if (bp->b_file_offset == range_base && | 452 | rbp = &pag->pag_buf_tree.rb_node; |
442 | bp->b_buffer_length == range_length) { | 453 | parent = NULL; |
454 | bp = NULL; | ||
455 | while (*rbp) { | ||
456 | parent = *rbp; | ||
457 | bp = rb_entry(parent, struct xfs_buf, b_rbnode); | ||
458 | |||
459 | if (range_base < bp->b_file_offset) | ||
460 | rbp = &(*rbp)->rb_left; | ||
461 | else if (range_base > bp->b_file_offset) | ||
462 | rbp = &(*rbp)->rb_right; | ||
463 | else { | ||
464 | /* | ||
465 | * found a block offset match. If the range doesn't | ||
466 | * match, the only way this is allowed is if the buffer | ||
467 | * in the cache is stale and the transaction that made | ||
468 | * it stale has not yet committed. i.e. we are | ||
469 | * reallocating a busy extent. Skip this buffer and | ||
470 | * continue searching to the right for an exact match. | ||
471 | */ | ||
472 | if (bp->b_buffer_length != range_length) { | ||
473 | ASSERT(bp->b_flags & XBF_STALE); | ||
474 | rbp = &(*rbp)->rb_right; | ||
475 | continue; | ||
476 | } | ||
443 | atomic_inc(&bp->b_hold); | 477 | atomic_inc(&bp->b_hold); |
444 | goto found; | 478 | goto found; |
445 | } | 479 | } |
@@ -449,46 +483,42 @@ _xfs_buf_find( | |||
449 | if (new_bp) { | 483 | if (new_bp) { |
450 | _xfs_buf_initialize(new_bp, btp, range_base, | 484 | _xfs_buf_initialize(new_bp, btp, range_base, |
451 | range_length, flags); | 485 | range_length, flags); |
452 | new_bp->b_hash = hash; | 486 | rb_link_node(&new_bp->b_rbnode, parent, rbp); |
453 | list_add(&new_bp->b_hash_list, &hash->bh_list); | 487 | rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); |
488 | /* the buffer keeps the perag reference until it is freed */ | ||
489 | new_bp->b_pag = pag; | ||
490 | spin_unlock(&pag->pag_buf_lock); | ||
454 | } else { | 491 | } else { |
455 | XFS_STATS_INC(xb_miss_locked); | 492 | XFS_STATS_INC(xb_miss_locked); |
493 | spin_unlock(&pag->pag_buf_lock); | ||
494 | xfs_perag_put(pag); | ||
456 | } | 495 | } |
457 | |||
458 | spin_unlock(&hash->bh_lock); | ||
459 | return new_bp; | 496 | return new_bp; |
460 | 497 | ||
461 | found: | 498 | found: |
462 | spin_unlock(&hash->bh_lock); | 499 | spin_unlock(&pag->pag_buf_lock); |
500 | xfs_perag_put(pag); | ||
463 | 501 | ||
464 | /* Attempt to get the semaphore without sleeping, | 502 | if (xfs_buf_cond_lock(bp)) { |
465 | * if this does not work then we need to drop the | 503 | /* failed, so wait for the lock if requested. */ |
466 | * spinlock and do a hard attempt on the semaphore. | ||
467 | */ | ||
468 | if (down_trylock(&bp->b_sema)) { | ||
469 | if (!(flags & XBF_TRYLOCK)) { | 504 | if (!(flags & XBF_TRYLOCK)) { |
470 | /* wait for buffer ownership */ | ||
471 | xfs_buf_lock(bp); | 505 | xfs_buf_lock(bp); |
472 | XFS_STATS_INC(xb_get_locked_waited); | 506 | XFS_STATS_INC(xb_get_locked_waited); |
473 | } else { | 507 | } else { |
474 | /* We asked for a trylock and failed, no need | ||
475 | * to look at file offset and length here, we | ||
476 | * know that this buffer at least overlaps our | ||
477 | * buffer and is locked, therefore our buffer | ||
478 | * either does not exist, or is this buffer. | ||
479 | */ | ||
480 | xfs_buf_rele(bp); | 508 | xfs_buf_rele(bp); |
481 | XFS_STATS_INC(xb_busy_locked); | 509 | XFS_STATS_INC(xb_busy_locked); |
482 | return NULL; | 510 | return NULL; |
483 | } | 511 | } |
484 | } else { | ||
485 | /* trylock worked */ | ||
486 | XB_SET_OWNER(bp); | ||
487 | } | 512 | } |
488 | 513 | ||
514 | /* | ||
515 | * if the buffer is stale, clear all the external state associated with | ||
516 | * it. We need to keep flags such as how we allocated the buffer memory | ||
517 | * intact here. | ||
518 | */ | ||
489 | if (bp->b_flags & XBF_STALE) { | 519 | if (bp->b_flags & XBF_STALE) { |
490 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); | 520 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); |
491 | bp->b_flags &= XBF_MAPPED; | 521 | bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; |
492 | } | 522 | } |
493 | 523 | ||
494 | trace_xfs_buf_find(bp, flags, _RET_IP_); | 524 | trace_xfs_buf_find(bp, flags, _RET_IP_); |
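Taken together, this hunk replaces the old per-target hash chains with a per-allocation-group rbtree keyed on b_file_offset. The walk remembers the parent node and the link that was followed, so a miss can insert the new buffer exactly where the search ended, and an offset match with the wrong length is treated as a stale, not-yet-committed buffer and skipped to the right. The sketch below shows the same walk-and-insert shape on a plain (unbalanced) binary search tree in user space; all names are illustrative and the kernel's locking and reference counting are omitted.

    #include <stdlib.h>

    struct node {
        long        offset;     /* search key */
        long        length;
        int         stale;      /* reallocated, awaiting commit */
        struct node *left, *right;
    };

    /* Find a node by offset/length, or insert new_node at the leaf position. */
    struct node *find_or_insert(struct node **root, long offset, long length,
                                struct node *new_node)
    {
        struct node **link = root;

        while (*link) {
            struct node *n = *link;

            if (offset < n->offset)
                link = &n->left;
            else if (offset > n->offset)
                link = &n->right;
            else if (n->length != length) {
                /* offset matches but length differs: a stale entry, so keep
                 * searching to the right for an exact match */
                link = &n->right;
            } else {
                return n;               /* cache hit */
            }
        }

        if (new_node) {
            new_node->offset = offset;
            new_node->length = length;
            new_node->left = new_node->right = NULL;
            *link = new_node;           /* insert where the walk ended */
        }
        return new_node;
    }

Remembering the link pointer during the descent is what lets a miss turn into an insert without walking the tree a second time.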
@@ -509,7 +539,7 @@ xfs_buf_get( | |||
509 | xfs_buf_flags_t flags) | 539 | xfs_buf_flags_t flags) |
510 | { | 540 | { |
511 | xfs_buf_t *bp, *new_bp; | 541 | xfs_buf_t *bp, *new_bp; |
512 | int error = 0, i; | 542 | int error = 0; |
513 | 543 | ||
514 | new_bp = xfs_buf_allocate(flags); | 544 | new_bp = xfs_buf_allocate(flags); |
515 | if (unlikely(!new_bp)) | 545 | if (unlikely(!new_bp)) |
@@ -517,7 +547,7 @@ xfs_buf_get( | |||
517 | 547 | ||
518 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); | 548 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); |
519 | if (bp == new_bp) { | 549 | if (bp == new_bp) { |
520 | error = _xfs_buf_lookup_pages(bp, flags); | 550 | error = xfs_buf_allocate_memory(bp, flags); |
521 | if (error) | 551 | if (error) |
522 | goto no_buffer; | 552 | goto no_buffer; |
523 | } else { | 553 | } else { |
@@ -526,14 +556,11 @@ xfs_buf_get( | |||
526 | return NULL; | 556 | return NULL; |
527 | } | 557 | } |
528 | 558 | ||
529 | for (i = 0; i < bp->b_page_count; i++) | ||
530 | mark_page_accessed(bp->b_pages[i]); | ||
531 | |||
532 | if (!(bp->b_flags & XBF_MAPPED)) { | 559 | if (!(bp->b_flags & XBF_MAPPED)) { |
533 | error = _xfs_buf_map_pages(bp, flags); | 560 | error = _xfs_buf_map_pages(bp, flags); |
534 | if (unlikely(error)) { | 561 | if (unlikely(error)) { |
535 | printk(KERN_WARNING "%s: failed to map pages\n", | 562 | xfs_warn(target->bt_mount, |
536 | __func__); | 563 | "%s: failed to map pages\n", __func__); |
537 | goto no_buffer; | 564 | goto no_buffer; |
538 | } | 565 | } |
539 | } | 566 | } |
@@ -625,17 +652,47 @@ void | |||
625 | xfs_buf_readahead( | 652 | xfs_buf_readahead( |
626 | xfs_buftarg_t *target, | 653 | xfs_buftarg_t *target, |
627 | xfs_off_t ioff, | 654 | xfs_off_t ioff, |
628 | size_t isize, | 655 | size_t isize) |
629 | xfs_buf_flags_t flags) | ||
630 | { | 656 | { |
631 | struct backing_dev_info *bdi; | 657 | if (bdi_read_congested(target->bt_bdi)) |
632 | |||
633 | bdi = target->bt_mapping->backing_dev_info; | ||
634 | if (bdi_read_congested(bdi)) | ||
635 | return; | 658 | return; |
636 | 659 | ||
637 | flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); | 660 | xfs_buf_read(target, ioff, isize, |
638 | xfs_buf_read(target, ioff, isize, flags); | 661 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); |
662 | } | ||
663 | |||
664 | /* | ||
665 | * Read an uncached buffer from disk. Allocates and returns a locked | ||
666 | * buffer containing the disk contents or nothing. | ||
667 | */ | ||
668 | struct xfs_buf * | ||
669 | xfs_buf_read_uncached( | ||
670 | struct xfs_mount *mp, | ||
671 | struct xfs_buftarg *target, | ||
672 | xfs_daddr_t daddr, | ||
673 | size_t length, | ||
674 | int flags) | ||
675 | { | ||
676 | xfs_buf_t *bp; | ||
677 | int error; | ||
678 | |||
679 | bp = xfs_buf_get_uncached(target, length, flags); | ||
680 | if (!bp) | ||
681 | return NULL; | ||
682 | |||
683 | /* set up the buffer for a read IO */ | ||
684 | xfs_buf_lock(bp); | ||
685 | XFS_BUF_SET_ADDR(bp, daddr); | ||
686 | XFS_BUF_READ(bp); | ||
687 | XFS_BUF_BUSY(bp); | ||
688 | |||
689 | xfsbdstrat(mp, bp); | ||
690 | error = xfs_buf_iowait(bp); | ||
691 | if (error || bp->b_error) { | ||
692 | xfs_buf_relse(bp); | ||
693 | return NULL; | ||
694 | } | ||
695 | return bp; | ||
639 | } | 696 | } |
640 | 697 | ||
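The new xfs_buf_read_uncached() helper wraps the whole get/lock/set-address/submit/wait sequence and returns either a fully read, locked buffer or NULL, releasing the buffer itself on any error so callers never see a half-read object. A rough user-space analogue of that contract, using pread() on a file descriptor, purely for illustration:

    #include <stdlib.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Read `len` bytes at `offset`; return a malloc'd buffer or NULL. */
    void *read_uncached(int fd, off_t offset, size_t len)
    {
        void *buf = malloc(len);
        ssize_t done = 0;

        if (!buf)
            return NULL;

        while ((size_t)done < len) {
            ssize_t n = pread(fd, (char *)buf + done, len - done,
                              offset + done);
            if (n <= 0) {               /* error or unexpected EOF */
                free(buf);              /* caller never sees a partial read */
                return NULL;
            }
            done += n;
        }
        return buf;
    }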
641 | xfs_buf_t * | 698 | xfs_buf_t * |
@@ -651,6 +708,27 @@ xfs_buf_get_empty( | |||
651 | return bp; | 708 | return bp; |
652 | } | 709 | } |
653 | 710 | ||
711 | /* | ||
712 | * Return a buffer allocated as an empty buffer and associated to external | ||
713 | * memory via xfs_buf_associate_memory() back to its empty state. | ||
714 | */ | ||
715 | void | ||
716 | xfs_buf_set_empty( | ||
717 | struct xfs_buf *bp, | ||
718 | size_t len) | ||
719 | { | ||
720 | if (bp->b_pages) | ||
721 | _xfs_buf_free_pages(bp); | ||
722 | |||
723 | bp->b_pages = NULL; | ||
724 | bp->b_page_count = 0; | ||
725 | bp->b_addr = NULL; | ||
726 | bp->b_file_offset = 0; | ||
727 | bp->b_buffer_length = bp->b_count_desired = len; | ||
728 | bp->b_bn = XFS_BUF_DADDR_NULL; | ||
729 | bp->b_flags &= ~XBF_MAPPED; | ||
730 | } | ||
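xfs_buf_set_empty() takes a buffer that previously had external memory attached and puts it back into its initial empty state: the page-pointer array is dropped and the address, block number and mapping flag are cleared, while the rest of the structure is left alone. A small sketch of that kind of selective reset on a hypothetical buffer type, with invented field names:

    #include <stdlib.h>

    struct membuf {
        void    **pages;        /* pointer array is owned, the pages are not */
        size_t  page_count;
        void    *addr;          /* mapped address, if any */
        size_t  length;
        int     mapped;
    };

    /* Detach any associated memory and restore the buffer to "empty". */
    void membuf_set_empty(struct membuf *b, size_t len)
    {
        free(b->pages);         /* drop only the pointer array */
        b->pages = NULL;
        b->page_count = 0;
        b->addr = NULL;
        b->length = len;        /* keep the nominal size, like b_buffer_length */
        b->mapped = 0;
    }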
731 | |||
654 | static inline struct page * | 732 | static inline struct page * |
655 | mem_to_page( | 733 | mem_to_page( |
656 | void *addr) | 734 | void *addr) |
@@ -675,10 +753,10 @@ xfs_buf_associate_memory( | |||
675 | size_t buflen; | 753 | size_t buflen; |
676 | int page_count; | 754 | int page_count; |
677 | 755 | ||
678 | pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; | 756 | pageaddr = (unsigned long)mem & PAGE_MASK; |
679 | offset = (unsigned long)mem - pageaddr; | 757 | offset = (unsigned long)mem - pageaddr; |
680 | buflen = PAGE_CACHE_ALIGN(len + offset); | 758 | buflen = PAGE_ALIGN(len + offset); |
681 | page_count = buflen >> PAGE_CACHE_SHIFT; | 759 | page_count = buflen >> PAGE_SHIFT; |
682 | 760 | ||
683 | /* Free any previous set of page pointers */ | 761 | /* Free any previous set of page pointers */ |
684 | if (bp->b_pages) | 762 | if (bp->b_pages) |
@@ -695,21 +773,21 @@ xfs_buf_associate_memory( | |||
695 | 773 | ||
696 | for (i = 0; i < bp->b_page_count; i++) { | 774 | for (i = 0; i < bp->b_page_count; i++) { |
697 | bp->b_pages[i] = mem_to_page((void *)pageaddr); | 775 | bp->b_pages[i] = mem_to_page((void *)pageaddr); |
698 | pageaddr += PAGE_CACHE_SIZE; | 776 | pageaddr += PAGE_SIZE; |
699 | } | 777 | } |
700 | 778 | ||
701 | bp->b_count_desired = len; | 779 | bp->b_count_desired = len; |
702 | bp->b_buffer_length = buflen; | 780 | bp->b_buffer_length = buflen; |
703 | bp->b_flags |= XBF_MAPPED; | 781 | bp->b_flags |= XBF_MAPPED; |
704 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
705 | 782 | ||
706 | return 0; | 783 | return 0; |
707 | } | 784 | } |
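With the switch from the PAGE_CACHE_* to the PAGE_* macros, xfs_buf_associate_memory() rounds the supplied address down to a page boundary, rounds the length plus the leading offset up to a whole number of pages, and derives the page count with a shift. The same arithmetic in portable user-space C, with sysconf() standing in for the kernel macros:

    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>

    int main(void)
    {
        unsigned long page_size = (unsigned long)sysconf(_SC_PAGESIZE);
        uintptr_t mem = 0x20001234;         /* arbitrary example address */
        size_t len = 10000;                 /* arbitrary example length */

        uintptr_t pageaddr = mem & ~(page_size - 1);        /* PAGE_MASK */
        size_t offset = mem - pageaddr;                     /* into first page */
        size_t buflen = (len + offset + page_size - 1) &
                        ~(page_size - 1);                   /* PAGE_ALIGN */
        size_t page_count = buflen / page_size;             /* >> PAGE_SHIFT */

        printf("first page %#lx, offset %zu, span %zu bytes, %zu pages\n",
               (unsigned long)pageaddr, offset, buflen, page_count);
        return 0;
    }

With 4096-byte pages and the example values this prints a 564-byte offset, a 12288-byte span and 3 backing pages.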
708 | 785 | ||
709 | xfs_buf_t * | 786 | xfs_buf_t * |
710 | xfs_buf_get_noaddr( | 787 | xfs_buf_get_uncached( |
788 | struct xfs_buftarg *target, | ||
711 | size_t len, | 789 | size_t len, |
712 | xfs_buftarg_t *target) | 790 | int flags) |
713 | { | 791 | { |
714 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; | 792 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; |
715 | int error, i; | 793 | int error, i; |
@@ -725,7 +803,7 @@ xfs_buf_get_noaddr( | |||
725 | goto fail_free_buf; | 803 | goto fail_free_buf; |
726 | 804 | ||
727 | for (i = 0; i < page_count; i++) { | 805 | for (i = 0; i < page_count; i++) { |
728 | bp->b_pages[i] = alloc_page(GFP_KERNEL); | 806 | bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); |
729 | if (!bp->b_pages[i]) | 807 | if (!bp->b_pages[i]) |
730 | goto fail_free_mem; | 808 | goto fail_free_mem; |
731 | } | 809 | } |
@@ -733,14 +811,14 @@ xfs_buf_get_noaddr( | |||
733 | 811 | ||
734 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); | 812 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); |
735 | if (unlikely(error)) { | 813 | if (unlikely(error)) { |
736 | printk(KERN_WARNING "%s: failed to map pages\n", | 814 | xfs_warn(target->bt_mount, |
737 | __func__); | 815 | "%s: failed to map pages\n", __func__); |
738 | goto fail_free_mem; | 816 | goto fail_free_mem; |
739 | } | 817 | } |
740 | 818 | ||
741 | xfs_buf_unlock(bp); | 819 | xfs_buf_unlock(bp); |
742 | 820 | ||
743 | trace_xfs_buf_get_noaddr(bp, _RET_IP_); | 821 | trace_xfs_buf_get_uncached(bp, _RET_IP_); |
744 | return bp; | 822 | return bp; |
745 | 823 | ||
746 | fail_free_mem: | 824 | fail_free_mem: |
@@ -774,29 +852,32 @@ void | |||
774 | xfs_buf_rele( | 852 | xfs_buf_rele( |
775 | xfs_buf_t *bp) | 853 | xfs_buf_t *bp) |
776 | { | 854 | { |
777 | xfs_bufhash_t *hash = bp->b_hash; | 855 | struct xfs_perag *pag = bp->b_pag; |
778 | 856 | ||
779 | trace_xfs_buf_rele(bp, _RET_IP_); | 857 | trace_xfs_buf_rele(bp, _RET_IP_); |
780 | 858 | ||
781 | if (unlikely(!hash)) { | 859 | if (!pag) { |
782 | ASSERT(!bp->b_relse); | 860 | ASSERT(list_empty(&bp->b_lru)); |
861 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | ||
783 | if (atomic_dec_and_test(&bp->b_hold)) | 862 | if (atomic_dec_and_test(&bp->b_hold)) |
784 | xfs_buf_free(bp); | 863 | xfs_buf_free(bp); |
785 | return; | 864 | return; |
786 | } | 865 | } |
787 | 866 | ||
867 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); | ||
868 | |||
788 | ASSERT(atomic_read(&bp->b_hold) > 0); | 869 | ASSERT(atomic_read(&bp->b_hold) > 0); |
789 | if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { | 870 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
790 | if (bp->b_relse) { | 871 | if (!(bp->b_flags & XBF_STALE) && |
791 | atomic_inc(&bp->b_hold); | 872 | atomic_read(&bp->b_lru_ref)) { |
792 | spin_unlock(&hash->bh_lock); | 873 | xfs_buf_lru_add(bp); |
793 | (*(bp->b_relse)) (bp); | 874 | spin_unlock(&pag->pag_buf_lock); |
794 | } else if (bp->b_flags & XBF_FS_MANAGED) { | ||
795 | spin_unlock(&hash->bh_lock); | ||
796 | } else { | 875 | } else { |
876 | xfs_buf_lru_del(bp); | ||
797 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 877 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); |
798 | list_del_init(&bp->b_hash_list); | 878 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
799 | spin_unlock(&hash->bh_lock); | 879 | spin_unlock(&pag->pag_buf_lock); |
880 | xfs_perag_put(pag); | ||
800 | xfs_buf_free(bp); | 881 | xfs_buf_free(bp); |
801 | } | 882 | } |
802 | } | 883 | } |
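xfs_buf_rele() now decides, under the per-AG buffer lock, between two fates for a buffer whose hold count reaches zero: if it is not stale and still carries an LRU reference it is parked on the LRU so a quick re-lookup avoids re-reading the disk, otherwise it is removed from the rbtree and freed. A simplified, single-threaded sketch of that free-or-cache decision, with illustrative types and list handling:

    #include <stdlib.h>

    struct cached_buf {
        int                 hold;       /* active references */
        int                 lru_ref;    /* "keep me cached" hint */
        int                 stale;
        struct cached_buf   *lru_next;  /* simple singly linked LRU */
    };

    static struct cached_buf *lru_head;

    void buf_release(struct cached_buf *bp)
    {
        if (--bp->hold > 0)
            return;                     /* still referenced elsewhere */

        if (!bp->stale && bp->lru_ref > 0) {
            /* last user gone, but give the buffer a trip through the LRU so
             * a quick re-lookup finds it without touching the disk again */
            bp->lru_next = lru_head;
            lru_head = bp;
        } else {
            free(bp);                   /* stale or unwanted: tear down */
        }
    }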
@@ -804,20 +885,15 @@ xfs_buf_rele( | |||
804 | 885 | ||
805 | 886 | ||
806 | /* | 887 | /* |
807 | * Mutual exclusion on buffers. Locking model: | 888 | * Lock a buffer object, if it is not already locked. |
808 | * | 889 | * |
809 | * Buffers associated with inodes for which buffer locking | 890 | * If we come across a stale, pinned, locked buffer, we know that we are |
810 | * is not enabled are not protected by semaphores, and are | 891 | * being asked to lock a buffer that has been reallocated. Because it is |
811 | * assumed to be exclusively owned by the caller. There is a | 892 | * pinned, we know that the log has not been pushed to disk and hence it |
812 | * spinlock in the buffer, used by the caller when concurrent | 893 | * will still be locked. Rather than continuing to have trylock attempts |
813 | * access is possible. | 894 | * fail until someone else pushes the log, push it ourselves before |
814 | */ | 895 | * returning. This means that the xfsaild will not get stuck trying |
815 | 896 | * to push on stale inode buffers. | |
816 | /* | ||
817 | * Locks a buffer object, if it is not already locked. | ||
818 | * Note that this in no way locks the underlying pages, so it is only | ||
819 | * useful for synchronizing concurrent use of buffer objects, not for | ||
820 | * synchronizing independent access to the underlying pages. | ||
821 | */ | 897 | */ |
822 | int | 898 | int |
823 | xfs_buf_cond_lock( | 899 | xfs_buf_cond_lock( |
@@ -828,6 +904,8 @@ xfs_buf_cond_lock( | |||
828 | locked = down_trylock(&bp->b_sema) == 0; | 904 | locked = down_trylock(&bp->b_sema) == 0; |
829 | if (locked) | 905 | if (locked) |
830 | XB_SET_OWNER(bp); | 906 | XB_SET_OWNER(bp); |
907 | else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | ||
908 | xfs_log_force(bp->b_target->bt_mount, 0); | ||
831 | 909 | ||
832 | trace_xfs_buf_cond_lock(bp, _RET_IP_); | 910 | trace_xfs_buf_cond_lock(bp, _RET_IP_); |
833 | return locked ? 0 : -EBUSY; | 911 | return locked ? 0 : -EBUSY; |
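xfs_buf_cond_lock() stays non-blocking, but when the trylock fails on a buffer that is both stale and pinned it now forces the log, so the transaction pinning the buffer gets written out and a later lock attempt can succeed instead of the caller spinning on trylock failures. The pthread sketch below mirrors the shape of that logic; kick_flusher() is a made-up stand-in for the log force.

    #include <pthread.h>
    #include <errno.h>

    struct locked_obj {
        pthread_mutex_t lock;
        int             pinned;     /* held down by in-flight work */
        int             stale;      /* contents already invalidated */
    };

    /* Stand-in for the log force: nudge whatever is holding the pin. */
    static void kick_flusher(void)
    {
        /* no-op in this sketch */
    }

    /* Try to lock; if that fails on a stale, pinned object, prod the flusher. */
    static int obj_cond_lock(struct locked_obj *o)
    {
        if (pthread_mutex_trylock(&o->lock) == 0)
            return 0;                   /* got the lock */

        if (o->pinned && o->stale)      /* racy read, acceptable for a hint */
            kick_flusher();             /* make the next attempt likely to work */

        return -EBUSY;
    }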
@@ -841,10 +919,7 @@ xfs_buf_lock_value( | |||
841 | } | 919 | } |
842 | 920 | ||
843 | /* | 921 | /* |
844 | * Locks a buffer object. | 922 | * Lock a buffer object. |
845 | * Note that this in no way locks the underlying pages, so it is only | ||
846 | * useful for synchronizing concurrent use of buffer objects, not for | ||
847 | * synchronizing independent access to the underlying pages. | ||
848 | * | 923 | * |
849 | * If we come across a stale, pinned, locked buffer, we know that we | 924 | * If we come across a stale, pinned, locked buffer, we know that we |
850 | * are being asked to lock a buffer that has been reallocated. Because | 925 | * are being asked to lock a buffer that has been reallocated. Because |
@@ -859,9 +934,7 @@ xfs_buf_lock( | |||
859 | trace_xfs_buf_lock(bp, _RET_IP_); | 934 | trace_xfs_buf_lock(bp, _RET_IP_); |
860 | 935 | ||
861 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | 936 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) |
862 | xfs_log_force(bp->b_mount, 0); | 937 | xfs_log_force(bp->b_target->bt_mount, 0); |
863 | if (atomic_read(&bp->b_io_remaining)) | ||
864 | blk_run_address_space(bp->b_target->bt_mapping); | ||
865 | down(&bp->b_sema); | 938 | down(&bp->b_sema); |
866 | XB_SET_OWNER(bp); | 939 | XB_SET_OWNER(bp); |
867 | 940 | ||
@@ -905,9 +978,7 @@ xfs_buf_wait_unpin( | |||
905 | set_current_state(TASK_UNINTERRUPTIBLE); | 978 | set_current_state(TASK_UNINTERRUPTIBLE); |
906 | if (atomic_read(&bp->b_pin_count) == 0) | 979 | if (atomic_read(&bp->b_pin_count) == 0) |
907 | break; | 980 | break; |
908 | if (atomic_read(&bp->b_io_remaining)) | 981 | io_schedule(); |
909 | blk_run_address_space(bp->b_target->bt_mapping); | ||
910 | schedule(); | ||
911 | } | 982 | } |
912 | remove_wait_queue(&bp->b_waiters, &wait); | 983 | remove_wait_queue(&bp->b_waiters, &wait); |
913 | set_current_state(TASK_RUNNING); | 984 | set_current_state(TASK_RUNNING); |
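xfs_buf_wait_unpin() keeps the classic wait-queue loop (register as a waiter, re-check the pin count, sleep, repeat), only sleeping in io_schedule() now that there is no request queue to kick by hand. A user-space rendering of the same loop with a condition variable, using invented names:

    #include <pthread.h>

    struct pinned_obj {
        pthread_mutex_t lock;
        pthread_cond_t  unpinned;   /* signalled when the pin count drops */
        int             pin_count;
    };

    /* Block until no I/O holds the object pinned. */
    void obj_wait_unpin(struct pinned_obj *o)
    {
        pthread_mutex_lock(&o->lock);
        while (o->pin_count > 0)            /* re-check after every wakeup */
            pthread_cond_wait(&o->unpinned, &o->lock);
        pthread_mutex_unlock(&o->lock);
    }

    /* Drop one pin and wake any waiters when the last pin goes away. */
    void obj_unpin(struct pinned_obj *o)
    {
        pthread_mutex_lock(&o->lock);
        if (--o->pin_count == 0)
            pthread_cond_broadcast(&o->unpinned);
        pthread_mutex_unlock(&o->lock);
    }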
@@ -924,19 +995,7 @@ xfs_buf_iodone_work( | |||
924 | xfs_buf_t *bp = | 995 | xfs_buf_t *bp = |
925 | container_of(work, xfs_buf_t, b_iodone_work); | 996 | container_of(work, xfs_buf_t, b_iodone_work); |
926 | 997 | ||
927 | /* | 998 | if (bp->b_iodone) |
928 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | ||
929 | * ordered flag and reissue them. Because we can't tell the higher | ||
930 | * layers directly that they should not issue ordered I/O anymore, they | ||
931 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. | ||
932 | */ | ||
933 | if ((bp->b_error == EOPNOTSUPP) && | ||
934 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | ||
935 | trace_xfs_buf_ordered_retry(bp, _RET_IP_); | ||
936 | bp->b_flags &= ~XBF_ORDERED; | ||
937 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
938 | xfs_buf_iorequest(bp); | ||
939 | } else if (bp->b_iodone) | ||
940 | (*(bp->b_iodone))(bp); | 999 | (*(bp->b_iodone))(bp); |
941 | else if (bp->b_flags & XBF_ASYNC) | 1000 | else if (bp->b_flags & XBF_ASYNC) |
942 | xfs_buf_relse(bp); | 1001 | xfs_buf_relse(bp); |
@@ -982,7 +1041,6 @@ xfs_bwrite( | |||
982 | { | 1041 | { |
983 | int error; | 1042 | int error; |
984 | 1043 | ||
985 | bp->b_mount = mp; | ||
986 | bp->b_flags |= XBF_WRITE; | 1044 | bp->b_flags |= XBF_WRITE; |
987 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ); | 1045 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ); |
988 | 1046 | ||
@@ -1003,8 +1061,6 @@ xfs_bdwrite( | |||
1003 | { | 1061 | { |
1004 | trace_xfs_buf_bdwrite(bp, _RET_IP_); | 1062 | trace_xfs_buf_bdwrite(bp, _RET_IP_); |
1005 | 1063 | ||
1006 | bp->b_mount = mp; | ||
1007 | |||
1008 | bp->b_flags &= ~XBF_READ; | 1064 | bp->b_flags &= ~XBF_READ; |
1009 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); | 1065 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); |
1010 | 1066 | ||
@@ -1013,7 +1069,7 @@ xfs_bdwrite( | |||
1013 | 1069 | ||
1014 | /* | 1070 | /* |
1015 | * Called when we want to stop a buffer from getting written or read. | 1071 | * Called when we want to stop a buffer from getting written or read. |
1016 | * We attach the EIO error, muck with its flags, and call biodone | 1072 | * We attach the EIO error, muck with its flags, and call xfs_buf_ioend |
1017 | * so that the proper iodone callbacks get called. | 1073 | * so that the proper iodone callbacks get called. |
1018 | */ | 1074 | */ |
1019 | STATIC int | 1075 | STATIC int |
@@ -1030,21 +1086,21 @@ xfs_bioerror( | |||
1030 | XFS_BUF_ERROR(bp, EIO); | 1086 | XFS_BUF_ERROR(bp, EIO); |
1031 | 1087 | ||
1032 | /* | 1088 | /* |
1033 | * We're calling biodone, so delete XBF_DONE flag. | 1089 | * We're calling xfs_buf_ioend, so delete XBF_DONE flag. |
1034 | */ | 1090 | */ |
1035 | XFS_BUF_UNREAD(bp); | 1091 | XFS_BUF_UNREAD(bp); |
1036 | XFS_BUF_UNDELAYWRITE(bp); | 1092 | XFS_BUF_UNDELAYWRITE(bp); |
1037 | XFS_BUF_UNDONE(bp); | 1093 | XFS_BUF_UNDONE(bp); |
1038 | XFS_BUF_STALE(bp); | 1094 | XFS_BUF_STALE(bp); |
1039 | 1095 | ||
1040 | xfs_biodone(bp); | 1096 | xfs_buf_ioend(bp, 0); |
1041 | 1097 | ||
1042 | return EIO; | 1098 | return EIO; |
1043 | } | 1099 | } |
1044 | 1100 | ||
1045 | /* | 1101 | /* |
1046 | * Same as xfs_bioerror, except that we are releasing the buffer | 1102 | * Same as xfs_bioerror, except that we are releasing the buffer |
1047 | * here ourselves, and avoiding the biodone call. | 1103 | * here ourselves, and avoiding the xfs_buf_ioend call. |
1048 | * This is meant for userdata errors; metadata bufs come with | 1104 | * This is meant for userdata errors; metadata bufs come with |
1049 | * iodone functions attached, so that we can track down errors. | 1105 | * iodone functions attached, so that we can track down errors. |
1050 | */ | 1106 | */ |
@@ -1093,7 +1149,7 @@ int | |||
1093 | xfs_bdstrat_cb( | 1149 | xfs_bdstrat_cb( |
1094 | struct xfs_buf *bp) | 1150 | struct xfs_buf *bp) |
1095 | { | 1151 | { |
1096 | if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { | 1152 | if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { |
1097 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | 1153 | trace_xfs_bdstrat_shut(bp, _RET_IP_); |
1098 | /* | 1154 | /* |
1099 | * Metadata write that didn't get logged but | 1155 | * Metadata write that didn't get logged but |
@@ -1134,10 +1190,8 @@ _xfs_buf_ioend( | |||
1134 | xfs_buf_t *bp, | 1190 | xfs_buf_t *bp, |
1135 | int schedule) | 1191 | int schedule) |
1136 | { | 1192 | { |
1137 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { | 1193 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) |
1138 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
1139 | xfs_buf_ioend(bp, schedule); | 1194 | xfs_buf_ioend(bp, schedule); |
1140 | } | ||
1141 | } | 1195 | } |
1142 | 1196 | ||
1143 | STATIC void | 1197 | STATIC void |
@@ -1146,35 +1200,12 @@ xfs_buf_bio_end_io( | |||
1146 | int error) | 1200 | int error) |
1147 | { | 1201 | { |
1148 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; | 1202 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; |
1149 | unsigned int blocksize = bp->b_target->bt_bsize; | ||
1150 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
1151 | 1203 | ||
1152 | xfs_buf_ioerror(bp, -error); | 1204 | xfs_buf_ioerror(bp, -error); |
1153 | 1205 | ||
1154 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) | 1206 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) |
1155 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); | 1207 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); |
1156 | 1208 | ||
1157 | do { | ||
1158 | struct page *page = bvec->bv_page; | ||
1159 | |||
1160 | ASSERT(!PagePrivate(page)); | ||
1161 | if (unlikely(bp->b_error)) { | ||
1162 | if (bp->b_flags & XBF_READ) | ||
1163 | ClearPageUptodate(page); | ||
1164 | } else if (blocksize >= PAGE_CACHE_SIZE) { | ||
1165 | SetPageUptodate(page); | ||
1166 | } else if (!PagePrivate(page) && | ||
1167 | (bp->b_flags & _XBF_PAGE_CACHE)) { | ||
1168 | set_page_region(page, bvec->bv_offset, bvec->bv_len); | ||
1169 | } | ||
1170 | |||
1171 | if (--bvec >= bio->bi_io_vec) | ||
1172 | prefetchw(&bvec->bv_page->flags); | ||
1173 | |||
1174 | if (bp->b_flags & _XBF_PAGE_LOCKED) | ||
1175 | unlock_page(page); | ||
1176 | } while (bvec >= bio->bi_io_vec); | ||
1177 | |||
1178 | _xfs_buf_ioend(bp, 1); | 1209 | _xfs_buf_ioend(bp, 1); |
1179 | bio_put(bio); | 1210 | bio_put(bio); |
1180 | } | 1211 | } |
@@ -1188,14 +1219,13 @@ _xfs_buf_ioapply( | |||
1188 | int offset = bp->b_offset; | 1219 | int offset = bp->b_offset; |
1189 | int size = bp->b_count_desired; | 1220 | int size = bp->b_count_desired; |
1190 | sector_t sector = bp->b_bn; | 1221 | sector_t sector = bp->b_bn; |
1191 | unsigned int blocksize = bp->b_target->bt_bsize; | ||
1192 | 1222 | ||
1193 | total_nr_pages = bp->b_page_count; | 1223 | total_nr_pages = bp->b_page_count; |
1194 | map_i = 0; | 1224 | map_i = 0; |
1195 | 1225 | ||
1196 | if (bp->b_flags & XBF_ORDERED) { | 1226 | if (bp->b_flags & XBF_ORDERED) { |
1197 | ASSERT(!(bp->b_flags & XBF_READ)); | 1227 | ASSERT(!(bp->b_flags & XBF_READ)); |
1198 | rw = WRITE_BARRIER; | 1228 | rw = WRITE_FLUSH_FUA; |
1199 | } else if (bp->b_flags & XBF_LOG_BUFFER) { | 1229 | } else if (bp->b_flags & XBF_LOG_BUFFER) { |
1200 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); | 1230 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); |
1201 | bp->b_flags &= ~_XBF_RUN_QUEUES; | 1231 | bp->b_flags &= ~_XBF_RUN_QUEUES; |
@@ -1209,29 +1239,6 @@ _xfs_buf_ioapply( | |||
1209 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; | 1239 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; |
1210 | } | 1240 | } |
1211 | 1241 | ||
1212 | /* Special code path for reading a sub page size buffer in -- | ||
1213 | * we populate up the whole page, and hence the other metadata | ||
1214 | * in the same page. This optimization is only valid when the | ||
1215 | * filesystem block size is not smaller than the page size. | ||
1216 | */ | ||
1217 | if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && | ||
1218 | ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == | ||
1219 | (XBF_READ|_XBF_PAGE_LOCKED)) && | ||
1220 | (blocksize >= PAGE_CACHE_SIZE)) { | ||
1221 | bio = bio_alloc(GFP_NOIO, 1); | ||
1222 | |||
1223 | bio->bi_bdev = bp->b_target->bt_bdev; | ||
1224 | bio->bi_sector = sector - (offset >> BBSHIFT); | ||
1225 | bio->bi_end_io = xfs_buf_bio_end_io; | ||
1226 | bio->bi_private = bp; | ||
1227 | |||
1228 | bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0); | ||
1229 | size = 0; | ||
1230 | |||
1231 | atomic_inc(&bp->b_io_remaining); | ||
1232 | |||
1233 | goto submit_io; | ||
1234 | } | ||
1235 | 1242 | ||
1236 | next_chunk: | 1243 | next_chunk: |
1237 | atomic_inc(&bp->b_io_remaining); | 1244 | atomic_inc(&bp->b_io_remaining); |
@@ -1245,8 +1252,9 @@ next_chunk: | |||
1245 | bio->bi_end_io = xfs_buf_bio_end_io; | 1252 | bio->bi_end_io = xfs_buf_bio_end_io; |
1246 | bio->bi_private = bp; | 1253 | bio->bi_private = bp; |
1247 | 1254 | ||
1255 | |||
1248 | for (; size && nr_pages; nr_pages--, map_i++) { | 1256 | for (; size && nr_pages; nr_pages--, map_i++) { |
1249 | int rbytes, nbytes = PAGE_CACHE_SIZE - offset; | 1257 | int rbytes, nbytes = PAGE_SIZE - offset; |
1250 | 1258 | ||
1251 | if (nbytes > size) | 1259 | if (nbytes > size) |
1252 | nbytes = size; | 1260 | nbytes = size; |
@@ -1261,7 +1269,6 @@ next_chunk: | |||
1261 | total_nr_pages--; | 1269 | total_nr_pages--; |
1262 | } | 1270 | } |
1263 | 1271 | ||
1264 | submit_io: | ||
1265 | if (likely(bio->bi_size)) { | 1272 | if (likely(bio->bi_size)) { |
1266 | if (xfs_buf_is_vmapped(bp)) { | 1273 | if (xfs_buf_is_vmapped(bp)) { |
1267 | flush_kernel_vmap_range(bp->b_addr, | 1274 | flush_kernel_vmap_range(bp->b_addr, |
@@ -1271,18 +1278,7 @@ submit_io: | |||
1271 | if (size) | 1278 | if (size) |
1272 | goto next_chunk; | 1279 | goto next_chunk; |
1273 | } else { | 1280 | } else { |
1274 | /* | ||
1275 | * if we get here, no pages were added to the bio. However, | ||
1276 | * we can't just error out here - if the pages are locked then | ||
1277 | * we have to unlock them otherwise we can hang on a later | ||
1278 | * access to the page. | ||
1279 | */ | ||
1280 | xfs_buf_ioerror(bp, EIO); | 1281 | xfs_buf_ioerror(bp, EIO); |
1281 | if (bp->b_flags & _XBF_PAGE_LOCKED) { | ||
1282 | int i; | ||
1283 | for (i = 0; i < bp->b_page_count; i++) | ||
1284 | unlock_page(bp->b_pages[i]); | ||
1285 | } | ||
1286 | bio_put(bio); | 1282 | bio_put(bio); |
1287 | } | 1283 | } |
1288 | } | 1284 | } |
@@ -1327,8 +1323,6 @@ xfs_buf_iowait( | |||
1327 | { | 1323 | { |
1328 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1324 | trace_xfs_buf_iowait(bp, _RET_IP_); |
1329 | 1325 | ||
1330 | if (atomic_read(&bp->b_io_remaining)) | ||
1331 | blk_run_address_space(bp->b_target->bt_mapping); | ||
1332 | wait_for_completion(&bp->b_iowait); | 1326 | wait_for_completion(&bp->b_iowait); |
1333 | 1327 | ||
1334 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1328 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
@@ -1346,8 +1340,8 @@ xfs_buf_offset( | |||
1346 | return XFS_BUF_PTR(bp) + offset; | 1340 | return XFS_BUF_PTR(bp) + offset; |
1347 | 1341 | ||
1348 | offset += bp->b_offset; | 1342 | offset += bp->b_offset; |
1349 | page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; | 1343 | page = bp->b_pages[offset >> PAGE_SHIFT]; |
1350 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); | 1344 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); |
1351 | } | 1345 | } |
1352 | 1346 | ||
1353 | /* | 1347 | /* |
@@ -1369,9 +1363,9 @@ xfs_buf_iomove( | |||
1369 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; | 1363 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; |
1370 | cpoff = xfs_buf_poff(boff + bp->b_offset); | 1364 | cpoff = xfs_buf_poff(boff + bp->b_offset); |
1371 | csize = min_t(size_t, | 1365 | csize = min_t(size_t, |
1372 | PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); | 1366 | PAGE_SIZE-cpoff, bp->b_count_desired-boff); |
1373 | 1367 | ||
1374 | ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); | 1368 | ASSERT(((csize + cpoff) <= PAGE_SIZE)); |
1375 | 1369 | ||
1376 | switch (mode) { | 1370 | switch (mode) { |
1377 | case XBRW_ZERO: | 1371 | case XBRW_ZERO: |
@@ -1394,89 +1388,84 @@ xfs_buf_iomove( | |||
1394 | */ | 1388 | */ |
1395 | 1389 | ||
1396 | /* | 1390 | /* |
1397 | * Wait for any bufs with callbacks that have been submitted but | 1391 | * Wait for any bufs with callbacks that have been submitted but have not yet |
1398 | * have not yet returned... walk the hash list for the target. | 1392 | * returned. These buffers will have an elevated hold count, so wait on those |
1393 | * while freeing all the buffers only held by the LRU. | ||
1399 | */ | 1394 | */ |
1400 | void | 1395 | void |
1401 | xfs_wait_buftarg( | 1396 | xfs_wait_buftarg( |
1402 | xfs_buftarg_t *btp) | 1397 | struct xfs_buftarg *btp) |
1403 | { | 1398 | { |
1404 | xfs_buf_t *bp, *n; | 1399 | struct xfs_buf *bp; |
1405 | xfs_bufhash_t *hash; | 1400 | |
1406 | uint i; | 1401 | restart: |
1407 | 1402 | spin_lock(&btp->bt_lru_lock); | |
1408 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1403 | while (!list_empty(&btp->bt_lru)) { |
1409 | hash = &btp->bt_hash[i]; | 1404 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
1410 | again: | 1405 | if (atomic_read(&bp->b_hold) > 1) { |
1411 | spin_lock(&hash->bh_lock); | 1406 | spin_unlock(&btp->bt_lru_lock); |
1412 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | 1407 | delay(100); |
1413 | ASSERT(btp == bp->b_target); | 1408 | goto restart; |
1414 | if (!(bp->b_flags & XBF_FS_MANAGED)) { | ||
1415 | spin_unlock(&hash->bh_lock); | ||
1416 | /* | ||
1417 | * Catch superblock reference count leaks | ||
1418 | * immediately | ||
1419 | */ | ||
1420 | BUG_ON(bp->b_bn == 0); | ||
1421 | delay(100); | ||
1422 | goto again; | ||
1423 | } | ||
1424 | } | 1409 | } |
1425 | spin_unlock(&hash->bh_lock); | 1410 | /* |
1411 | * clear the LRU reference count so the buffer doesn't get | ||
1411 | * clear the LRU reference count so the buffer doesn't get | ||
1412 | * ignored in xfs_buf_rele(). | ||
1413 | */ | ||
1414 | atomic_set(&bp->b_lru_ref, 0); | ||
1415 | spin_unlock(&btp->bt_lru_lock); | ||
1416 | xfs_buf_rele(bp); | ||
1417 | spin_lock(&btp->bt_lru_lock); | ||
1426 | } | 1418 | } |
1419 | spin_unlock(&btp->bt_lru_lock); | ||
1427 | } | 1420 | } |
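The rewritten xfs_wait_buftarg() drains the per-target LRU rather than hash chains: a buffer whose hold count is above one is still owned by an outstanding callback, so the lock is dropped, the thread delays briefly and the walk restarts from the top, while buffers owned only by the LRU have b_lru_ref cleared and are released immediately. A single-threaded sketch of that drain-with-restart loop, with the kernel's locking omitted and illustrative names:

    #include <stdlib.h>
    #include <unistd.h>

    struct lru_buf {
        int             hold;       /* >1 means someone still owns it */
        int             lru_ref;
        struct lru_buf  *next;
    };

    /* Release every buffer on the LRU, waiting out any that are still in use. */
    void drain_lru(struct lru_buf **lru)
    {
    restart:
        while (*lru) {
            struct lru_buf *bp = *lru;

            if (bp->hold > 1) {
                /* still referenced by in-flight work: back off and start
                 * again, the holder is expected to drop it eventually */
                usleep(1000);
                goto restart;
            }

            bp->lru_ref = 0;        /* don't let release re-add it */
            *lru = bp->next;        /* unlink from the LRU */
            free(bp);               /* stands in for the final release */
        }
    }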
1428 | 1421 | ||
1429 | /* | 1422 | int |
1430 | * Allocate buffer hash table for a given target. | 1423 | xfs_buftarg_shrink( |
1431 | * For devices containing metadata (i.e. not the log/realtime devices) | 1424 | struct shrinker *shrink, |
1432 | * we need to allocate a much larger hash table. | 1425 | struct shrink_control *sc) |
1433 | */ | ||
1434 | STATIC void | ||
1435 | xfs_alloc_bufhash( | ||
1436 | xfs_buftarg_t *btp, | ||
1437 | int external) | ||
1438 | { | 1426 | { |
1439 | unsigned int i; | 1427 | struct xfs_buftarg *btp = container_of(shrink, |
1428 | struct xfs_buftarg, bt_shrinker); | ||
1429 | struct xfs_buf *bp; | ||
1430 | int nr_to_scan = sc->nr_to_scan; | ||
1431 | LIST_HEAD(dispose); | ||
1440 | 1432 | ||
1441 | btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ | 1433 | if (!nr_to_scan) |
1442 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * | 1434 | return btp->bt_lru_nr; |
1443 | sizeof(xfs_bufhash_t)); | ||
1444 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | ||
1445 | spin_lock_init(&btp->bt_hash[i].bh_lock); | ||
1446 | INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); | ||
1447 | } | ||
1448 | } | ||
1449 | 1435 | ||
1450 | STATIC void | 1436 | spin_lock(&btp->bt_lru_lock); |
1451 | xfs_free_bufhash( | 1437 | while (!list_empty(&btp->bt_lru)) { |
1452 | xfs_buftarg_t *btp) | 1438 | if (nr_to_scan-- <= 0) |
1453 | { | 1439 | break; |
1454 | kmem_free_large(btp->bt_hash); | ||
1455 | btp->bt_hash = NULL; | ||
1456 | } | ||
1457 | 1440 | ||
1458 | /* | 1441 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
1459 | * buftarg list for delwrite queue processing | ||
1460 | */ | ||
1461 | static LIST_HEAD(xfs_buftarg_list); | ||
1462 | static DEFINE_SPINLOCK(xfs_buftarg_lock); | ||
1463 | 1442 | ||
1464 | STATIC void | 1443 | /* |
1465 | xfs_register_buftarg( | 1444 | * Decrement the b_lru_ref count unless the value is already |
1466 | xfs_buftarg_t *btp) | 1445 | * zero. If the value is already zero, we need to reclaim the |
1467 | { | 1446 | * buffer, otherwise it gets another trip through the LRU. |
1468 | spin_lock(&xfs_buftarg_lock); | 1447 | */ |
1469 | list_add(&btp->bt_list, &xfs_buftarg_list); | 1448 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { |
1470 | spin_unlock(&xfs_buftarg_lock); | 1449 | list_move_tail(&bp->b_lru, &btp->bt_lru); |
1471 | } | 1450 | continue; |
1451 | } | ||
1472 | 1452 | ||
1473 | STATIC void | 1453 | /* |
1474 | xfs_unregister_buftarg( | 1454 | * remove the buffer from the LRU now to avoid needing another |
1475 | xfs_buftarg_t *btp) | 1455 | * lock round trip inside xfs_buf_rele(). |
1476 | { | 1456 | */ |
1477 | spin_lock(&xfs_buftarg_lock); | 1457 | list_move(&bp->b_lru, &dispose); |
1478 | list_del(&btp->bt_list); | 1458 | btp->bt_lru_nr--; |
1479 | spin_unlock(&xfs_buftarg_lock); | 1459 | } |
1460 | spin_unlock(&btp->bt_lru_lock); | ||
1461 | |||
1462 | while (!list_empty(&dispose)) { | ||
1463 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | ||
1464 | list_del_init(&bp->b_lru); | ||
1465 | xfs_buf_rele(bp); | ||
1466 | } | ||
1467 | |||
1468 | return btp->bt_lru_nr; | ||
1480 | } | 1469 | } |
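The new shrinker callback scans the LRU under bt_lru_lock, decrementing each buffer's b_lru_ref; survivors get another trip around the list, while buffers whose count has reached zero are moved onto a private dispose list so the actual release happens after the lock is dropped. A compact user-space sketch of that scan-then-dispose structure, with the locking omitted and singly linked lists standing in for the kernel's list_head:

    #include <stdlib.h>

    struct shrink_buf {
        int                 lru_ref;    /* second-chance counter */
        struct shrink_buf   *next;
    };

    /* Scan up to nr_to_scan entries; reclaim those whose lru_ref hits zero. */
    unsigned long lru_shrink(struct shrink_buf **lru, unsigned long *lru_nr,
                             unsigned long nr_to_scan)
    {
        struct shrink_buf *dispose = NULL;
        struct shrink_buf **pos = lru;

        while (*pos && nr_to_scan--) {
            struct shrink_buf *bp = *pos;

            if (bp->lru_ref > 0) {
                bp->lru_ref--;          /* recently used: one more trip */
                pos = &bp->next;        /* leave it on the LRU */
                continue;
            }

            *pos = bp->next;            /* unlink the victim from the LRU */
            bp->next = dispose;         /* park it on a private list */
            dispose = bp;
            (*lru_nr)--;
        }

        /* release victims outside the (omitted) LRU lock */
        while (dispose) {
            struct shrink_buf *bp = dispose;
            dispose = bp->next;
            free(bp);                   /* stands in for the final release */
        }
        return *lru_nr;                 /* remaining cache size */
    }

Collecting victims on a local list and freeing them afterwards is what keeps the lock hold time short while the cache is being trimmed.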
1481 | 1470 | ||
1482 | void | 1471 | void |
@@ -1484,18 +1473,13 @@ xfs_free_buftarg( | |||
1484 | struct xfs_mount *mp, | 1473 | struct xfs_mount *mp, |
1485 | struct xfs_buftarg *btp) | 1474 | struct xfs_buftarg *btp) |
1486 | { | 1475 | { |
1476 | unregister_shrinker(&btp->bt_shrinker); | ||
1477 | |||
1487 | xfs_flush_buftarg(btp, 1); | 1478 | xfs_flush_buftarg(btp, 1); |
1488 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1479 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1489 | xfs_blkdev_issue_flush(btp); | 1480 | xfs_blkdev_issue_flush(btp); |
1490 | xfs_free_bufhash(btp); | ||
1491 | iput(btp->bt_mapping->host); | ||
1492 | 1481 | ||
1493 | /* Unregister the buftarg first so that we don't get a | ||
1494 | * wakeup finding a non-existent task | ||
1495 | */ | ||
1496 | xfs_unregister_buftarg(btp); | ||
1497 | kthread_stop(btp->bt_task); | 1482 | kthread_stop(btp->bt_task); |
1498 | |||
1499 | kmem_free(btp); | 1483 | kmem_free(btp); |
1500 | } | 1484 | } |
1501 | 1485 | ||
@@ -1511,21 +1495,12 @@ xfs_setsize_buftarg_flags( | |||
1511 | btp->bt_smask = sectorsize - 1; | 1495 | btp->bt_smask = sectorsize - 1; |
1512 | 1496 | ||
1513 | if (set_blocksize(btp->bt_bdev, sectorsize)) { | 1497 | if (set_blocksize(btp->bt_bdev, sectorsize)) { |
1514 | printk(KERN_WARNING | 1498 | xfs_warn(btp->bt_mount, |
1515 | "XFS: Cannot set_blocksize to %u on device %s\n", | 1499 | "Cannot set_blocksize to %u on device %s\n", |
1516 | sectorsize, XFS_BUFTARG_NAME(btp)); | 1500 | sectorsize, XFS_BUFTARG_NAME(btp)); |
1517 | return EINVAL; | 1501 | return EINVAL; |
1518 | } | 1502 | } |
1519 | 1503 | ||
1520 | if (verbose && | ||
1521 | (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { | ||
1522 | printk(KERN_WARNING | ||
1523 | "XFS: %u byte sectors in use on device %s. " | ||
1524 | "This is suboptimal; %u or greater is ideal.\n", | ||
1525 | sectorsize, XFS_BUFTARG_NAME(btp), | ||
1526 | (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); | ||
1527 | } | ||
1528 | |||
1529 | return 0; | 1504 | return 0; |
1530 | } | 1505 | } |
1531 | 1506 | ||
@@ -1540,7 +1515,7 @@ xfs_setsize_buftarg_early( | |||
1540 | struct block_device *bdev) | 1515 | struct block_device *bdev) |
1541 | { | 1516 | { |
1542 | return xfs_setsize_buftarg_flags(btp, | 1517 | return xfs_setsize_buftarg_flags(btp, |
1543 | PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); | 1518 | PAGE_SIZE, bdev_logical_block_size(bdev), 0); |
1544 | } | 1519 | } |
1545 | 1520 | ||
1546 | int | 1521 | int |
@@ -1553,62 +1528,22 @@ xfs_setsize_buftarg( | |||
1553 | } | 1528 | } |
1554 | 1529 | ||
1555 | STATIC int | 1530 | STATIC int |
1556 | xfs_mapping_buftarg( | ||
1557 | xfs_buftarg_t *btp, | ||
1558 | struct block_device *bdev) | ||
1559 | { | ||
1560 | struct backing_dev_info *bdi; | ||
1561 | struct inode *inode; | ||
1562 | struct address_space *mapping; | ||
1563 | static const struct address_space_operations mapping_aops = { | ||
1564 | .sync_page = block_sync_page, | ||
1565 | .migratepage = fail_migrate_page, | ||
1566 | }; | ||
1567 | |||
1568 | inode = new_inode(bdev->bd_inode->i_sb); | ||
1569 | if (!inode) { | ||
1570 | printk(KERN_WARNING | ||
1571 | "XFS: Cannot allocate mapping inode for device %s\n", | ||
1572 | XFS_BUFTARG_NAME(btp)); | ||
1573 | return ENOMEM; | ||
1574 | } | ||
1575 | inode->i_mode = S_IFBLK; | ||
1576 | inode->i_bdev = bdev; | ||
1577 | inode->i_rdev = bdev->bd_dev; | ||
1578 | bdi = blk_get_backing_dev_info(bdev); | ||
1579 | if (!bdi) | ||
1580 | bdi = &default_backing_dev_info; | ||
1581 | mapping = &inode->i_data; | ||
1582 | mapping->a_ops = &mapping_aops; | ||
1583 | mapping->backing_dev_info = bdi; | ||
1584 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
1585 | btp->bt_mapping = mapping; | ||
1586 | return 0; | ||
1587 | } | ||
1588 | |||
1589 | STATIC int | ||
1590 | xfs_alloc_delwrite_queue( | 1531 | xfs_alloc_delwrite_queue( |
1591 | xfs_buftarg_t *btp, | 1532 | xfs_buftarg_t *btp, |
1592 | const char *fsname) | 1533 | const char *fsname) |
1593 | { | 1534 | { |
1594 | int error = 0; | ||
1595 | |||
1596 | INIT_LIST_HEAD(&btp->bt_list); | ||
1597 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); | 1535 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); |
1598 | spin_lock_init(&btp->bt_delwrite_lock); | 1536 | spin_lock_init(&btp->bt_delwrite_lock); |
1599 | btp->bt_flags = 0; | 1537 | btp->bt_flags = 0; |
1600 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); | 1538 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); |
1601 | if (IS_ERR(btp->bt_task)) { | 1539 | if (IS_ERR(btp->bt_task)) |
1602 | error = PTR_ERR(btp->bt_task); | 1540 | return PTR_ERR(btp->bt_task); |
1603 | goto out_error; | 1541 | return 0; |
1604 | } | ||
1605 | xfs_register_buftarg(btp); | ||
1606 | out_error: | ||
1607 | return error; | ||
1608 | } | 1542 | } |
1609 | 1543 | ||
1610 | xfs_buftarg_t * | 1544 | xfs_buftarg_t * |
1611 | xfs_alloc_buftarg( | 1545 | xfs_alloc_buftarg( |
1546 | struct xfs_mount *mp, | ||
1612 | struct block_device *bdev, | 1547 | struct block_device *bdev, |
1613 | int external, | 1548 | int external, |
1614 | const char *fsname) | 1549 | const char *fsname) |
@@ -1617,15 +1552,22 @@ xfs_alloc_buftarg( | |||
1617 | 1552 | ||
1618 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); | 1553 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); |
1619 | 1554 | ||
1555 | btp->bt_mount = mp; | ||
1620 | btp->bt_dev = bdev->bd_dev; | 1556 | btp->bt_dev = bdev->bd_dev; |
1621 | btp->bt_bdev = bdev; | 1557 | btp->bt_bdev = bdev; |
1622 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1558 | btp->bt_bdi = blk_get_backing_dev_info(bdev); |
1559 | if (!btp->bt_bdi) | ||
1623 | goto error; | 1560 | goto error; |
1624 | if (xfs_mapping_buftarg(btp, bdev)) | 1561 | |
1562 | INIT_LIST_HEAD(&btp->bt_lru); | ||
1563 | spin_lock_init(&btp->bt_lru_lock); | ||
1564 | if (xfs_setsize_buftarg_early(btp, bdev)) | ||
1625 | goto error; | 1565 | goto error; |
1626 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1566 | if (xfs_alloc_delwrite_queue(btp, fsname)) |
1627 | goto error; | 1567 | goto error; |
1628 | xfs_alloc_bufhash(btp, external); | 1568 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; |
1569 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | ||
1570 | register_shrinker(&btp->bt_shrinker); | ||
1629 | return btp; | 1571 | return btp; |
1630 | 1572 | ||
1631 | error: | 1573 | error: |
@@ -1730,27 +1672,6 @@ xfs_buf_runall_queues( | |||
1730 | flush_workqueue(queue); | 1672 | flush_workqueue(queue); |
1731 | } | 1673 | } |
1732 | 1674 | ||
1733 | STATIC int | ||
1734 | xfsbufd_wakeup( | ||
1735 | struct shrinker *shrink, | ||
1736 | int priority, | ||
1737 | gfp_t mask) | ||
1738 | { | ||
1739 | xfs_buftarg_t *btp; | ||
1740 | |||
1741 | spin_lock(&xfs_buftarg_lock); | ||
1742 | list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { | ||
1743 | if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) | ||
1744 | continue; | ||
1745 | if (list_empty(&btp->bt_delwrite_queue)) | ||
1746 | continue; | ||
1747 | set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); | ||
1748 | wake_up_process(btp->bt_task); | ||
1749 | } | ||
1750 | spin_unlock(&xfs_buftarg_lock); | ||
1751 | return 0; | ||
1752 | } | ||
1753 | |||
1754 | /* | 1675 | /* |
1755 | * Move as many buffers as specified to the supplied list | 1676 | * Move as many buffers as specified to the supplied list |
1756 | * indicating if we skipped any buffers to prevent deadlocks. | 1677 | * indicating if we skipped any buffers to prevent deadlocks. |
@@ -1771,7 +1692,6 @@ xfs_buf_delwri_split( | |||
1771 | INIT_LIST_HEAD(list); | 1692 | INIT_LIST_HEAD(list); |
1772 | spin_lock(dwlk); | 1693 | spin_lock(dwlk); |
1773 | list_for_each_entry_safe(bp, n, dwq, b_list) { | 1694 | list_for_each_entry_safe(bp, n, dwq, b_list) { |
1774 | trace_xfs_buf_delwri_split(bp, _RET_IP_); | ||
1775 | ASSERT(bp->b_flags & XBF_DELWRI); | 1695 | ASSERT(bp->b_flags & XBF_DELWRI); |
1776 | 1696 | ||
1777 | if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { | 1697 | if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { |
@@ -1785,6 +1705,7 @@ xfs_buf_delwri_split( | |||
1785 | _XBF_RUN_QUEUES); | 1705 | _XBF_RUN_QUEUES); |
1786 | bp->b_flags |= XBF_WRITE; | 1706 | bp->b_flags |= XBF_WRITE; |
1787 | list_move_tail(&bp->b_list, list); | 1707 | list_move_tail(&bp->b_list, list); |
1708 | trace_xfs_buf_delwri_split(bp, _RET_IP_); | ||
1788 | } else | 1709 | } else |
1789 | skipped++; | 1710 | skipped++; |
1790 | } | 1711 | } |
@@ -1838,8 +1759,8 @@ xfsbufd( | |||
1838 | do { | 1759 | do { |
1839 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1760 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); |
1840 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); | 1761 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); |
1841 | int count = 0; | ||
1842 | struct list_head tmp; | 1762 | struct list_head tmp; |
1763 | struct blk_plug plug; | ||
1843 | 1764 | ||
1844 | if (unlikely(freezing(current))) { | 1765 | if (unlikely(freezing(current))) { |
1845 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); | 1766 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); |
@@ -1855,16 +1776,15 @@ xfsbufd( | |||
1855 | 1776 | ||
1856 | xfs_buf_delwri_split(target, &tmp, age); | 1777 | xfs_buf_delwri_split(target, &tmp, age); |
1857 | list_sort(NULL, &tmp, xfs_buf_cmp); | 1778 | list_sort(NULL, &tmp, xfs_buf_cmp); |
1779 | |||
1780 | blk_start_plug(&plug); | ||
1858 | while (!list_empty(&tmp)) { | 1781 | while (!list_empty(&tmp)) { |
1859 | struct xfs_buf *bp; | 1782 | struct xfs_buf *bp; |
1860 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); | 1783 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); |
1861 | list_del_init(&bp->b_list); | 1784 | list_del_init(&bp->b_list); |
1862 | xfs_bdstrat_cb(bp); | 1785 | xfs_bdstrat_cb(bp); |
1863 | count++; | ||
1864 | } | 1786 | } |
1865 | if (count) | 1787 | blk_finish_plug(&plug); |
1866 | blk_run_address_space(target->bt_mapping); | ||
1867 | |||
1868 | } while (!kthread_should_stop()); | 1788 | } while (!kthread_should_stop()); |
1869 | 1789 | ||
1870 | return 0; | 1790 | return 0; |
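xfsbufd no longer counts submissions and kicks the request queue afterwards; instead the whole submission loop is bracketed by blk_start_plug()/blk_finish_plug(), letting the block layer merge and dispatch the batch when the plug is released. The closest user-space analogue is to gather the pieces and submit them in one call rather than one syscall per buffer; a hedged example using pwritev(), with minimal error handling and arbitrary sizes:

    #define _GNU_SOURCE
    #include <sys/types.h>
    #include <sys/uio.h>
    #include <unistd.h>

    #define NBUFS 8

    /* Submit a batch of equally sized buffers with one system call. */
    ssize_t write_batch(int fd, off_t offset, char bufs[NBUFS][512])
    {
        struct iovec iov[NBUFS];
        int i;

        for (i = 0; i < NBUFS; i++) {
            iov[i].iov_base = bufs[i];
            iov[i].iov_len = sizeof(bufs[i]);
        }
        /*
         * One submission for the whole batch, loosely analogous to queueing
         * buffers under a plug and letting the block layer flush them together.
         */
        return pwritev(fd, iov, NBUFS, offset);
    }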
@@ -1884,6 +1804,7 @@ xfs_flush_buftarg( | |||
1884 | int pincount = 0; | 1804 | int pincount = 0; |
1885 | LIST_HEAD(tmp_list); | 1805 | LIST_HEAD(tmp_list); |
1886 | LIST_HEAD(wait_list); | 1806 | LIST_HEAD(wait_list); |
1807 | struct blk_plug plug; | ||
1887 | 1808 | ||
1888 | xfs_buf_runall_queues(xfsconvertd_workqueue); | 1809 | xfs_buf_runall_queues(xfsconvertd_workqueue); |
1889 | xfs_buf_runall_queues(xfsdatad_workqueue); | 1810 | xfs_buf_runall_queues(xfsdatad_workqueue); |
@@ -1898,6 +1819,8 @@ xfs_flush_buftarg( | |||
1898 | * we do that after issuing all the IO. | 1819 | * we do that after issuing all the IO. |
1899 | */ | 1820 | */ |
1900 | list_sort(NULL, &tmp_list, xfs_buf_cmp); | 1821 | list_sort(NULL, &tmp_list, xfs_buf_cmp); |
1822 | |||
1823 | blk_start_plug(&plug); | ||
1901 | while (!list_empty(&tmp_list)) { | 1824 | while (!list_empty(&tmp_list)) { |
1902 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); | 1825 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); |
1903 | ASSERT(target == bp->b_target); | 1826 | ASSERT(target == bp->b_target); |
@@ -1908,15 +1831,15 @@ xfs_flush_buftarg( | |||
1908 | } | 1831 | } |
1909 | xfs_bdstrat_cb(bp); | 1832 | xfs_bdstrat_cb(bp); |
1910 | } | 1833 | } |
1834 | blk_finish_plug(&plug); | ||
1911 | 1835 | ||
1912 | if (wait) { | 1836 | if (wait) { |
1913 | /* Expedite and wait for IO to complete. */ | 1837 | /* Wait for IO to complete. */ |
1914 | blk_run_address_space(target->bt_mapping); | ||
1915 | while (!list_empty(&wait_list)) { | 1838 | while (!list_empty(&wait_list)) { |
1916 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | 1839 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); |
1917 | 1840 | ||
1918 | list_del_init(&bp->b_list); | 1841 | list_del_init(&bp->b_list); |
1919 | xfs_iowait(bp); | 1842 | xfs_buf_iowait(bp); |
1920 | xfs_buf_relse(bp); | 1843 | xfs_buf_relse(bp); |
1921 | } | 1844 | } |
1922 | } | 1845 | } |
@@ -1933,19 +1856,19 @@ xfs_buf_init(void) | |||
1933 | goto out; | 1856 | goto out; |
1934 | 1857 | ||
1935 | xfslogd_workqueue = alloc_workqueue("xfslogd", | 1858 | xfslogd_workqueue = alloc_workqueue("xfslogd", |
1936 | WQ_RESCUER | WQ_HIGHPRI, 1); | 1859 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); |
1937 | if (!xfslogd_workqueue) | 1860 | if (!xfslogd_workqueue) |
1938 | goto out_free_buf_zone; | 1861 | goto out_free_buf_zone; |
1939 | 1862 | ||
1940 | xfsdatad_workqueue = create_workqueue("xfsdatad"); | 1863 | xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); |
1941 | if (!xfsdatad_workqueue) | 1864 | if (!xfsdatad_workqueue) |
1942 | goto out_destroy_xfslogd_workqueue; | 1865 | goto out_destroy_xfslogd_workqueue; |
1943 | 1866 | ||
1944 | xfsconvertd_workqueue = create_workqueue("xfsconvertd"); | 1867 | xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", |
1868 | WQ_MEM_RECLAIM, 1); | ||
1945 | if (!xfsconvertd_workqueue) | 1869 | if (!xfsconvertd_workqueue) |
1946 | goto out_destroy_xfsdatad_workqueue; | 1870 | goto out_destroy_xfsdatad_workqueue; |
1947 | 1871 | ||
1948 | register_shrinker(&xfs_buf_shake); | ||
1949 | return 0; | 1872 | return 0; |
1950 | 1873 | ||
1951 | out_destroy_xfsdatad_workqueue: | 1874 | out_destroy_xfsdatad_workqueue: |
@@ -1961,7 +1884,6 @@ xfs_buf_init(void) | |||
1961 | void | 1884 | void |
1962 | xfs_buf_terminate(void) | 1885 | xfs_buf_terminate(void) |
1963 | { | 1886 | { |
1964 | unregister_shrinker(&xfs_buf_shake); | ||
1965 | destroy_workqueue(xfsconvertd_workqueue); | 1887 | destroy_workqueue(xfsconvertd_workqueue); |
1966 | destroy_workqueue(xfsdatad_workqueue); | 1888 | destroy_workqueue(xfsdatad_workqueue); |
1967 | destroy_workqueue(xfslogd_workqueue); | 1889 | destroy_workqueue(xfslogd_workqueue); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 2a05614f0b92..50a7d5fb3b73 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -51,7 +51,6 @@ typedef enum { | |||
51 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 51 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
52 | #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ | 52 | #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ |
53 | #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ | 53 | #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ |
54 | #define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */ | ||
55 | #define XBF_ORDERED (1 << 11)/* use ordered writes */ | 54 | #define XBF_ORDERED (1 << 11)/* use ordered writes */ |
56 | #define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ | 55 | #define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ |
57 | #define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ | 56 | #define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ |
@@ -62,38 +61,11 @@ typedef enum { | |||
62 | #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ | 61 | #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ |
63 | 62 | ||
64 | /* flags used only internally */ | 63 | /* flags used only internally */ |
65 | #define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */ | ||
66 | #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ | 64 | #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ |
67 | #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ | 65 | #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ |
66 | #define _XBF_KMEM (1 << 20)/* backed by heap memory */ | ||
68 | #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ | 67 | #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ |
69 | 68 | ||
70 | /* | ||
71 | * Special flag for supporting metadata blocks smaller than a FSB. | ||
72 | * | ||
73 | * In this case we can have multiple xfs_buf_t on a single page and | ||
74 | * need to lock out concurrent xfs_buf_t readers as they only | ||
75 | * serialise access to the buffer. | ||
76 | * | ||
77 | * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation | ||
78 | * between reads of the page. Hence we can have one thread read the | ||
79 | * page and modify it, but then race with another thread that thinks | ||
80 | * the page is not up-to-date and hence reads it again. | ||
81 | * | ||
82 | * The result is that the first modifcation to the page is lost. | ||
83 | * This sort of AGF/AGI reading race can happen when unlinking inodes | ||
84 | * that require truncation and results in the AGI unlinked list | ||
85 | * modifications being lost. | ||
86 | */ | ||
87 | #define _XBF_PAGE_LOCKED (1 << 22) | ||
88 | |||
89 | /* | ||
90 | * If we try a barrier write, but it fails we have to communicate | ||
91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
92 | * when the buffer is re-issued so we have to add another flag to | ||
93 | * keep this information. | ||
94 | */ | ||
95 | #define _XFS_BARRIER_FAILED (1 << 23) | ||
96 | |||
97 | typedef unsigned int xfs_buf_flags_t; | 69 | typedef unsigned int xfs_buf_flags_t; |
98 | 70 | ||
99 | #define XFS_BUF_FLAGS \ | 71 | #define XFS_BUF_FLAGS \ |
@@ -104,19 +76,15 @@ typedef unsigned int xfs_buf_flags_t; | |||
104 | { XBF_DONE, "DONE" }, \ | 76 | { XBF_DONE, "DONE" }, \ |
105 | { XBF_DELWRI, "DELWRI" }, \ | 77 | { XBF_DELWRI, "DELWRI" }, \ |
106 | { XBF_STALE, "STALE" }, \ | 78 | { XBF_STALE, "STALE" }, \ |
107 | { XBF_FS_MANAGED, "FS_MANAGED" }, \ | ||
108 | { XBF_ORDERED, "ORDERED" }, \ | 79 | { XBF_ORDERED, "ORDERED" }, \ |
109 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ | 80 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ |
110 | { XBF_LOCK, "LOCK" }, /* should never be set */\ | 81 | { XBF_LOCK, "LOCK" }, /* should never be set */\ |
111 | { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ | 82 | { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ |
112 | { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ | 83 | { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ |
113 | { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \ | ||
114 | { _XBF_PAGES, "PAGES" }, \ | 84 | { _XBF_PAGES, "PAGES" }, \ |
115 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ | 85 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ |
116 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 86 | { _XBF_KMEM, "KMEM" }, \ |
117 | { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ | 87 | { _XBF_DELWRI_Q, "DELWRI_Q" } |
118 | { _XFS_BARRIER_FAILED, "BARRIER_FAILED" } | ||
119 | |||
120 | 88 | ||
121 | typedef enum { | 89 | typedef enum { |
122 | XBT_FORCE_SLEEP = 0, | 90 | XBT_FORCE_SLEEP = 0, |
@@ -131,70 +99,67 @@ typedef struct xfs_bufhash { | |||
131 | typedef struct xfs_buftarg { | 99 | typedef struct xfs_buftarg { |
132 | dev_t bt_dev; | 100 | dev_t bt_dev; |
133 | struct block_device *bt_bdev; | 101 | struct block_device *bt_bdev; |
134 | struct address_space *bt_mapping; | 102 | struct backing_dev_info *bt_bdi; |
103 | struct xfs_mount *bt_mount; | ||
135 | unsigned int bt_bsize; | 104 | unsigned int bt_bsize; |
136 | unsigned int bt_sshift; | 105 | unsigned int bt_sshift; |
137 | size_t bt_smask; | 106 | size_t bt_smask; |
138 | 107 | ||
139 | /* per device buffer hash table */ | ||
140 | uint bt_hashshift; | ||
141 | xfs_bufhash_t *bt_hash; | ||
142 | |||
143 | /* per device delwri queue */ | 108 | /* per device delwri queue */ |
144 | struct task_struct *bt_task; | 109 | struct task_struct *bt_task; |
145 | struct list_head bt_list; | ||
146 | struct list_head bt_delwrite_queue; | 110 | struct list_head bt_delwrite_queue; |
147 | spinlock_t bt_delwrite_lock; | 111 | spinlock_t bt_delwrite_lock; |
148 | unsigned long bt_flags; | 112 | unsigned long bt_flags; |
149 | } xfs_buftarg_t; | ||
150 | 113 | ||
151 | /* | 114 | /* LRU control structures */ |
152 | * xfs_buf_t: Buffer structure for pagecache-based buffers | 115 | struct shrinker bt_shrinker; |
153 | * | 116 | struct list_head bt_lru; |
154 | * This buffer structure is used by the pagecache buffer management routines | 117 | spinlock_t bt_lru_lock; |
155 | * to refer to an assembly of pages forming a logical buffer. | 118 | unsigned int bt_lru_nr; |
156 | * | 119 | } xfs_buftarg_t; |
157 | * The buffer structure is used on a temporary basis only, and discarded when | ||
158 | * released. The real data storage is recorded in the pagecache. Buffers are | ||
159 | * hashed to the block device on which the file system resides. | ||
160 | */ | ||
161 | 120 | ||
162 | struct xfs_buf; | 121 | struct xfs_buf; |
163 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); | 122 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); |
164 | typedef void (*xfs_buf_relse_t)(struct xfs_buf *); | ||
165 | typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); | ||
166 | 123 | ||
167 | #define XB_PAGES 2 | 124 | #define XB_PAGES 2 |
168 | 125 | ||
169 | typedef struct xfs_buf { | 126 | typedef struct xfs_buf { |
127 | /* | ||
128 | * first cacheline holds all the fields needed for an uncontended cache | ||
129 | * hit to be fully processed. The semaphore straddles the cacheline | ||
130 | * boundary, but the counter and lock sits on the first cacheline, | ||
131 | * which is the only bit that is touched if we hit the semaphore | ||
132 | * fast-path on locking. | ||
133 | */ | ||
134 | struct rb_node b_rbnode; /* rbtree node */ | ||
135 | xfs_off_t b_file_offset; /* offset in file */ | ||
136 | size_t b_buffer_length;/* size of buffer in bytes */ | ||
137 | atomic_t b_hold; /* reference count */ | ||
138 | atomic_t b_lru_ref; /* lru reclaim ref count */ | ||
139 | xfs_buf_flags_t b_flags; /* status flags */ | ||
170 | struct semaphore b_sema; /* semaphore for lockables */ | 140 | struct semaphore b_sema; /* semaphore for lockables */ |
171 | unsigned long b_queuetime; /* time buffer was queued */ | 141 | |
172 | atomic_t b_pin_count; /* pin count */ | 142 | struct list_head b_lru; /* lru list */ |
173 | wait_queue_head_t b_waiters; /* unpin waiters */ | 143 | wait_queue_head_t b_waiters; /* unpin waiters */ |
174 | struct list_head b_list; | 144 | struct list_head b_list; |
175 | xfs_buf_flags_t b_flags; /* status flags */ | 145 | struct xfs_perag *b_pag; /* contains rbtree root */ |
176 | struct list_head b_hash_list; /* hash table list */ | ||
177 | xfs_bufhash_t *b_hash; /* hash table list start */ | ||
178 | xfs_buftarg_t *b_target; /* buffer target (device) */ | 146 | xfs_buftarg_t *b_target; /* buffer target (device) */ |
179 | atomic_t b_hold; /* reference count */ | ||
180 | xfs_daddr_t b_bn; /* block number for I/O */ | 147 | xfs_daddr_t b_bn; /* block number for I/O */ |
181 | xfs_off_t b_file_offset; /* offset in file */ | ||
182 | size_t b_buffer_length;/* size of buffer in bytes */ | ||
183 | size_t b_count_desired;/* desired transfer size */ | 148 | size_t b_count_desired;/* desired transfer size */ |
184 | void *b_addr; /* virtual address of buffer */ | 149 | void *b_addr; /* virtual address of buffer */ |
185 | struct work_struct b_iodone_work; | 150 | struct work_struct b_iodone_work; |
186 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | ||
187 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 151 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
188 | xfs_buf_relse_t b_relse; /* releasing function */ | ||
189 | struct completion b_iowait; /* queue for I/O waiters */ | 152 | struct completion b_iowait; /* queue for I/O waiters */ |
190 | void *b_fspriv; | 153 | void *b_fspriv; |
191 | void *b_fspriv2; | 154 | void *b_fspriv2; |
192 | struct xfs_mount *b_mount; | ||
193 | unsigned short b_error; /* error code on I/O */ | ||
194 | unsigned int b_page_count; /* size of page array */ | ||
195 | unsigned int b_offset; /* page offset in first page */ | ||
196 | struct page **b_pages; /* array of page pointers */ | 155 | struct page **b_pages; /* array of page pointers */ |
197 | struct page *b_page_array[XB_PAGES]; /* inline pages */ | 156 | struct page *b_page_array[XB_PAGES]; /* inline pages */ |
157 | unsigned long b_queuetime; /* time buffer was queued */ | ||
158 | atomic_t b_pin_count; /* pin count */ | ||
159 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | ||
160 | unsigned int b_page_count; /* size of page array */ | ||
161 | unsigned int b_offset; /* page offset in first page */ | ||
162 | unsigned short b_error; /* error code on I/O */ | ||
198 | #ifdef XFS_BUF_LOCK_TRACKING | 163 | #ifdef XFS_BUF_LOCK_TRACKING |
199 | int b_last_holder; | 164 | int b_last_holder; |
200 | #endif | 165 | #endif |
@@ -213,11 +178,14 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, | |||
213 | xfs_buf_flags_t); | 178 | xfs_buf_flags_t); |
214 | 179 | ||
215 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); | 180 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); |
216 | extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); | 181 | extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); |
182 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); | ||
217 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 183 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); |
218 | extern void xfs_buf_hold(xfs_buf_t *); | 184 | extern void xfs_buf_hold(xfs_buf_t *); |
219 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, | 185 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); |
220 | xfs_buf_flags_t); | 186 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, |
187 | struct xfs_buftarg *target, | ||
188 | xfs_daddr_t daddr, size_t length, int flags); | ||
221 | 189 | ||
222 | /* Releasing Buffers */ | 190 | /* Releasing Buffers */ |
223 | extern void xfs_buf_free(xfs_buf_t *); | 191 | extern void xfs_buf_free(xfs_buf_t *); |
@@ -242,6 +210,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *); | |||
242 | extern int xfs_buf_iowait(xfs_buf_t *); | 210 | extern int xfs_buf_iowait(xfs_buf_t *); |
243 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | 211 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, |
244 | xfs_buf_rw_t); | 212 | xfs_buf_rw_t); |
213 | #define xfs_buf_zero(bp, off, len) \ | ||
214 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | ||
245 | 215 | ||
246 | static inline int xfs_buf_geterror(xfs_buf_t *bp) | 216 | static inline int xfs_buf_geterror(xfs_buf_t *bp) |
247 | { | 217 | { |
@@ -267,7 +237,8 @@ extern void xfs_buf_terminate(void); | |||
267 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ | 237 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ |
268 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) | 238 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) |
269 | 239 | ||
270 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) | 240 | void xfs_buf_stale(struct xfs_buf *bp); |
241 | #define XFS_BUF_STALE(bp) xfs_buf_stale(bp); | ||
271 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 242 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
272 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) | 243 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) |
273 | #define XFS_BUF_SUPER_STALE(bp) do { \ | 244 | #define XFS_BUF_SUPER_STALE(bp) do { \ |
@@ -276,8 +247,6 @@ extern void xfs_buf_terminate(void); | |||
276 | XFS_BUF_DONE(bp); \ | 247 | XFS_BUF_DONE(bp); \ |
277 | } while (0) | 248 | } while (0) |
278 | 249 | ||
279 | #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) | ||
280 | |||
281 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) | 250 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) |
282 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) | 251 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) |
283 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) | 252 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) |
@@ -320,7 +289,6 @@ extern void xfs_buf_terminate(void); | |||
320 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) | 289 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) |
321 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) | 290 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) |
322 | #define XFS_BUF_SET_START(bp) do { } while (0) | 291 | #define XFS_BUF_SET_START(bp) do { } while (0) |
323 | #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) | ||
324 | 292 | ||
325 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) | 293 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) |
326 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) | 294 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) |
@@ -333,9 +301,15 @@ extern void xfs_buf_terminate(void); | |||
333 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | 301 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) |
334 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | 302 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) |
335 | 303 | ||
336 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) | 304 | static inline void |
305 | xfs_buf_set_ref( | ||
306 | struct xfs_buf *bp, | ||
307 | int lru_ref) | ||
308 | { | ||
309 | atomic_set(&bp->b_lru_ref, lru_ref); | ||
310 | } | ||
311 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) | ||
337 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) | 312 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) |
338 | #define XFS_BUF_SET_REF(bp, ref) do { } while (0) | ||
339 | 313 | ||
340 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) | 314 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) |
341 | 315 | ||
@@ -351,30 +325,15 @@ extern void xfs_buf_terminate(void); | |||
351 | 325 | ||
352 | static inline void xfs_buf_relse(xfs_buf_t *bp) | 326 | static inline void xfs_buf_relse(xfs_buf_t *bp) |
353 | { | 327 | { |
354 | if (!bp->b_relse) | 328 | xfs_buf_unlock(bp); |
355 | xfs_buf_unlock(bp); | ||
356 | xfs_buf_rele(bp); | 329 | xfs_buf_rele(bp); |
357 | } | 330 | } |
358 | 331 | ||
359 | #define xfs_biodone(bp) xfs_buf_ioend(bp, 0) | ||
360 | |||
361 | #define xfs_biomove(bp, off, len, data, rw) \ | ||
362 | xfs_buf_iomove((bp), (off), (len), (data), \ | ||
363 | ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ) | ||
364 | |||
365 | #define xfs_biozero(bp, off, len) \ | ||
366 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | ||
367 | |||
368 | #define xfs_iowait(bp) xfs_buf_iowait(bp) | ||
369 | |||
370 | #define xfs_baread(target, rablkno, ralen) \ | ||
371 | xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK) | ||
372 | |||
373 | |||
374 | /* | 332 | /* |
375 | * Handling of buftargs. | 333 | * Handling of buftargs. |
376 | */ | 334 | */ |
377 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *); | 335 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, |
336 | struct block_device *, int, const char *); | ||
378 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); | 337 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); |
379 | extern void xfs_wait_buftarg(xfs_buftarg_t *); | 338 | extern void xfs_wait_buftarg(xfs_buftarg_t *); |
380 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); | 339 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); |
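The buftarg changes above replace the old per-device hash table with a per-AG rbtree (b_rbnode/b_pag) plus an LRU protected by bt_lru_lock and walked by the new bt_shrinker, while b_lru_ref lets a cache user state how many reclaim passes a buffer should survive. The fragment below is only a sketch of the intended b_lru_ref behaviour; the real decrement logic lives in xfs_buf.c (not part of this header hunk) and example_shrink_pass() is an invented name.

	static void
	example_shrink_pass(
		struct xfs_buftarg	*btp)
	{
		struct xfs_buf		*bp, *n;

		spin_lock(&btp->bt_lru_lock);
		list_for_each_entry_safe(bp, n, &btp->bt_lru, b_lru) {
			/* each shrinker pass consumes one LRU reference */
			if (atomic_add_unless(&bp->b_lru_ref, -1, 0))
				continue;	/* still referenced, keep it */
			/* a buffer that has used up its references is removed
			 * from the LRU and becomes a reclaim candidate */
			list_del_init(&bp->b_lru);
			btp->bt_lru_nr--;
		}
		spin_unlock(&btp->bt_lru_lock);
	}

Callers that want their metadata to stay cached longer can raise the count with xfs_buf_set_ref() (shown above, wired to XFS_BUF_SET_VTYPE_REF), so such buffers survive more passes than ordinary data buffers.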
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h deleted file mode 100644 index 55bddf3b6091..000000000000 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ /dev/null | |||
@@ -1,28 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_CRED_H__ | ||
19 | #define __XFS_CRED_H__ | ||
20 | |||
21 | #include <linux/capability.h> | ||
22 | |||
23 | /* | ||
24 | * Credentials | ||
25 | */ | ||
26 | typedef const struct cred cred_t; | ||
27 | |||
28 | #endif /* __XFS_CRED_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c new file mode 100644 index 000000000000..244e797dae32 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.c | |||
@@ -0,0 +1,222 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010 Red Hat, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_sb.h" | ||
20 | #include "xfs_inum.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_ag.h" | ||
23 | #include "xfs_mount.h" | ||
24 | #include "xfs_quota.h" | ||
25 | #include "xfs_trans.h" | ||
26 | #include "xfs_alloc_btree.h" | ||
27 | #include "xfs_bmap_btree.h" | ||
28 | #include "xfs_ialloc_btree.h" | ||
29 | #include "xfs_btree.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_alloc.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_discard.h" | ||
34 | #include "xfs_trace.h" | ||
35 | |||
36 | STATIC int | ||
37 | xfs_trim_extents( | ||
38 | struct xfs_mount *mp, | ||
39 | xfs_agnumber_t agno, | ||
40 | xfs_fsblock_t start, | ||
41 | xfs_fsblock_t len, | ||
42 | xfs_fsblock_t minlen, | ||
43 | __uint64_t *blocks_trimmed) | ||
44 | { | ||
45 | struct block_device *bdev = mp->m_ddev_targp->bt_bdev; | ||
46 | struct xfs_btree_cur *cur; | ||
47 | struct xfs_buf *agbp; | ||
48 | struct xfs_perag *pag; | ||
49 | int error; | ||
50 | int i; | ||
51 | |||
52 | pag = xfs_perag_get(mp, agno); | ||
53 | |||
54 | error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); | ||
55 | if (error || !agbp) | ||
56 | goto out_put_perag; | ||
57 | |||
58 | cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); | ||
59 | |||
60 | /* | ||
61 | * Force out the log. This means any transactions that might have freed | ||
62 | * space before we took the AGF buffer lock are now on disk, and the | ||
63 | * volatile disk cache is flushed. | ||
64 | */ | ||
65 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
66 | |||
67 | /* | ||
68 | * Look up the longest btree in the AGF and start with it. | ||
69 | */ | ||
70 | error = xfs_alloc_lookup_le(cur, 0, | ||
71 | XFS_BUF_TO_AGF(agbp)->agf_longest, &i); | ||
72 | if (error) | ||
73 | goto out_del_cursor; | ||
74 | |||
75 | /* | ||
76 | * Loop until we are done with all extents that are large | ||
77 | * enough to be worth discarding. | ||
78 | */ | ||
79 | while (i) { | ||
80 | xfs_agblock_t fbno; | ||
81 | xfs_extlen_t flen; | ||
82 | |||
83 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); | ||
84 | if (error) | ||
85 | goto out_del_cursor; | ||
86 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); | ||
87 | ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); | ||
88 | |||
89 | /* | ||
90 | * Too small? Give up. | ||
91 | */ | ||
92 | if (flen < minlen) { | ||
93 | trace_xfs_discard_toosmall(mp, agno, fbno, flen); | ||
94 | goto out_del_cursor; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * If the extent is entirely outside of the range we are | ||
99 | * supposed to discard, skip it. Do not bother to trim | ||
100 | * down partially overlapping ranges for now. | ||
101 | */ | ||
102 | if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || | ||
103 | XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { | ||
104 | trace_xfs_discard_exclude(mp, agno, fbno, flen); | ||
105 | goto next_extent; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * If any blocks in the range are still busy, skip the | ||
110 | * discard and try again the next time. | ||
111 | */ | ||
112 | if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { | ||
113 | trace_xfs_discard_busy(mp, agno, fbno, flen); | ||
114 | goto next_extent; | ||
115 | } | ||
116 | |||
117 | trace_xfs_discard_extent(mp, agno, fbno, flen); | ||
118 | error = -blkdev_issue_discard(bdev, | ||
119 | XFS_AGB_TO_DADDR(mp, agno, fbno), | ||
120 | XFS_FSB_TO_BB(mp, flen), | ||
121 | GFP_NOFS, 0); | ||
122 | if (error) | ||
123 | goto out_del_cursor; | ||
124 | *blocks_trimmed += flen; | ||
125 | |||
126 | next_extent: | ||
127 | error = xfs_btree_decrement(cur, 0, &i); | ||
128 | if (error) | ||
129 | goto out_del_cursor; | ||
130 | } | ||
131 | |||
132 | out_del_cursor: | ||
133 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
134 | xfs_buf_relse(agbp); | ||
135 | out_put_perag: | ||
136 | xfs_perag_put(pag); | ||
137 | return error; | ||
138 | } | ||
139 | |||
140 | int | ||
141 | xfs_ioc_trim( | ||
142 | struct xfs_mount *mp, | ||
143 | struct fstrim_range __user *urange) | ||
144 | { | ||
145 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; | ||
146 | unsigned int granularity = q->limits.discard_granularity; | ||
147 | struct fstrim_range range; | ||
148 | xfs_fsblock_t start, len, minlen; | ||
149 | xfs_agnumber_t start_agno, end_agno, agno; | ||
150 | __uint64_t blocks_trimmed = 0; | ||
151 | int error, last_error = 0; | ||
152 | |||
153 | if (!capable(CAP_SYS_ADMIN)) | ||
154 | return -XFS_ERROR(EPERM); | ||
155 | if (!blk_queue_discard(q)) | ||
156 | return -XFS_ERROR(EOPNOTSUPP); | ||
157 | if (copy_from_user(&range, urange, sizeof(range))) | ||
158 | return -XFS_ERROR(EFAULT); | ||
159 | |||
160 | /* | ||
161 | * Truncating down the len isn't actually quite correct, but using | ||
162 | * XFS_B_TO_FSB would mean we trivially get overflows for values | ||
163 | * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default | ||
164 | * used by the fstrim application. In the end it really doesn't | ||
165 | * matter as trimming blocks is an advisory interface. | ||
166 | */ | ||
167 | start = XFS_B_TO_FSBT(mp, range.start); | ||
168 | len = XFS_B_TO_FSBT(mp, range.len); | ||
169 | minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); | ||
170 | |||
171 | start_agno = XFS_FSB_TO_AGNO(mp, start); | ||
172 | if (start_agno >= mp->m_sb.sb_agcount) | ||
173 | return -XFS_ERROR(EINVAL); | ||
174 | |||
175 | end_agno = XFS_FSB_TO_AGNO(mp, start + len); | ||
176 | if (end_agno >= mp->m_sb.sb_agcount) | ||
177 | end_agno = mp->m_sb.sb_agcount - 1; | ||
178 | |||
179 | for (agno = start_agno; agno <= end_agno; agno++) { | ||
180 | error = -xfs_trim_extents(mp, agno, start, len, minlen, | ||
181 | &blocks_trimmed); | ||
182 | if (error) | ||
183 | last_error = error; | ||
184 | } | ||
185 | |||
186 | if (last_error) | ||
187 | return last_error; | ||
188 | |||
189 | range.len = XFS_FSB_TO_B(mp, blocks_trimmed); | ||
190 | if (copy_to_user(urange, &range, sizeof(range))) | ||
191 | return -XFS_ERROR(EFAULT); | ||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | int | ||
196 | xfs_discard_extents( | ||
197 | struct xfs_mount *mp, | ||
198 | struct list_head *list) | ||
199 | { | ||
200 | struct xfs_busy_extent *busyp; | ||
201 | int error = 0; | ||
202 | |||
203 | list_for_each_entry(busyp, list, list) { | ||
204 | trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, | ||
205 | busyp->length); | ||
206 | |||
207 | error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, | ||
208 | XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), | ||
209 | XFS_FSB_TO_BB(mp, busyp->length), | ||
210 | GFP_NOFS, 0); | ||
211 | if (error && error != EOPNOTSUPP) { | ||
212 | xfs_info(mp, | ||
213 | "discard failed for extent [0x%llu,%u], error %d", | ||
214 | (unsigned long long)busyp->bno, | ||
215 | busyp->length, | ||
216 | error); | ||
217 | return error; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | return 0; | ||
222 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h new file mode 100644 index 000000000000..344879aea646 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.h | |||
@@ -0,0 +1,10 @@ | |||
1 | #ifndef XFS_DISCARD_H | ||
2 | #define XFS_DISCARD_H 1 | ||
3 | |||
4 | struct fstrim_range; | ||
5 | struct list_head; | ||
6 | |||
7 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); | ||
8 | extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); | ||
9 | |||
10 | #endif /* XFS_DISCARD_H */ | ||
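xfs_ioc_trim() above is XFS's backend for the generic FITRIM ioctl: user space hands in a byte range and a minimum extent length, and the kernel walks the by-size (cntbt) free-space btree issuing discards. A minimal caller looks roughly like this; FITRIM and struct fstrim_range come from <linux/fs.h>, and headers old enough to lack them would need the definitions supplied by hand.

	#include <fcntl.h>
	#include <limits.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

	int main(int argc, char **argv)
	{
		struct fstrim_range range;
		int fd;

		if (argc < 2)
			return 1;
		fd = open(argv[1], O_RDONLY);	/* any file or dir on the mount */
		if (fd < 0)
			return 1;

		memset(&range, 0, sizeof(range));
		range.len = ULLONG_MAX;		/* trim the whole filesystem */
		range.minlen = 0;		/* raised to the discard granularity */
		if (ioctl(fd, FITRIM, &range) < 0)
			return 1;

		printf("trimmed %llu bytes\n", (unsigned long long)range.len);
		return 0;
	}

On return, range.len is rewritten by xfs_ioc_trim() to the number of bytes actually trimmed, which is why the example prints it.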
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 3764d74790ec..f4f878fc0083 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -70,8 +70,16 @@ xfs_fs_encode_fh( | |||
70 | else | 70 | else |
71 | fileid_type = FILEID_INO32_GEN_PARENT; | 71 | fileid_type = FILEID_INO32_GEN_PARENT; |
72 | 72 | ||
73 | /* filesystem may contain 64bit inode numbers */ | 73 | /* |
74 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) | 74 | * If the filesystem may contain 64bit inode numbers, we need |
75 | * to use larger file handles that can represent them. | ||
76 | * | ||
77 | * While we only allocate inodes that do not fit into 32 bits, any | ||
78 | * large enough filesystem may contain them, thus the slightly | ||
79 | * confusing looking conditional below. | ||
80 | */ | ||
81 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || | ||
82 | (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) | ||
75 | fileid_type |= XFS_FILEID_TYPE_64FLAG; | 83 | fileid_type |= XFS_FILEID_TYPE_64FLAG; |
76 | 84 | ||
77 | /* | 85 | /* |
@@ -81,8 +89,10 @@ xfs_fs_encode_fh( | |||
81 | * seven combinations work. The real answer is "don't use v2". | 89 | * seven combinations work. The real answer is "don't use v2". |
82 | */ | 90 | */ |
83 | len = xfs_fileid_length(fileid_type); | 91 | len = xfs_fileid_length(fileid_type); |
84 | if (*max_len < len) | 92 | if (*max_len < len) { |
93 | *max_len = len; | ||
85 | return 255; | 94 | return 255; |
95 | } | ||
86 | *max_len = len; | 96 | *max_len = len; |
87 | 97 | ||
88 | switch (fileid_type) { | 98 | switch (fileid_type) { |
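The contract being fixed above is easiest to see from user space: name_to_handle_at(2) (or an NFS server encoding a handle) sizes its buffer from whatever the filesystem writes back into the length field, so returning 255 without updating *max_len left callers unable to retry. A hedged sketch of the usual two-step pattern, assuming a glibc and kernel new enough to provide name_to_handle_at():

	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(int argc, char **argv)
	{
		struct file_handle *fh;
		int mount_id;

		if (argc < 2)
			return 1;

		/* first call with no space: the filesystem reports the size it needs */
		fh = malloc(sizeof(*fh));
		fh->handle_bytes = 0;
		if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0) == -1 &&
		    errno != EOVERFLOW)
			return 1;

		/* second call with exactly the size the filesystem asked for */
		fh = realloc(fh, sizeof(*fh) + fh->handle_bytes);
		if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0) == -1)
			return 1;

		printf("handle is %u bytes\n", fh->handle_bytes);
		return 0;
	}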
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ba8ad422a165..7f782af286bf 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -37,10 +37,45 @@ | |||
37 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
38 | 38 | ||
39 | #include <linux/dcache.h> | 39 | #include <linux/dcache.h> |
40 | #include <linux/falloc.h> | ||
40 | 41 | ||
41 | static const struct vm_operations_struct xfs_file_vm_ops; | 42 | static const struct vm_operations_struct xfs_file_vm_ops; |
42 | 43 | ||
43 | /* | 44 | /* |
45 | * Locking primitives for read and write IO paths to ensure we consistently use | ||
46 | * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. | ||
47 | */ | ||
48 | static inline void | ||
49 | xfs_rw_ilock( | ||
50 | struct xfs_inode *ip, | ||
51 | int type) | ||
52 | { | ||
53 | if (type & XFS_IOLOCK_EXCL) | ||
54 | mutex_lock(&VFS_I(ip)->i_mutex); | ||
55 | xfs_ilock(ip, type); | ||
56 | } | ||
57 | |||
58 | static inline void | ||
59 | xfs_rw_iunlock( | ||
60 | struct xfs_inode *ip, | ||
61 | int type) | ||
62 | { | ||
63 | xfs_iunlock(ip, type); | ||
64 | if (type & XFS_IOLOCK_EXCL) | ||
65 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
66 | } | ||
67 | |||
68 | static inline void | ||
69 | xfs_rw_ilock_demote( | ||
70 | struct xfs_inode *ip, | ||
71 | int type) | ||
72 | { | ||
73 | xfs_ilock_demote(ip, type); | ||
74 | if (type & XFS_IOLOCK_EXCL) | ||
75 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
76 | } | ||
77 | |||
78 | /* | ||
44 | * xfs_iozero | 79 | * xfs_iozero |
45 | * | 80 | * |
46 | * xfs_iozero clears the specified range of buffer supplied, | 81 | * xfs_iozero clears the specified range of buffer supplied, |
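The three wrappers above give every read and write path one place that takes inode->i_mutex and the XFS locks in a fixed order: the VFS mutex is acquired only for XFS_IOLOCK_EXCL and always before xfs_ilock(), and released only after xfs_iunlock(). A minimal sketch of the intended use (the function name is invented for illustration; error handling omitted):

	STATIC ssize_t
	example_exclusive_io(
		struct xfs_inode	*ip)
	{
		ssize_t			ret = 0;

		/* takes i_mutex first, then the XFS iolock */
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		/* ... flush/invalidate the page cache, do the IO ... */

		/* drops the iolock first, then i_mutex */
		xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
		return ret;
	}

xfs_rw_ilock_demote() covers the remaining case used by direct IO below: once cached pages have been flushed under the exclusive lock, the caller downgrades to XFS_IOLOCK_SHARED and i_mutex is dropped at the same time.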
@@ -96,19 +131,34 @@ xfs_file_fsync( | |||
96 | { | 131 | { |
97 | struct inode *inode = file->f_mapping->host; | 132 | struct inode *inode = file->f_mapping->host; |
98 | struct xfs_inode *ip = XFS_I(inode); | 133 | struct xfs_inode *ip = XFS_I(inode); |
134 | struct xfs_mount *mp = ip->i_mount; | ||
99 | struct xfs_trans *tp; | 135 | struct xfs_trans *tp; |
100 | int error = 0; | 136 | int error = 0; |
101 | int log_flushed = 0; | 137 | int log_flushed = 0; |
102 | 138 | ||
103 | trace_xfs_file_fsync(ip); | 139 | trace_xfs_file_fsync(ip); |
104 | 140 | ||
105 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 141 | if (XFS_FORCED_SHUTDOWN(mp)) |
106 | return -XFS_ERROR(EIO); | 142 | return -XFS_ERROR(EIO); |
107 | 143 | ||
108 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 144 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
109 | 145 | ||
110 | xfs_ioend_wait(ip); | 146 | xfs_ioend_wait(ip); |
111 | 147 | ||
148 | if (mp->m_flags & XFS_MOUNT_BARRIER) { | ||
149 | /* | ||
150 | * If we have an RT and/or log subvolume we need to make sure | ||
151 | * to flush the write cache the device used for file data | ||
152 | * first. This is to ensure newly written file data make | ||
153 | * it to disk before logging the new inode size in case of | ||
154 | * an extending write. | ||
155 | */ | ||
156 | if (XFS_IS_REALTIME_INODE(ip)) | ||
157 | xfs_blkdev_issue_flush(mp->m_rtdev_targp); | ||
158 | else if (mp->m_logdev_targp != mp->m_ddev_targp) | ||
159 | xfs_blkdev_issue_flush(mp->m_ddev_targp); | ||
160 | } | ||
161 | |||
112 | /* | 162 | /* |
113 | * We always need to make sure that the required inode state is safe on | 163 | * We always need to make sure that the required inode state is safe on |
114 | * disk. The inode might be clean but we still might need to force the | 164 | * disk. The inode might be clean but we still might need to force the |
@@ -140,9 +190,9 @@ xfs_file_fsync( | |||
140 | * updates. The sync transaction will also force the log. | 190 | * updates. The sync transaction will also force the log. |
141 | */ | 191 | */ |
142 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 192 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
143 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); | 193 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); |
144 | error = xfs_trans_reserve(tp, 0, | 194 | error = xfs_trans_reserve(tp, 0, |
145 | XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); | 195 | XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); |
146 | if (error) { | 196 | if (error) { |
147 | xfs_trans_cancel(tp, 0); | 197 | xfs_trans_cancel(tp, 0); |
148 | return -error; | 198 | return -error; |
@@ -174,28 +224,25 @@ xfs_file_fsync( | |||
174 | * force the log. | 224 | * force the log. |
175 | */ | 225 | */ |
176 | if (xfs_ipincount(ip)) { | 226 | if (xfs_ipincount(ip)) { |
177 | error = _xfs_log_force_lsn(ip->i_mount, | 227 | error = _xfs_log_force_lsn(mp, |
178 | ip->i_itemp->ili_last_lsn, | 228 | ip->i_itemp->ili_last_lsn, |
179 | XFS_LOG_SYNC, &log_flushed); | 229 | XFS_LOG_SYNC, &log_flushed); |
180 | } | 230 | } |
181 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 231 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
182 | } | 232 | } |
183 | 233 | ||
184 | if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { | 234 | /* |
185 | /* | 235 | * If we only have a single device, and the log force above was |
186 | * If the log write didn't issue an ordered tag we need | 236 | * a no-op we might have to flush the data device cache here. |
187 | * to flush the disk cache for the data device now. | 237 | * This can only happen for fdatasync/O_DSYNC if we were overwriting |
188 | */ | 238 | * an already allocated file and thus do not have any metadata to |
189 | if (!log_flushed) | 239 | * commit. |
190 | xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); | 240 | */ |
191 | 241 | if ((mp->m_flags & XFS_MOUNT_BARRIER) && | |
192 | /* | 242 | mp->m_logdev_targp == mp->m_ddev_targp && |
193 | * If this inode is on the RT dev we need to flush that | 243 | !XFS_IS_REALTIME_INODE(ip) && |
194 | * cache as well. | 244 | !log_flushed) |
195 | */ | 245 | xfs_blkdev_issue_flush(mp->m_ddev_targp); |
196 | if (XFS_IS_REALTIME_INODE(ip)) | ||
197 | xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); | ||
198 | } | ||
199 | 246 | ||
200 | return -error; | 247 | return -error; |
201 | } | 248 | } |
@@ -262,22 +309,21 @@ xfs_file_aio_read( | |||
262 | if (XFS_FORCED_SHUTDOWN(mp)) | 309 | if (XFS_FORCED_SHUTDOWN(mp)) |
263 | return -EIO; | 310 | return -EIO; |
264 | 311 | ||
265 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
266 | mutex_lock(&inode->i_mutex); | ||
267 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
268 | |||
269 | if (unlikely(ioflags & IO_ISDIRECT)) { | 312 | if (unlikely(ioflags & IO_ISDIRECT)) { |
313 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | ||
314 | |||
270 | if (inode->i_mapping->nrpages) { | 315 | if (inode->i_mapping->nrpages) { |
271 | ret = -xfs_flushinval_pages(ip, | 316 | ret = -xfs_flushinval_pages(ip, |
272 | (iocb->ki_pos & PAGE_CACHE_MASK), | 317 | (iocb->ki_pos & PAGE_CACHE_MASK), |
273 | -1, FI_REMAPF_LOCKED); | 318 | -1, FI_REMAPF_LOCKED); |
319 | if (ret) { | ||
320 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | ||
321 | return ret; | ||
322 | } | ||
274 | } | 323 | } |
275 | mutex_unlock(&inode->i_mutex); | 324 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
276 | if (ret) { | 325 | } else |
277 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 326 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
278 | return ret; | ||
279 | } | ||
280 | } | ||
281 | 327 | ||
282 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 328 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); |
283 | 329 | ||
@@ -285,7 +331,7 @@ xfs_file_aio_read( | |||
285 | if (ret > 0) | 331 | if (ret > 0) |
286 | XFS_STATS_ADD(xs_read_bytes, ret); | 332 | XFS_STATS_ADD(xs_read_bytes, ret); |
287 | 333 | ||
288 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 334 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
289 | return ret; | 335 | return ret; |
290 | } | 336 | } |
291 | 337 | ||
@@ -309,7 +355,7 @@ xfs_file_splice_read( | |||
309 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 355 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
310 | return -EIO; | 356 | return -EIO; |
311 | 357 | ||
312 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 358 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
313 | 359 | ||
314 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); | 360 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); |
315 | 361 | ||
@@ -317,10 +363,61 @@ xfs_file_splice_read( | |||
317 | if (ret > 0) | 363 | if (ret > 0) |
318 | XFS_STATS_ADD(xs_read_bytes, ret); | 364 | XFS_STATS_ADD(xs_read_bytes, ret); |
319 | 365 | ||
320 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 366 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
321 | return ret; | 367 | return ret; |
322 | } | 368 | } |
323 | 369 | ||
370 | STATIC void | ||
371 | xfs_aio_write_isize_update( | ||
372 | struct inode *inode, | ||
373 | loff_t *ppos, | ||
374 | ssize_t bytes_written) | ||
375 | { | ||
376 | struct xfs_inode *ip = XFS_I(inode); | ||
377 | xfs_fsize_t isize = i_size_read(inode); | ||
378 | |||
379 | if (bytes_written > 0) | ||
380 | XFS_STATS_ADD(xs_write_bytes, bytes_written); | ||
381 | |||
382 | if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && | ||
383 | *ppos > isize)) | ||
384 | *ppos = isize; | ||
385 | |||
386 | if (*ppos > ip->i_size) { | ||
387 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
388 | if (*ppos > ip->i_size) | ||
389 | ip->i_size = *ppos; | ||
390 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
391 | } | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | ||
396 | * part of the I/O may have been written to disk before the error occurred. In | ||
397 | * this case the on-disk file size may have been adjusted beyond the in-memory | ||
398 | * file size and now needs to be truncated back. | ||
399 | */ | ||
400 | STATIC void | ||
401 | xfs_aio_write_newsize_update( | ||
402 | struct xfs_inode *ip) | ||
403 | { | ||
404 | if (ip->i_new_size) { | ||
405 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
406 | ip->i_new_size = 0; | ||
407 | if (ip->i_d.di_size > ip->i_size) | ||
408 | ip->i_d.di_size = ip->i_size; | ||
409 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
410 | } | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | ||
415 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | ||
416 | could cause lock inversions between the aio_write path and the splice path | ||
417 | * if someone is doing concurrent splice(2) based writes and write(2) based | ||
418 | * writes to the same inode. The only real way to fix this is to re-implement | ||
419 | * the generic code here with correct locking orders. | ||
420 | */ | ||
324 | STATIC ssize_t | 421 | STATIC ssize_t |
325 | xfs_file_splice_write( | 422 | xfs_file_splice_write( |
326 | struct pipe_inode_info *pipe, | 423 | struct pipe_inode_info *pipe, |
@@ -331,7 +428,7 @@ xfs_file_splice_write( | |||
331 | { | 428 | { |
332 | struct inode *inode = outfilp->f_mapping->host; | 429 | struct inode *inode = outfilp->f_mapping->host; |
333 | struct xfs_inode *ip = XFS_I(inode); | 430 | struct xfs_inode *ip = XFS_I(inode); |
334 | xfs_fsize_t isize, new_size; | 431 | xfs_fsize_t new_size; |
335 | int ioflags = 0; | 432 | int ioflags = 0; |
336 | ssize_t ret; | 433 | ssize_t ret; |
337 | 434 | ||
@@ -355,27 +452,9 @@ xfs_file_splice_write( | |||
355 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | 452 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); |
356 | 453 | ||
357 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 454 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
358 | if (ret > 0) | ||
359 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
360 | 455 | ||
361 | isize = i_size_read(inode); | 456 | xfs_aio_write_isize_update(inode, ppos, ret); |
362 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) | 457 | xfs_aio_write_newsize_update(ip); |
363 | *ppos = isize; | ||
364 | |||
365 | if (*ppos > ip->i_size) { | ||
366 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
367 | if (*ppos > ip->i_size) | ||
368 | ip->i_size = *ppos; | ||
369 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
370 | } | ||
371 | |||
372 | if (ip->i_new_size) { | ||
373 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
374 | ip->i_new_size = 0; | ||
375 | if (ip->i_d.di_size > ip->i_size) | ||
376 | ip->i_d.di_size = ip->i_size; | ||
377 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
378 | } | ||
379 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 458 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
380 | return ret; | 459 | return ret; |
381 | } | 460 | } |
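With the splice path converted, both splice and aio writes now share the two helpers introduced above: xfs_aio_write_isize_update() moves the in-core inode size forward (or winds the file position back after a failed write past EOF), and xfs_aio_write_newsize_update() clears the speculative i_new_size and pulls the on-disk size back if only part of the IO completed. Their intended order around a write looks roughly like this sketch (invented function name, locking around i_new_size omitted):

	STATIC ssize_t
	example_sized_write(
		struct xfs_inode	*ip,
		struct inode		*inode,
		loff_t			*ppos,
		size_t			count,
		ssize_t			(*do_write)(void))
	{
		xfs_fsize_t		new_size = *ppos + count;
		ssize_t			ret;

		/* before the IO: record how large the file may become */
		if (new_size > ip->i_size)
			ip->i_new_size = new_size;

		ret = do_write();	/* generic_file_*_write() in the real paths */

		/* after the IO: fix up size and position first, then drop the
		 * speculative i_new_size so ENOSPC or a short write cannot
		 * leave the on-disk size ahead of the in-memory one */
		xfs_aio_write_isize_update(inode, ppos, ret);
		xfs_aio_write_newsize_update(ip);
		return ret;
	}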
@@ -562,247 +641,318 @@ out_lock: | |||
562 | return error; | 641 | return error; |
563 | } | 642 | } |
564 | 643 | ||
644 | /* | ||
645 | * Common pre-write limit and setup checks. | ||
646 | * | ||
647 | * Returns with iolock held according to @iolock. | ||
648 | */ | ||
565 | STATIC ssize_t | 649 | STATIC ssize_t |
566 | xfs_file_aio_write( | 650 | xfs_file_aio_write_checks( |
567 | struct kiocb *iocb, | 651 | struct file *file, |
568 | const struct iovec *iovp, | 652 | loff_t *pos, |
569 | unsigned long nr_segs, | 653 | size_t *count, |
570 | loff_t pos) | 654 | int *iolock) |
571 | { | 655 | { |
572 | struct file *file = iocb->ki_filp; | 656 | struct inode *inode = file->f_mapping->host; |
573 | struct address_space *mapping = file->f_mapping; | ||
574 | struct inode *inode = mapping->host; | ||
575 | struct xfs_inode *ip = XFS_I(inode); | 657 | struct xfs_inode *ip = XFS_I(inode); |
576 | struct xfs_mount *mp = ip->i_mount; | 658 | xfs_fsize_t new_size; |
577 | ssize_t ret = 0, error = 0; | 659 | int error = 0; |
578 | int ioflags = 0; | ||
579 | xfs_fsize_t isize, new_size; | ||
580 | int iolock; | ||
581 | size_t ocount = 0, count; | ||
582 | int need_i_mutex; | ||
583 | 660 | ||
584 | XFS_STATS_INC(xs_write_calls); | 661 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
662 | if (error) { | ||
663 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | ||
664 | *iolock = 0; | ||
665 | return error; | ||
666 | } | ||
585 | 667 | ||
586 | BUG_ON(iocb->ki_pos != pos); | 668 | new_size = *pos + *count; |
669 | if (new_size > ip->i_size) | ||
670 | ip->i_new_size = new_size; | ||
587 | 671 | ||
588 | if (unlikely(file->f_flags & O_DIRECT)) | 672 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) |
589 | ioflags |= IO_ISDIRECT; | 673 | file_update_time(file); |
590 | if (file->f_mode & FMODE_NOCMTIME) | ||
591 | ioflags |= IO_INVIS; | ||
592 | 674 | ||
593 | error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); | 675 | /* |
676 | * If the offset is beyond the size of the file, we need to zero any | ||
677 | * blocks that fall between the existing EOF and the start of this | ||
678 | * write. | ||
679 | */ | ||
680 | if (*pos > ip->i_size) | ||
681 | error = -xfs_zero_eof(ip, *pos, ip->i_size); | ||
682 | |||
683 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
594 | if (error) | 684 | if (error) |
595 | return error; | 685 | return error; |
596 | 686 | ||
597 | count = ocount; | 687 | /* |
598 | if (count == 0) | 688 | * If we're writing the file then make sure to clear the setuid and |
599 | return 0; | 689 | * setgid bits if the process is not being run by root. This keeps |
600 | 690 | * people from modifying setuid and setgid binaries. | |
601 | xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); | 691 | */ |
692 | return file_remove_suid(file); | ||
602 | 693 | ||
603 | if (XFS_FORCED_SHUTDOWN(mp)) | 694 | } |
604 | return -EIO; | ||
605 | 695 | ||
606 | relock: | 696 | /* |
607 | if (ioflags & IO_ISDIRECT) { | 697 | * xfs_file_dio_aio_write - handle direct IO writes |
608 | iolock = XFS_IOLOCK_SHARED; | 698 | * |
609 | need_i_mutex = 0; | 699 | * Lock the inode appropriately to prepare for and issue a direct IO write. |
610 | } else { | 700 | * By separating it from the buffered write path we remove all the tricky to |
611 | iolock = XFS_IOLOCK_EXCL; | 701 | * follow locking changes and looping. |
612 | need_i_mutex = 1; | 702 | * |
613 | mutex_lock(&inode->i_mutex); | 703 | * If there are cached pages or we're extending the file, we need IOLOCK_EXCL |
704 | * until we're sure the bytes at the new EOF have been zeroed and/or the cached | ||
705 | * pages are flushed out. | ||
706 | * | ||
707 | * In most cases the direct IO writes will be done holding IOLOCK_SHARED | ||
708 | * allowing them to be done in parallel with reads and other direct IO writes. | ||
709 | * However, if the IO is not aligned to filesystem blocks, the direct IO layer | ||
710 | * needs to do sub-block zeroing and that requires serialisation against other | ||
711 | * direct IOs to the same block. In this case we need to serialise the | ||
712 | * submission of the unaligned IOs so that we don't get racing block zeroing in | ||
713 | * the dio layer. To avoid the problem with aio, we also need to wait for | ||
714 | * outstanding IOs to complete so that unwritten extent conversion is completed | ||
715 | * before we try to map the overlapping block. This is currently implemented by | ||
716 | * hitting it with a big hammer (i.e. xfs_ioend_wait()). | ||
717 | * | ||
718 | * Returns with locks held indicated by @iolock and errors indicated by | ||
719 | * negative return values. | ||
720 | */ | ||
721 | STATIC ssize_t | ||
722 | xfs_file_dio_aio_write( | ||
723 | struct kiocb *iocb, | ||
724 | const struct iovec *iovp, | ||
725 | unsigned long nr_segs, | ||
726 | loff_t pos, | ||
727 | size_t ocount, | ||
728 | int *iolock) | ||
729 | { | ||
730 | struct file *file = iocb->ki_filp; | ||
731 | struct address_space *mapping = file->f_mapping; | ||
732 | struct inode *inode = mapping->host; | ||
733 | struct xfs_inode *ip = XFS_I(inode); | ||
734 | struct xfs_mount *mp = ip->i_mount; | ||
735 | ssize_t ret = 0; | ||
736 | size_t count = ocount; | ||
737 | int unaligned_io = 0; | ||
738 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | ||
739 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
740 | |||
741 | *iolock = 0; | ||
742 | if ((pos & target->bt_smask) || (count & target->bt_smask)) | ||
743 | return -XFS_ERROR(EINVAL); | ||
744 | |||
745 | if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) | ||
746 | unaligned_io = 1; | ||
747 | |||
748 | if (unaligned_io || mapping->nrpages || pos > ip->i_size) | ||
749 | *iolock = XFS_IOLOCK_EXCL; | ||
750 | else | ||
751 | *iolock = XFS_IOLOCK_SHARED; | ||
752 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | ||
753 | |||
754 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); | ||
755 | if (ret) | ||
756 | return ret; | ||
757 | |||
758 | if (mapping->nrpages) { | ||
759 | WARN_ON(*iolock != XFS_IOLOCK_EXCL); | ||
760 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | ||
761 | FI_REMAPF_LOCKED); | ||
762 | if (ret) | ||
763 | return ret; | ||
614 | } | 764 | } |
615 | 765 | ||
616 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 766 | /* |
617 | 767 | * If we are doing unaligned IO, wait for all other IO to drain, | |
618 | start: | 768 | * otherwise demote the lock if we had to flush cached pages |
619 | error = -generic_write_checks(file, &pos, &count, | 769 | */ |
620 | S_ISBLK(inode->i_mode)); | 770 | if (unaligned_io) |
621 | if (error) { | 771 | xfs_ioend_wait(ip); |
622 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 772 | else if (*iolock == XFS_IOLOCK_EXCL) { |
623 | goto out_unlock_mutex; | 773 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
774 | *iolock = XFS_IOLOCK_SHARED; | ||
624 | } | 775 | } |
625 | 776 | ||
626 | if (ioflags & IO_ISDIRECT) { | 777 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
627 | xfs_buftarg_t *target = | 778 | ret = generic_file_direct_write(iocb, iovp, |
628 | XFS_IS_REALTIME_INODE(ip) ? | 779 | &nr_segs, pos, &iocb->ki_pos, count, ocount); |
629 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
630 | 780 | ||
631 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { | 781 | /* No fallback to buffered IO on errors for XFS. */ |
632 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 782 | ASSERT(ret < 0 || ret == count); |
633 | return XFS_ERROR(-EINVAL); | 783 | return ret; |
634 | } | 784 | } |
635 | 785 | ||
636 | if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { | 786 | STATIC ssize_t |
637 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 787 | xfs_file_buffered_aio_write( |
638 | iolock = XFS_IOLOCK_EXCL; | 788 | struct kiocb *iocb, |
639 | need_i_mutex = 1; | 789 | const struct iovec *iovp, |
640 | mutex_lock(&inode->i_mutex); | 790 | unsigned long nr_segs, |
641 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 791 | loff_t pos, |
642 | goto start; | 792 | size_t ocount, |
643 | } | 793 | int *iolock) |
644 | } | 794 | { |
795 | struct file *file = iocb->ki_filp; | ||
796 | struct address_space *mapping = file->f_mapping; | ||
797 | struct inode *inode = mapping->host; | ||
798 | struct xfs_inode *ip = XFS_I(inode); | ||
799 | ssize_t ret; | ||
800 | int enospc = 0; | ||
801 | size_t count = ocount; | ||
645 | 802 | ||
646 | new_size = pos + count; | 803 | *iolock = XFS_IOLOCK_EXCL; |
647 | if (new_size > ip->i_size) | 804 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); |
648 | ip->i_new_size = new_size; | ||
649 | 805 | ||
650 | if (likely(!(ioflags & IO_INVIS))) | 806 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); |
651 | file_update_time(file); | 807 | if (ret) |
808 | return ret; | ||
809 | |||
810 | /* We can write back this queue in page reclaim */ | ||
811 | current->backing_dev_info = mapping->backing_dev_info; | ||
652 | 812 | ||
813 | write_retry: | ||
814 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); | ||
815 | ret = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
816 | pos, &iocb->ki_pos, count, ret); | ||
653 | /* | 817 | /* |
654 | * If the offset is beyond the size of the file, we have a couple | 818 | * if we just got an ENOSPC, flush the inode now we aren't holding any |
655 | * of things to do. First, if there is already space allocated | 819 | * page locks and retry *once* |
656 | * we need to either create holes or zero the disk or ... | ||
657 | * | ||
658 | * If there is a page where the previous size lands, we need | ||
659 | * to zero it out up to the new size. | ||
660 | */ | 820 | */ |
661 | 821 | if (ret == -ENOSPC && !enospc) { | |
662 | if (pos > ip->i_size) { | 822 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); |
663 | error = xfs_zero_eof(ip, pos, ip->i_size); | 823 | if (ret) |
664 | if (error) { | 824 | return ret; |
665 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 825 | enospc = 1; |
666 | goto out_unlock_internal; | 826 | goto write_retry; |
667 | } | ||
668 | } | 827 | } |
669 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 828 | current->backing_dev_info = NULL; |
829 | return ret; | ||
830 | } | ||
670 | 831 | ||
671 | /* | 832 | STATIC ssize_t |
672 | * If we're writing the file then make sure to clear the | 833 | xfs_file_aio_write( |
673 | * setuid and setgid bits if the process is not being run | 834 | struct kiocb *iocb, |
674 | * by root. This keeps people from modifying setuid and | 835 | const struct iovec *iovp, |
675 | * setgid binaries. | 836 | unsigned long nr_segs, |
676 | */ | 837 | loff_t pos) |
677 | error = -file_remove_suid(file); | 838 | { |
678 | if (unlikely(error)) | 839 | struct file *file = iocb->ki_filp; |
679 | goto out_unlock_internal; | 840 | struct address_space *mapping = file->f_mapping; |
841 | struct inode *inode = mapping->host; | ||
842 | struct xfs_inode *ip = XFS_I(inode); | ||
843 | ssize_t ret; | ||
844 | int iolock; | ||
845 | size_t ocount = 0; | ||
680 | 846 | ||
681 | /* We can write back this queue in page reclaim */ | 847 | XFS_STATS_INC(xs_write_calls); |
682 | current->backing_dev_info = mapping->backing_dev_info; | ||
683 | 848 | ||
684 | if ((ioflags & IO_ISDIRECT)) { | 849 | BUG_ON(iocb->ki_pos != pos); |
685 | if (mapping->nrpages) { | ||
686 | WARN_ON(need_i_mutex == 0); | ||
687 | error = xfs_flushinval_pages(ip, | ||
688 | (pos & PAGE_CACHE_MASK), | ||
689 | -1, FI_REMAPF_LOCKED); | ||
690 | if (error) | ||
691 | goto out_unlock_internal; | ||
692 | } | ||
693 | 850 | ||
694 | if (need_i_mutex) { | 851 | ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); |
695 | /* demote the lock now the cached pages are gone */ | 852 | if (ret) |
696 | xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); | 853 | return ret; |
697 | mutex_unlock(&inode->i_mutex); | ||
698 | 854 | ||
699 | iolock = XFS_IOLOCK_SHARED; | 855 | if (ocount == 0) |
700 | need_i_mutex = 0; | 856 | return 0; |
701 | } | ||
702 | 857 | ||
703 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); | 858 | xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); |
704 | ret = generic_file_direct_write(iocb, iovp, | ||
705 | &nr_segs, pos, &iocb->ki_pos, count, ocount); | ||
706 | 859 | ||
707 | /* | 860 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
708 | * direct-io write to a hole: fall through to buffered I/O | 861 | return -EIO; |
709 | * for completing the rest of the request. | ||
710 | */ | ||
711 | if (ret >= 0 && ret != count) { | ||
712 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
713 | 862 | ||
714 | pos += ret; | 863 | if (unlikely(file->f_flags & O_DIRECT)) |
715 | count -= ret; | 864 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, |
865 | ocount, &iolock); | ||
866 | else | ||
867 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | ||
868 | ocount, &iolock); | ||
716 | 869 | ||
717 | ioflags &= ~IO_ISDIRECT; | 870 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); |
718 | xfs_iunlock(ip, iolock); | ||
719 | goto relock; | ||
720 | } | ||
721 | } else { | ||
722 | int enospc = 0; | ||
723 | ssize_t ret2 = 0; | ||
724 | 871 | ||
725 | write_retry: | 872 | if (ret <= 0) |
726 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); | 873 | goto out_unlock; |
727 | ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
728 | pos, &iocb->ki_pos, count, ret); | ||
729 | /* | ||
730 | * if we just got an ENOSPC, flush the inode now we | ||
731 | * aren't holding any page locks and retry *once* | ||
732 | */ | ||
733 | if (ret2 == -ENOSPC && !enospc) { | ||
734 | error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | ||
735 | if (error) | ||
736 | goto out_unlock_internal; | ||
737 | enospc = 1; | ||
738 | goto write_retry; | ||
739 | } | ||
740 | ret = ret2; | ||
741 | } | ||
742 | 874 | ||
743 | current->backing_dev_info = NULL; | 875 | /* Handle various SYNC-type writes */ |
876 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
877 | loff_t end = pos + ret - 1; | ||
878 | int error, error2; | ||
744 | 879 | ||
745 | isize = i_size_read(inode); | 880 | xfs_rw_iunlock(ip, iolock); |
746 | if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) | 881 | error = filemap_write_and_wait_range(mapping, pos, end); |
747 | iocb->ki_pos = isize; | 882 | xfs_rw_ilock(ip, iolock); |
748 | 883 | ||
749 | if (iocb->ki_pos > ip->i_size) { | 884 | error2 = -xfs_file_fsync(file, |
750 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 885 | (file->f_flags & __O_SYNC) ? 0 : 1); |
751 | if (iocb->ki_pos > ip->i_size) | 886 | if (error) |
752 | ip->i_size = iocb->ki_pos; | 887 | ret = error; |
753 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 888 | else if (error2) |
889 | ret = error2; | ||
754 | } | 890 | } |
755 | 891 | ||
756 | error = -ret; | 892 | out_unlock: |
757 | if (ret <= 0) | 893 | xfs_aio_write_newsize_update(ip); |
758 | goto out_unlock_internal; | 894 | xfs_rw_iunlock(ip, iolock); |
895 | return ret; | ||
896 | } | ||
759 | 897 | ||
760 | XFS_STATS_ADD(xs_write_bytes, ret); | 898 | STATIC long |
899 | xfs_file_fallocate( | ||
900 | struct file *file, | ||
901 | int mode, | ||
902 | loff_t offset, | ||
903 | loff_t len) | ||
904 | { | ||
905 | struct inode *inode = file->f_path.dentry->d_inode; | ||
906 | long error; | ||
907 | loff_t new_size = 0; | ||
908 | xfs_flock64_t bf; | ||
909 | xfs_inode_t *ip = XFS_I(inode); | ||
910 | int cmd = XFS_IOC_RESVSP; | ||
911 | int attr_flags = XFS_ATTR_NOLOCK; | ||
761 | 912 | ||
762 | /* Handle various SYNC-type writes */ | 913 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
763 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 914 | return -EOPNOTSUPP; |
764 | loff_t end = pos + ret - 1; | ||
765 | int error2; | ||
766 | 915 | ||
767 | xfs_iunlock(ip, iolock); | 916 | bf.l_whence = 0; |
768 | if (need_i_mutex) | 917 | bf.l_start = offset; |
769 | mutex_unlock(&inode->i_mutex); | 918 | bf.l_len = len; |
770 | 919 | ||
771 | error2 = filemap_write_and_wait_range(mapping, pos, end); | 920 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
772 | if (!error) | ||
773 | error = error2; | ||
774 | if (need_i_mutex) | ||
775 | mutex_lock(&inode->i_mutex); | ||
776 | xfs_ilock(ip, iolock); | ||
777 | 921 | ||
778 | error2 = -xfs_file_fsync(file, | 922 | if (mode & FALLOC_FL_PUNCH_HOLE) |
779 | (file->f_flags & __O_SYNC) ? 0 : 1); | 923 | cmd = XFS_IOC_UNRESVSP; |
780 | if (!error) | 924 | |
781 | error = error2; | 925 | /* check the new inode size is valid before allocating */ |
926 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
927 | offset + len > i_size_read(inode)) { | ||
928 | new_size = offset + len; | ||
929 | error = inode_newsize_ok(inode, new_size); | ||
930 | if (error) | ||
931 | goto out_unlock; | ||
782 | } | 932 | } |
783 | 933 | ||
784 | out_unlock_internal: | 934 | if (file->f_flags & O_DSYNC) |
785 | if (ip->i_new_size) { | 935 | attr_flags |= XFS_ATTR_SYNC; |
786 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 936 | |
787 | ip->i_new_size = 0; | 937 | error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); |
788 | /* | 938 | if (error) |
789 | * If this was a direct or synchronous I/O that failed (such | 939 | goto out_unlock; |
790 | * as ENOSPC) then part of the I/O may have been written to | 940 | |
791 | * disk before the error occured. In this case the on-disk | 941 | /* Change file size if needed */ |
792 | * file size may have been adjusted beyond the in-memory file | 942 | if (new_size) { |
793 | * size and now needs to be truncated back. | 943 | struct iattr iattr; |
794 | */ | 944 | |
795 | if (ip->i_d.di_size > ip->i_size) | 945 | iattr.ia_valid = ATTR_SIZE; |
796 | ip->i_d.di_size = ip->i_size; | 946 | iattr.ia_size = new_size; |
797 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 947 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); |
798 | } | 948 | } |
799 | xfs_iunlock(ip, iolock); | 949 | |
800 | out_unlock_mutex: | 950 | out_unlock: |
801 | if (need_i_mutex) | 951 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
802 | mutex_unlock(&inode->i_mutex); | 952 | return error; |
803 | return -error; | ||
804 | } | 953 | } |
805 | 954 | ||
955 | |||
806 | STATIC int | 956 | STATIC int |
807 | xfs_file_open( | 957 | xfs_file_open( |
808 | struct inode *inode, | 958 | struct inode *inode, |
@@ -921,6 +1071,7 @@ const struct file_operations xfs_file_operations = { | |||
921 | .open = xfs_file_open, | 1071 | .open = xfs_file_open, |
922 | .release = xfs_file_release, | 1072 | .release = xfs_file_release, |
923 | .fsync = xfs_file_fsync, | 1073 | .fsync = xfs_file_fsync, |
1074 | .fallocate = xfs_file_fallocate, | ||
924 | }; | 1075 | }; |
925 | 1076 | ||
926 | const struct file_operations xfs_dir_file_operations = { | 1077 | const struct file_operations xfs_dir_file_operations = { |
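Registering .fallocate in xfs_file_operations above routes both preallocation and the newly supported hole punching through xfs_file_fallocate(), which translates them into the existing XFS_IOC_RESVSP/XFS_IOC_UNRESVSP space reservation code. A small user-space caller, assuming a glibc that exposes fallocate(2) and the FALLOC_FL_* flags from <linux/falloc.h>:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>	/* FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE */

	int main(void)
	{
		int fd = open("testfile", O_RDWR | O_CREAT, 0644);

		if (fd < 0)
			return 1;

		/* reserve 16MB of space without changing the file size */
		if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 16 << 20) < 0)
			return 1;

		/* punch the first 1MB back out again; hole punching is
		 * conventionally combined with KEEP_SIZE */
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      0, 1 << 20) < 0)
			return 1;

		return 0;
	}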
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 1f279b012f94..ed88ed16811c 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c | |||
@@ -32,10 +32,9 @@ xfs_tosspages( | |||
32 | xfs_off_t last, | 32 | xfs_off_t last, |
33 | int fiopt) | 33 | int fiopt) |
34 | { | 34 | { |
35 | struct address_space *mapping = VFS_I(ip)->i_mapping; | 35 | /* can't toss partial tail pages, so mask them out */ |
36 | 36 | last &= ~(PAGE_SIZE - 1); | |
37 | if (mapping->nrpages) | 37 | truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); |
38 | truncate_inode_pages(mapping, first); | ||
39 | } | 38 | } |
40 | 39 | ||
41 | int | 40 | int |
@@ -50,12 +49,11 @@ xfs_flushinval_pages( | |||
50 | 49 | ||
51 | trace_xfs_pagecache_inval(ip, first, last); | 50 | trace_xfs_pagecache_inval(ip, first, last); |
52 | 51 | ||
53 | if (mapping->nrpages) { | 52 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
54 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 53 | ret = filemap_write_and_wait_range(mapping, first, |
55 | ret = filemap_write_and_wait(mapping); | 54 | last == -1 ? LLONG_MAX : last); |
56 | if (!ret) | 55 | if (!ret) |
57 | truncate_inode_pages(mapping, first); | 56 | truncate_inode_pages_range(mapping, first, last); |
58 | } | ||
59 | return -ret; | 57 | return -ret; |
60 | } | 58 | } |
61 | 59 | ||
@@ -71,10 +69,9 @@ xfs_flush_pages( | |||
71 | int ret = 0; | 69 | int ret = 0; |
72 | int ret2; | 70 | int ret2; |
73 | 71 | ||
74 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 72 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
75 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 73 | ret = -filemap_fdatawrite_range(mapping, first, |
76 | ret = -filemap_fdatawrite(mapping); | 74 | last == -1 ? LLONG_MAX : last); |
77 | } | ||
78 | if (flags & XBF_ASYNC) | 75 | if (flags & XBF_ASYNC) |
79 | return ret; | 76 | return ret; |
80 | ret2 = xfs_wait_on_pages(ip, first, last); | 77 | ret2 = xfs_wait_on_pages(ip, first, last); |
@@ -91,7 +88,9 @@ xfs_wait_on_pages( | |||
91 | { | 88 | { |
92 | struct address_space *mapping = VFS_I(ip)->i_mapping; | 89 | struct address_space *mapping = VFS_I(ip)->i_mapping; |
93 | 90 | ||
94 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) | 91 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { |
95 | return -filemap_fdatawait(mapping); | 92 | return -filemap_fdatawait_range(mapping, first, |
93 | last == -1 ? ip->i_size - 1 : last); | ||
94 | } | ||
96 | return 0; | 95 | return 0; |
97 | } | 96 | } |
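The helpers above now act on byte ranges rather than on whole mappings, with last == -1 as the conventional "to end of file" value, and xfs_tosspages() rounds the end of the range down to a page boundary so a partial tail page that still carries valid data is never thrown away. For a 4k page size the masking works out as in this illustrative fragment (not code from the patch):

	/* request: toss cached pages for bytes [0, 10000) with PAGE_SIZE == 4096 */
	xfs_off_t last = 10000;

	last &= ~(PAGE_SIZE - 1);	/* 10000 rounds down to 8192 */
	/* pages covering [0, 8191] are dropped; the page that holds bytes
	 * 8192..10000 is left in place because it extends past the range */
	truncate_inode_pages_range(VFS_I(ip)->i_mapping, 0, last - 1);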
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 2ae8b1ccb02e..76e81cff70b9 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c | |||
@@ -16,7 +16,6 @@ | |||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_cred.h" | ||
20 | #include "xfs_sysctl.h" | 19 | #include "xfs_sysctl.h" |
21 | 20 | ||
22 | /* | 21 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h deleted file mode 100644 index 69f71caf061c..000000000000 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_GLOBALS_H__ | ||
19 | #define __XFS_GLOBALS_H__ | ||
20 | |||
21 | extern uint64_t xfs_panic_mask; /* set to cause more panics */ | ||
22 | |||
23 | #endif /* __XFS_GLOBALS_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 3b9e626f7cd1..acca2c5ca3fa 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_dfrag.h" | 39 | #include "xfs_dfrag.h" |
40 | #include "xfs_fsops.h" | 40 | #include "xfs_fsops.h" |
41 | #include "xfs_vnodeops.h" | 41 | #include "xfs_vnodeops.h" |
42 | #include "xfs_discard.h" | ||
42 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
43 | #include "xfs_inode_item.h" | 44 | #include "xfs_inode_item.h" |
44 | #include "xfs_export.h" | 45 | #include "xfs_export.h" |
@@ -416,7 +417,7 @@ xfs_attrlist_by_handle( | |||
416 | if (IS_ERR(dentry)) | 417 | if (IS_ERR(dentry)) |
417 | return PTR_ERR(dentry); | 418 | return PTR_ERR(dentry); |
418 | 419 | ||
419 | kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); | 420 | kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL); |
420 | if (!kbuf) | 421 | if (!kbuf) |
421 | goto out_dput; | 422 | goto out_dput; |
422 | 423 | ||
@@ -623,6 +624,10 @@ xfs_ioc_space( | |||
623 | 624 | ||
624 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | 625 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) |
625 | attr_flags |= XFS_ATTR_NONBLOCK; | 626 | attr_flags |= XFS_ATTR_NONBLOCK; |
627 | |||
628 | if (filp->f_flags & O_DSYNC) | ||
629 | attr_flags |= XFS_ATTR_SYNC; | ||
630 | |||
626 | if (ioflags & IO_INVIS) | 631 | if (ioflags & IO_INVIS) |
627 | attr_flags |= XFS_ATTR_DMI; | 632 | attr_flags |= XFS_ATTR_DMI; |
628 | 633 | ||
@@ -694,14 +699,19 @@ xfs_ioc_fsgeometry_v1( | |||
694 | xfs_mount_t *mp, | 699 | xfs_mount_t *mp, |
695 | void __user *arg) | 700 | void __user *arg) |
696 | { | 701 | { |
697 | xfs_fsop_geom_v1_t fsgeo; | 702 | xfs_fsop_geom_t fsgeo; |
698 | int error; | 703 | int error; |
699 | 704 | ||
700 | error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); | 705 | error = xfs_fs_geometry(mp, &fsgeo, 3); |
701 | if (error) | 706 | if (error) |
702 | return -error; | 707 | return -error; |
703 | 708 | ||
704 | if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) | 709 | /* |
710 | * Caller should have passed an argument of type | ||
711 | * xfs_fsop_geom_v1_t. This is a proper subset of the | ||
712 | * xfs_fsop_geom_t that xfs_fs_geometry() fills in. | ||
713 | */ | ||
714 | if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) | ||
705 | return -XFS_ERROR(EFAULT); | 715 | return -XFS_ERROR(EFAULT); |
706 | return 0; | 716 | return 0; |
707 | } | 717 | } |
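The rewritten xfs_ioc_fsgeometry_v1() fills a full xfs_fsop_geom_t but copies only sizeof(xfs_fsop_geom_v1_t) back to user space; that is safe only because the v1 layout is a leading subset of the full structure. A generic illustration of the prefix-copy idea (both structures and the helper are invented for this example):

	#include <linux/types.h>
	#include <linux/uaccess.h>

	/* Hypothetical layouts: the v1 struct must be a prefix of the full one. */
	struct example_geom_v1 { __u32 blocksize; __u32 agcount; };
	struct example_geom    { __u32 blocksize; __u32 agcount; __u32 logsectsize; };

	static int example_copy_v1_to_user(void __user *arg,
					   const struct example_geom *g)
	{
		/* Copying sizeof(v1) bytes hands userspace exactly the fields
		 * it knows about, because they sit at the front of the struct. */
		if (copy_to_user(arg, g, sizeof(struct example_geom_v1)))
			return -EFAULT;
		return 0;
	}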
@@ -790,7 +800,7 @@ xfs_ioc_fsgetxattr( | |||
790 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 800 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
791 | fa.fsx_xflags = xfs_ip2xflags(ip); | 801 | fa.fsx_xflags = xfs_ip2xflags(ip); |
792 | fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; | 802 | fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; |
793 | fa.fsx_projid = ip->i_d.di_projid; | 803 | fa.fsx_projid = xfs_get_projid(ip); |
794 | 804 | ||
795 | if (attr) { | 805 | if (attr) { |
796 | if (ip->i_afp) { | 806 | if (ip->i_afp) { |
@@ -909,10 +919,10 @@ xfs_ioctl_setattr( | |||
909 | return XFS_ERROR(EIO); | 919 | return XFS_ERROR(EIO); |
910 | 920 | ||
911 | /* | 921 | /* |
912 | * Disallow 32bit project ids because on-disk structure | 922 | * Disallow 32bit project ids when projid32bit feature is not enabled. |
913 | * is 16bit only. | ||
914 | */ | 923 | */ |
915 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1)) | 924 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && |
925 | !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) | ||
916 | return XFS_ERROR(EINVAL); | 926 | return XFS_ERROR(EINVAL); |
917 | 927 | ||
918 | /* | 928 | /* |
@@ -961,7 +971,7 @@ xfs_ioctl_setattr( | |||
961 | if (mask & FSX_PROJID) { | 971 | if (mask & FSX_PROJID) { |
962 | if (XFS_IS_QUOTA_RUNNING(mp) && | 972 | if (XFS_IS_QUOTA_RUNNING(mp) && |
963 | XFS_IS_PQUOTA_ON(mp) && | 973 | XFS_IS_PQUOTA_ON(mp) && |
964 | ip->i_d.di_projid != fa->fsx_projid) { | 974 | xfs_get_projid(ip) != fa->fsx_projid) { |
965 | ASSERT(tp); | 975 | ASSERT(tp); |
966 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, | 976 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, |
967 | capable(CAP_FOWNER) ? | 977 | capable(CAP_FOWNER) ? |
@@ -984,10 +994,22 @@ xfs_ioctl_setattr( | |||
984 | 994 | ||
985 | /* | 995 | /* |
986 | * Extent size must be a multiple of the appropriate block | 996 | * Extent size must be a multiple of the appropriate block |
987 | * size, if set at all. | 997 | * size, if set at all. It must also be smaller than the |
998 | * maximum extent size supported by the filesystem. | ||
999 | * | ||
1000 | * Also, for non-realtime files, limit the extent size hint to | ||
1001 | * half the size of the AGs in the filesystem so alignment | ||
1002 | * doesn't result in extents larger than an AG. | ||
988 | */ | 1003 | */ |
989 | if (fa->fsx_extsize != 0) { | 1004 | if (fa->fsx_extsize != 0) { |
990 | xfs_extlen_t size; | 1005 | xfs_extlen_t size; |
1006 | xfs_fsblock_t extsize_fsb; | ||
1007 | |||
1008 | extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); | ||
1009 | if (extsize_fsb > MAXEXTLEN) { | ||
1010 | code = XFS_ERROR(EINVAL); | ||
1011 | goto error_return; | ||
1012 | } | ||
991 | 1013 | ||
992 | if (XFS_IS_REALTIME_INODE(ip) || | 1014 | if (XFS_IS_REALTIME_INODE(ip) || |
993 | ((mask & FSX_XFLAGS) && | 1015 | ((mask & FSX_XFLAGS) && |
@@ -996,6 +1018,10 @@ xfs_ioctl_setattr( | |||
996 | mp->m_sb.sb_blocklog; | 1018 | mp->m_sb.sb_blocklog; |
997 | } else { | 1019 | } else { |
998 | size = mp->m_sb.sb_blocksize; | 1020 | size = mp->m_sb.sb_blocksize; |
1021 | if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { | ||
1022 | code = XFS_ERROR(EINVAL); | ||
1023 | goto error_return; | ||
1024 | } | ||
999 | } | 1025 | } |
1000 | 1026 | ||
1001 | if (fa->fsx_extsize % size) { | 1027 | if (fa->fsx_extsize % size) { |
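The checks added above cap the extent size hint at MAXEXTLEN filesystem blocks and, for non-realtime files, at half an allocation group so alignment cannot produce extents larger than an AG. A compact sketch of that validation order (the function name is ours; the macros and mount fields are the ones used in the hunks, and the usual xfs headers are assumed):

	/* Sketch of the extent size hint validation added above. */
	STATIC int
	example_check_extsize_hint(struct xfs_mount *mp, __u32 extsize_bytes,
				   int realtime)
	{
		xfs_fsblock_t extsize_fsb = XFS_B_TO_FSB(mp, extsize_bytes);

		/* never allow a hint larger than the biggest extent XFS can map */
		if (extsize_fsb > MAXEXTLEN)
			return XFS_ERROR(EINVAL);

		/* data files: keep alignment within half an allocation group */
		if (!realtime && extsize_fsb > mp->m_sb.sb_agblocks / 2)
			return XFS_ERROR(EINVAL);

		return 0;
	}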
@@ -1063,12 +1089,12 @@ xfs_ioctl_setattr( | |||
1063 | * Change the ownerships and register quota modifications | 1089 | * Change the ownerships and register quota modifications |
1064 | * in the transaction. | 1090 | * in the transaction. |
1065 | */ | 1091 | */ |
1066 | if (ip->i_d.di_projid != fa->fsx_projid) { | 1092 | if (xfs_get_projid(ip) != fa->fsx_projid) { |
1067 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { | 1093 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { |
1068 | olddquot = xfs_qm_vop_chown(tp, ip, | 1094 | olddquot = xfs_qm_vop_chown(tp, ip, |
1069 | &ip->i_gdquot, gdqp); | 1095 | &ip->i_gdquot, gdqp); |
1070 | } | 1096 | } |
1071 | ip->i_d.di_projid = fa->fsx_projid; | 1097 | xfs_set_projid(ip, fa->fsx_projid); |
1072 | 1098 | ||
1073 | /* | 1099 | /* |
1074 | * We may have to rev the inode as well as | 1100 | * We may have to rev the inode as well as |
@@ -1088,8 +1114,8 @@ xfs_ioctl_setattr( | |||
1088 | xfs_diflags_to_linux(ip); | 1114 | xfs_diflags_to_linux(ip); |
1089 | } | 1115 | } |
1090 | 1116 | ||
1117 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | ||
1091 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 1118 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
1092 | xfs_ichgtime(ip, XFS_ICHGTIME_CHG); | ||
1093 | 1119 | ||
1094 | XFS_STATS_INC(xs_ig_attrchg); | 1120 | XFS_STATS_INC(xs_ig_attrchg); |
1095 | 1121 | ||
@@ -1294,6 +1320,8 @@ xfs_file_ioctl( | |||
1294 | trace_xfs_file_ioctl(ip); | 1320 | trace_xfs_file_ioctl(ip); |
1295 | 1321 | ||
1296 | switch (cmd) { | 1322 | switch (cmd) { |
1323 | case FITRIM: | ||
1324 | return xfs_ioc_trim(mp, arg); | ||
1297 | case XFS_IOC_ALLOCSP: | 1325 | case XFS_IOC_ALLOCSP: |
1298 | case XFS_IOC_FREESP: | 1326 | case XFS_IOC_FREESP: |
1299 | case XFS_IOC_RESVSP: | 1327 | case XFS_IOC_RESVSP: |
@@ -1301,7 +1329,8 @@ xfs_file_ioctl( | |||
1301 | case XFS_IOC_ALLOCSP64: | 1329 | case XFS_IOC_ALLOCSP64: |
1302 | case XFS_IOC_FREESP64: | 1330 | case XFS_IOC_FREESP64: |
1303 | case XFS_IOC_RESVSP64: | 1331 | case XFS_IOC_RESVSP64: |
1304 | case XFS_IOC_UNRESVSP64: { | 1332 | case XFS_IOC_UNRESVSP64: |
1333 | case XFS_IOC_ZERO_RANGE: { | ||
1305 | xfs_flock64_t bf; | 1334 | xfs_flock64_t bf; |
1306 | 1335 | ||
1307 | if (copy_from_user(&bf, arg, sizeof(bf))) | 1336 | if (copy_from_user(&bf, arg, sizeof(bf))) |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 6c83f7f62dc9..54e623bfbb85 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -164,7 +164,8 @@ xfs_ioctl32_bstat_copyin( | |||
164 | get_user(bstat->bs_extsize, &bstat32->bs_extsize) || | 164 | get_user(bstat->bs_extsize, &bstat32->bs_extsize) || |
165 | get_user(bstat->bs_extents, &bstat32->bs_extents) || | 165 | get_user(bstat->bs_extents, &bstat32->bs_extents) || |
166 | get_user(bstat->bs_gen, &bstat32->bs_gen) || | 166 | get_user(bstat->bs_gen, &bstat32->bs_gen) || |
167 | get_user(bstat->bs_projid, &bstat32->bs_projid) || | 167 | get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || |
168 | get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || | ||
168 | get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || | 169 | get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || |
169 | get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || | 170 | get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || |
170 | get_user(bstat->bs_aextents, &bstat32->bs_aextents)) | 171 | get_user(bstat->bs_aextents, &bstat32->bs_aextents)) |
@@ -218,6 +219,7 @@ xfs_bulkstat_one_fmt_compat( | |||
218 | put_user(buffer->bs_extents, &p32->bs_extents) || | 219 | put_user(buffer->bs_extents, &p32->bs_extents) || |
219 | put_user(buffer->bs_gen, &p32->bs_gen) || | 220 | put_user(buffer->bs_gen, &p32->bs_gen) || |
220 | put_user(buffer->bs_projid, &p32->bs_projid) || | 221 | put_user(buffer->bs_projid, &p32->bs_projid) || |
222 | put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || | ||
221 | put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || | 223 | put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || |
222 | put_user(buffer->bs_dmstate, &p32->bs_dmstate) || | 224 | put_user(buffer->bs_dmstate, &p32->bs_dmstate) || |
223 | put_user(buffer->bs_aextents, &p32->bs_aextents)) | 225 | put_user(buffer->bs_aextents, &p32->bs_aextents)) |
@@ -574,6 +576,7 @@ xfs_file_compat_ioctl( | |||
574 | case XFS_IOC_FSGEOMETRY_V1: | 576 | case XFS_IOC_FSGEOMETRY_V1: |
575 | case XFS_IOC_FSGROWFSDATA: | 577 | case XFS_IOC_FSGROWFSDATA: |
576 | case XFS_IOC_FSGROWFSRT: | 578 | case XFS_IOC_FSGROWFSRT: |
579 | case XFS_IOC_ZERO_RANGE: | ||
577 | return xfs_file_ioctl(filp, cmd, p); | 580 | return xfs_file_ioctl(filp, cmd, p); |
578 | #else | 581 | #else |
579 | case XFS_IOC_ALLOCSP_32: | 582 | case XFS_IOC_ALLOCSP_32: |
@@ -583,7 +586,8 @@ xfs_file_compat_ioctl( | |||
583 | case XFS_IOC_RESVSP_32: | 586 | case XFS_IOC_RESVSP_32: |
584 | case XFS_IOC_UNRESVSP_32: | 587 | case XFS_IOC_UNRESVSP_32: |
585 | case XFS_IOC_RESVSP64_32: | 588 | case XFS_IOC_RESVSP64_32: |
586 | case XFS_IOC_UNRESVSP64_32: { | 589 | case XFS_IOC_UNRESVSP64_32: |
590 | case XFS_IOC_ZERO_RANGE_32: { | ||
587 | struct xfs_flock64 bf; | 591 | struct xfs_flock64 bf; |
588 | 592 | ||
589 | if (xfs_compat_flock64_copyin(&bf, arg)) | 593 | if (xfs_compat_flock64_copyin(&bf, arg)) |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 1024c4f8ba0d..80f4060e8970 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h | |||
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat { | |||
65 | __s32 bs_extsize; /* extent size */ | 65 | __s32 bs_extsize; /* extent size */ |
66 | __s32 bs_extents; /* number of extents */ | 66 | __s32 bs_extents; /* number of extents */ |
67 | __u32 bs_gen; /* generation count */ | 67 | __u32 bs_gen; /* generation count */ |
68 | __u16 bs_projid; /* project id */ | 68 | __u16 bs_projid_lo; /* lower part of project id */ |
69 | unsigned char bs_pad[14]; /* pad space, unused */ | 69 | #define bs_projid bs_projid_lo /* (previously just bs_projid) */ |
70 | __u16 bs_projid_hi; /* high part of project id */ | ||
71 | unsigned char bs_pad[12]; /* pad space, unused */ | ||
70 | __u32 bs_dmevmask; /* DMIG event mask */ | 72 | __u32 bs_dmevmask; /* DMIG event mask */ |
71 | __u16 bs_dmstate; /* DMIG state info */ | 73 | __u16 bs_dmstate; /* DMIG state info */ |
72 | __u16 bs_aextents; /* attribute number of extents */ | 74 | __u16 bs_aextents; /* attribute number of extents */ |
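Splitting bs_projid into bs_projid_lo and bs_projid_hi (and shrinking the pad by two bytes) keeps the structure size and the old field offset unchanged while making room for 32-bit project IDs; callers reassemble the value from the two halves. A sketch of the combine/split, with hypothetical helper names:

	/* Sketch: rebuild and store a 32-bit project id from the split fields. */
	static inline __u32
	example_bstat_get_projid(const struct compat_xfs_bstat *bs)
	{
		return ((__u32)bs->bs_projid_hi << 16) | bs->bs_projid_lo;
	}

	static inline void
	example_bstat_set_projid(struct compat_xfs_bstat *bs, __u32 projid)
	{
		bs->bs_projid_lo = (__u16)(projid & 0xffff);
		bs->bs_projid_hi = (__u16)(projid >> 16);
	}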
@@ -182,6 +184,7 @@ typedef struct compat_xfs_flock64 { | |||
182 | #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) | 184 | #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) |
183 | #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) | 185 | #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) |
184 | #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) | 186 | #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) |
187 | #define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) | ||
185 | 188 | ||
186 | typedef struct compat_xfs_fsop_geom_v1 { | 189 | typedef struct compat_xfs_fsop_geom_v1 { |
187 | __u32 blocksize; /* filesystem (data) block size */ | 190 | __u32 blocksize; /* filesystem (data) block size */ |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index b1fc2a6bfe83..d44d92cd12b1 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -46,7 +46,6 @@ | |||
46 | #include <linux/namei.h> | 46 | #include <linux/namei.h> |
47 | #include <linux/posix_acl.h> | 47 | #include <linux/posix_acl.h> |
48 | #include <linux/security.h> | 48 | #include <linux/security.h> |
49 | #include <linux/falloc.h> | ||
50 | #include <linux/fiemap.h> | 49 | #include <linux/fiemap.h> |
51 | #include <linux/slab.h> | 50 | #include <linux/slab.h> |
52 | 51 | ||
@@ -71,7 +70,7 @@ xfs_synchronize_times( | |||
71 | 70 | ||
72 | /* | 71 | /* |
73 | * If the linux inode is valid, mark it dirty. | 72 | * If the linux inode is valid, mark it dirty. |
74 | * Used when commiting a dirty inode into a transaction so that | 73 | * Used when committing a dirty inode into a transaction so that |
75 | * the inode will get written back by the linux code | 74 | * the inode will get written back by the linux code |
76 | */ | 75 | */ |
77 | void | 76 | void |
@@ -95,41 +94,6 @@ xfs_mark_inode_dirty( | |||
95 | } | 94 | } |
96 | 95 | ||
97 | /* | 96 | /* |
98 | * Change the requested timestamp in the given inode. | ||
99 | * We don't lock across timestamp updates, and we don't log them but | ||
100 | * we do record the fact that there is dirty information in core. | ||
101 | */ | ||
102 | void | ||
103 | xfs_ichgtime( | ||
104 | xfs_inode_t *ip, | ||
105 | int flags) | ||
106 | { | ||
107 | struct inode *inode = VFS_I(ip); | ||
108 | timespec_t tv; | ||
109 | int sync_it = 0; | ||
110 | |||
111 | tv = current_fs_time(inode->i_sb); | ||
112 | |||
113 | if ((flags & XFS_ICHGTIME_MOD) && | ||
114 | !timespec_equal(&inode->i_mtime, &tv)) { | ||
115 | inode->i_mtime = tv; | ||
116 | sync_it = 1; | ||
117 | } | ||
118 | if ((flags & XFS_ICHGTIME_CHG) && | ||
119 | !timespec_equal(&inode->i_ctime, &tv)) { | ||
120 | inode->i_ctime = tv; | ||
121 | sync_it = 1; | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Update complete - now make sure everyone knows that the inode | ||
126 | * is dirty. | ||
127 | */ | ||
128 | if (sync_it) | ||
129 | xfs_mark_inode_dirty_sync(ip); | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Hook in SELinux. This is not quite correct yet, what we really need | 97 | * Hook in SELinux. This is not quite correct yet, what we really need |
134 | * here (as we do for default ACLs) is a mechanism by which creation of | 98 | * here (as we do for default ACLs) is a mechanism by which creation of |
135 | * these attrs can be journalled at inode creation time (along with the | 99 | * these attrs can be journalled at inode creation time (along with the |
@@ -138,7 +102,8 @@ xfs_ichgtime( | |||
138 | STATIC int | 102 | STATIC int |
139 | xfs_init_security( | 103 | xfs_init_security( |
140 | struct inode *inode, | 104 | struct inode *inode, |
141 | struct inode *dir) | 105 | struct inode *dir, |
106 | const struct qstr *qstr) | ||
142 | { | 107 | { |
143 | struct xfs_inode *ip = XFS_I(inode); | 108 | struct xfs_inode *ip = XFS_I(inode); |
144 | size_t length; | 109 | size_t length; |
@@ -146,7 +111,7 @@ xfs_init_security( | |||
146 | unsigned char *name; | 111 | unsigned char *name; |
147 | int error; | 112 | int error; |
148 | 113 | ||
149 | error = security_inode_init_security(inode, dir, (char **)&name, | 114 | error = security_inode_init_security(inode, dir, qstr, (char **)&name, |
150 | &value, &length); | 115 | &value, &length); |
151 | if (error) { | 116 | if (error) { |
152 | if (error == -EOPNOTSUPP) | 117 | if (error == -EOPNOTSUPP) |
@@ -217,20 +182,20 @@ xfs_vn_mknod( | |||
217 | if (IS_POSIXACL(dir)) { | 182 | if (IS_POSIXACL(dir)) { |
218 | default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); | 183 | default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); |
219 | if (IS_ERR(default_acl)) | 184 | if (IS_ERR(default_acl)) |
220 | return -PTR_ERR(default_acl); | 185 | return PTR_ERR(default_acl); |
221 | 186 | ||
222 | if (!default_acl) | 187 | if (!default_acl) |
223 | mode &= ~current_umask(); | 188 | mode &= ~current_umask(); |
224 | } | 189 | } |
225 | 190 | ||
226 | xfs_dentry_to_name(&name, dentry); | 191 | xfs_dentry_to_name(&name, dentry); |
227 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); | 192 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); |
228 | if (unlikely(error)) | 193 | if (unlikely(error)) |
229 | goto out_free_acl; | 194 | goto out_free_acl; |
230 | 195 | ||
231 | inode = VFS_I(ip); | 196 | inode = VFS_I(ip); |
232 | 197 | ||
233 | error = xfs_init_security(inode, dir); | 198 | error = xfs_init_security(inode, dir, &dentry->d_name); |
234 | if (unlikely(error)) | 199 | if (unlikely(error)) |
235 | goto out_cleanup_inode; | 200 | goto out_cleanup_inode; |
236 | 201 | ||
@@ -352,7 +317,7 @@ xfs_vn_link( | |||
352 | if (unlikely(error)) | 317 | if (unlikely(error)) |
353 | return -error; | 318 | return -error; |
354 | 319 | ||
355 | atomic_inc(&inode->i_count); | 320 | ihold(inode); |
356 | d_instantiate(dentry, inode); | 321 | d_instantiate(dentry, inode); |
357 | return 0; | 322 | return 0; |
358 | } | 323 | } |
@@ -397,13 +362,13 @@ xfs_vn_symlink( | |||
397 | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); | 362 | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); |
398 | xfs_dentry_to_name(&name, dentry); | 363 | xfs_dentry_to_name(&name, dentry); |
399 | 364 | ||
400 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); | 365 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); |
401 | if (unlikely(error)) | 366 | if (unlikely(error)) |
402 | goto out; | 367 | goto out; |
403 | 368 | ||
404 | inode = VFS_I(cip); | 369 | inode = VFS_I(cip); |
405 | 370 | ||
406 | error = xfs_init_security(inode, dir); | 371 | error = xfs_init_security(inode, dir, &dentry->d_name); |
407 | if (unlikely(error)) | 372 | if (unlikely(error)) |
408 | goto out_cleanup_inode; | 373 | goto out_cleanup_inode; |
409 | 374 | ||
@@ -540,58 +505,6 @@ xfs_vn_setattr( | |||
540 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); | 505 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); |
541 | } | 506 | } |
542 | 507 | ||
543 | STATIC long | ||
544 | xfs_vn_fallocate( | ||
545 | struct inode *inode, | ||
546 | int mode, | ||
547 | loff_t offset, | ||
548 | loff_t len) | ||
549 | { | ||
550 | long error; | ||
551 | loff_t new_size = 0; | ||
552 | xfs_flock64_t bf; | ||
553 | xfs_inode_t *ip = XFS_I(inode); | ||
554 | |||
555 | /* preallocation on directories not yet supported */ | ||
556 | error = -ENODEV; | ||
557 | if (S_ISDIR(inode->i_mode)) | ||
558 | goto out_error; | ||
559 | |||
560 | bf.l_whence = 0; | ||
561 | bf.l_start = offset; | ||
562 | bf.l_len = len; | ||
563 | |||
564 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
565 | |||
566 | /* check the new inode size is valid before allocating */ | ||
567 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
568 | offset + len > i_size_read(inode)) { | ||
569 | new_size = offset + len; | ||
570 | error = inode_newsize_ok(inode, new_size); | ||
571 | if (error) | ||
572 | goto out_unlock; | ||
573 | } | ||
574 | |||
575 | error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, | ||
576 | 0, XFS_ATTR_NOLOCK); | ||
577 | if (error) | ||
578 | goto out_unlock; | ||
579 | |||
580 | /* Change file size if needed */ | ||
581 | if (new_size) { | ||
582 | struct iattr iattr; | ||
583 | |||
584 | iattr.ia_valid = ATTR_SIZE; | ||
585 | iattr.ia_size = new_size; | ||
586 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); | ||
587 | } | ||
588 | |||
589 | out_unlock: | ||
590 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
591 | out_error: | ||
592 | return error; | ||
593 | } | ||
594 | |||
595 | #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | 508 | #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) |
596 | 509 | ||
597 | /* | 510 | /* |
@@ -685,7 +598,6 @@ static const struct inode_operations xfs_inode_operations = { | |||
685 | .getxattr = generic_getxattr, | 598 | .getxattr = generic_getxattr, |
686 | .removexattr = generic_removexattr, | 599 | .removexattr = generic_removexattr, |
687 | .listxattr = xfs_vn_listxattr, | 600 | .listxattr = xfs_vn_listxattr, |
688 | .fallocate = xfs_vn_fallocate, | ||
689 | .fiemap = xfs_vn_fiemap, | 601 | .fiemap = xfs_vn_fiemap, |
690 | }; | 602 | }; |
691 | 603 | ||
@@ -795,7 +707,10 @@ xfs_setup_inode( | |||
795 | 707 | ||
796 | inode->i_ino = ip->i_ino; | 708 | inode->i_ino = ip->i_ino; |
797 | inode->i_state = I_NEW; | 709 | inode->i_state = I_NEW; |
798 | inode_add_to_lists(ip->i_mount->m_super, inode); | 710 | |
711 | inode_sb_list_add(inode); | ||
712 | /* make the inode look hashed for the writeback code */ | ||
713 | hlist_add_fake(&inode->i_hash); | ||
799 | 714 | ||
800 | inode->i_mode = ip->i_d.di_mode; | 715 | inode->i_mode = ip->i_d.di_mode; |
801 | inode->i_nlink = ip->i_d.di_nlink; | 716 | inode->i_nlink = ip->i_d.di_nlink; |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 2fa0bd9ebc7f..8633521b3b2e 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -37,10 +37,8 @@ | |||
37 | 37 | ||
38 | #include <kmem.h> | 38 | #include <kmem.h> |
39 | #include <mrlock.h> | 39 | #include <mrlock.h> |
40 | #include <sv.h> | ||
41 | #include <time.h> | 40 | #include <time.h> |
42 | 41 | ||
43 | #include <support/debug.h> | ||
44 | #include <support/uuid.h> | 42 | #include <support/uuid.h> |
45 | 43 | ||
46 | #include <linux/semaphore.h> | 44 | #include <linux/semaphore.h> |
@@ -71,6 +69,8 @@ | |||
71 | #include <linux/random.h> | 69 | #include <linux/random.h> |
72 | #include <linux/ctype.h> | 70 | #include <linux/ctype.h> |
73 | #include <linux/writeback.h> | 71 | #include <linux/writeback.h> |
72 | #include <linux/capability.h> | ||
73 | #include <linux/list_sort.h> | ||
74 | 74 | ||
75 | #include <asm/page.h> | 75 | #include <asm/page.h> |
76 | #include <asm/div64.h> | 76 | #include <asm/div64.h> |
@@ -79,15 +79,14 @@ | |||
79 | #include <asm/byteorder.h> | 79 | #include <asm/byteorder.h> |
80 | #include <asm/unaligned.h> | 80 | #include <asm/unaligned.h> |
81 | 81 | ||
82 | #include <xfs_cred.h> | ||
83 | #include <xfs_vnode.h> | 82 | #include <xfs_vnode.h> |
84 | #include <xfs_stats.h> | 83 | #include <xfs_stats.h> |
85 | #include <xfs_sysctl.h> | 84 | #include <xfs_sysctl.h> |
86 | #include <xfs_iops.h> | 85 | #include <xfs_iops.h> |
87 | #include <xfs_aops.h> | 86 | #include <xfs_aops.h> |
88 | #include <xfs_super.h> | 87 | #include <xfs_super.h> |
89 | #include <xfs_globals.h> | ||
90 | #include <xfs_buf.h> | 88 | #include <xfs_buf.h> |
89 | #include <xfs_message.h> | ||
91 | 90 | ||
92 | /* | 91 | /* |
93 | * Feature macros (disable/enable) | 92 | * Feature macros (disable/enable) |
@@ -144,7 +143,7 @@ | |||
144 | #define SYNCHRONIZE() barrier() | 143 | #define SYNCHRONIZE() barrier() |
145 | #define __return_address __builtin_return_address(0) | 144 | #define __return_address __builtin_return_address(0) |
146 | 145 | ||
147 | #define dfltprid 0 | 146 | #define XFS_PROJID_DEFAULT 0 |
148 | #define MAXPATHLEN 1024 | 147 | #define MAXPATHLEN 1024 |
149 | 148 | ||
150 | #define MIN(a,b) (min(a,b)) | 149 | #define MIN(a,b) (min(a,b)) |
@@ -282,4 +281,25 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) | |||
282 | #define __arch_pack | 281 | #define __arch_pack |
283 | #endif | 282 | #endif |
284 | 283 | ||
284 | #define ASSERT_ALWAYS(expr) \ | ||
285 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
286 | |||
287 | #ifndef DEBUG | ||
288 | #define ASSERT(expr) ((void)0) | ||
289 | |||
290 | #ifndef STATIC | ||
291 | # define STATIC static noinline | ||
292 | #endif | ||
293 | |||
294 | #else /* DEBUG */ | ||
295 | |||
296 | #define ASSERT(expr) \ | ||
297 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
298 | |||
299 | #ifndef STATIC | ||
300 | # define STATIC noinline | ||
301 | #endif | ||
302 | |||
303 | #endif /* DEBUG */ | ||
304 | |||
285 | #endif /* __XFS_LINUX__ */ | 305 | #endif /* __XFS_LINUX__ */ |
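The ASSERT family now lives in xfs_linux.h: ASSERT_ALWAYS checks in every build, ASSERT checks only when DEBUG is defined and compiles to nothing otherwise, and STATIC toggles between "static noinline" and plain "noinline" so DEBUG builds keep the symbols visible. A small usage sketch (the function is invented for illustration and assumes xfs_linux.h is included):

	STATIC int
	example_consume(struct xfs_mount *mp, int count)
	{
		ASSERT(count >= 0);		/* disappears in non-DEBUG builds */
		ASSERT_ALWAYS(mp != NULL);	/* checked in every build */
		return count;
	}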
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c new file mode 100644 index 000000000000..bd672def95ac --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_message.c | |||
@@ -0,0 +1,108 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write the Free Software Foundation, | ||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
16 | */ | ||
17 | |||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | |||
28 | /* | ||
29 | * XFS logging functions | ||
30 | */ | ||
31 | static void | ||
32 | __xfs_printk( | ||
33 | const char *level, | ||
34 | const struct xfs_mount *mp, | ||
35 | struct va_format *vaf) | ||
36 | { | ||
37 | if (mp && mp->m_fsname) { | ||
38 | printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); | ||
39 | return; | ||
40 | } | ||
41 | printk("%sXFS: %pV\n", level, vaf); | ||
42 | } | ||
43 | |||
44 | #define define_xfs_printk_level(func, kern_level) \ | ||
45 | void func(const struct xfs_mount *mp, const char *fmt, ...) \ | ||
46 | { \ | ||
47 | struct va_format vaf; \ | ||
48 | va_list args; \ | ||
49 | \ | ||
50 | va_start(args, fmt); \ | ||
51 | \ | ||
52 | vaf.fmt = fmt; \ | ||
53 | vaf.va = &args; \ | ||
54 | \ | ||
55 | __xfs_printk(kern_level, mp, &vaf); \ | ||
56 | va_end(args); \ | ||
57 | } \ | ||
58 | |||
59 | define_xfs_printk_level(xfs_emerg, KERN_EMERG); | ||
60 | define_xfs_printk_level(xfs_alert, KERN_ALERT); | ||
61 | define_xfs_printk_level(xfs_crit, KERN_CRIT); | ||
62 | define_xfs_printk_level(xfs_err, KERN_ERR); | ||
63 | define_xfs_printk_level(xfs_warn, KERN_WARNING); | ||
64 | define_xfs_printk_level(xfs_notice, KERN_NOTICE); | ||
65 | define_xfs_printk_level(xfs_info, KERN_INFO); | ||
66 | #ifdef DEBUG | ||
67 | define_xfs_printk_level(xfs_debug, KERN_DEBUG); | ||
68 | #endif | ||
69 | |||
70 | void | ||
71 | xfs_alert_tag( | ||
72 | const struct xfs_mount *mp, | ||
73 | int panic_tag, | ||
74 | const char *fmt, ...) | ||
75 | { | ||
76 | struct va_format vaf; | ||
77 | va_list args; | ||
78 | int do_panic = 0; | ||
79 | |||
80 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { | ||
81 | xfs_alert(mp, "Transforming an alert into a BUG."); | ||
82 | do_panic = 1; | ||
83 | } | ||
84 | |||
85 | va_start(args, fmt); | ||
86 | |||
87 | vaf.fmt = fmt; | ||
88 | vaf.va = &args; | ||
89 | |||
90 | __xfs_printk(KERN_ALERT, mp, &vaf); | ||
91 | va_end(args); | ||
92 | |||
93 | BUG_ON(do_panic); | ||
94 | } | ||
95 | |||
96 | void | ||
97 | assfail(char *expr, char *file, int line) | ||
98 | { | ||
99 | xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", | ||
100 | expr, file, line); | ||
101 | BUG(); | ||
102 | } | ||
103 | |||
104 | void | ||
105 | xfs_hex_dump(void *p, int length) | ||
106 | { | ||
107 | print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); | ||
108 | } | ||
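Each logging level in the new xfs_message.c is a thin wrapper generated by define_xfs_printk_level(): the variadic arguments are captured in a struct va_format and handed to printk() through the %pV specifier, so the "XFS (fsname):" prefix is added in exactly one place while the caller's format string remains printf-checked. A minimal sketch of the same %pV pattern outside XFS (names are ours, not part of the patch):

	#include <linux/kernel.h>

	/* Sketch: prefix a message with a subsystem tag using %pV. */
	static void example_log(const char *prefix, const char *fmt, ...)
	{
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_INFO "%s: %pV\n", prefix, &vaf);
		va_end(args);
	}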
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h new file mode 100644 index 000000000000..7fb7ea007672 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_message.h | |||
@@ -0,0 +1,39 @@ | |||
1 | #ifndef __XFS_MESSAGE_H | ||
2 | #define __XFS_MESSAGE_H 1 | ||
3 | |||
4 | struct xfs_mount; | ||
5 | |||
6 | extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) | ||
7 | __attribute__ ((format (printf, 2, 3))); | ||
8 | extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) | ||
9 | __attribute__ ((format (printf, 2, 3))); | ||
10 | extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, | ||
11 | const char *fmt, ...) | ||
12 | __attribute__ ((format (printf, 3, 4))); | ||
13 | extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) | ||
14 | __attribute__ ((format (printf, 2, 3))); | ||
15 | extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) | ||
16 | __attribute__ ((format (printf, 2, 3))); | ||
17 | extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) | ||
18 | __attribute__ ((format (printf, 2, 3))); | ||
19 | extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) | ||
20 | __attribute__ ((format (printf, 2, 3))); | ||
21 | extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) | ||
22 | __attribute__ ((format (printf, 2, 3))); | ||
23 | |||
24 | #ifdef DEBUG | ||
25 | extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | ||
26 | __attribute__ ((format (printf, 2, 3))); | ||
27 | #else | ||
28 | static inline void | ||
29 | __attribute__ ((format (printf, 2, 3))) | ||
30 | xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | ||
31 | { | ||
32 | } | ||
33 | #endif | ||
34 | |||
35 | extern void assfail(char *expr, char *f, int l); | ||
36 | |||
37 | extern void xfs_hex_dump(void *p, int length); | ||
38 | |||
39 | #endif /* __XFS_MESSAGE_H */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a4e07974955b..a1a881e68a9a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include "xfs_buf_item.h" | 44 | #include "xfs_buf_item.h" |
45 | #include "xfs_utils.h" | 45 | #include "xfs_utils.h" |
46 | #include "xfs_vnodeops.h" | 46 | #include "xfs_vnodeops.h" |
47 | #include "xfs_version.h" | ||
48 | #include "xfs_log_priv.h" | 47 | #include "xfs_log_priv.h" |
49 | #include "xfs_trans_priv.h" | 48 | #include "xfs_trans_priv.h" |
50 | #include "xfs_filestream.h" | 49 | #include "xfs_filestream.h" |
@@ -111,8 +110,10 @@ mempool_t *xfs_ioend_pool; | |||
111 | #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ | 110 | #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ |
112 | #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ | 111 | #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ |
113 | #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ | 112 | #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ |
114 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ | 113 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ |
115 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ | 114 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ |
115 | #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ | ||
116 | #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ | ||
116 | 117 | ||
117 | /* | 118 | /* |
118 | * Table driven mount option parser. | 119 | * Table driven mount option parser. |
@@ -174,6 +175,15 @@ xfs_parseargs( | |||
174 | __uint8_t iosizelog = 0; | 175 | __uint8_t iosizelog = 0; |
175 | 176 | ||
176 | /* | 177 | /* |
178 | * set up the mount name first so all the errors will refer to the | ||
179 | * correct device. | ||
180 | */ | ||
181 | mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); | ||
182 | if (!mp->m_fsname) | ||
183 | return ENOMEM; | ||
184 | mp->m_fsname_len = strlen(mp->m_fsname) + 1; | ||
185 | |||
186 | /* | ||
177 | * Copy binary VFS mount flags we are interested in. | 187 | * Copy binary VFS mount flags we are interested in. |
178 | */ | 188 | */ |
179 | if (sb->s_flags & MS_RDONLY) | 189 | if (sb->s_flags & MS_RDONLY) |
@@ -190,6 +200,7 @@ xfs_parseargs( | |||
190 | mp->m_flags |= XFS_MOUNT_BARRIER; | 200 | mp->m_flags |= XFS_MOUNT_BARRIER; |
191 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; | 201 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; |
192 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; | 202 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; |
203 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | ||
193 | 204 | ||
194 | /* | 205 | /* |
195 | * These can be overridden by the mount option parsing. | 206 | * These can be overridden by the mount option parsing. |
@@ -208,24 +219,21 @@ xfs_parseargs( | |||
208 | 219 | ||
209 | if (!strcmp(this_char, MNTOPT_LOGBUFS)) { | 220 | if (!strcmp(this_char, MNTOPT_LOGBUFS)) { |
210 | if (!value || !*value) { | 221 | if (!value || !*value) { |
211 | cmn_err(CE_WARN, | 222 | xfs_warn(mp, "%s option requires an argument", |
212 | "XFS: %s option requires an argument", | ||
213 | this_char); | 223 | this_char); |
214 | return EINVAL; | 224 | return EINVAL; |
215 | } | 225 | } |
216 | mp->m_logbufs = simple_strtoul(value, &eov, 10); | 226 | mp->m_logbufs = simple_strtoul(value, &eov, 10); |
217 | } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { | 227 | } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { |
218 | if (!value || !*value) { | 228 | if (!value || !*value) { |
219 | cmn_err(CE_WARN, | 229 | xfs_warn(mp, "%s option requires an argument", |
220 | "XFS: %s option requires an argument", | ||
221 | this_char); | 230 | this_char); |
222 | return EINVAL; | 231 | return EINVAL; |
223 | } | 232 | } |
224 | mp->m_logbsize = suffix_strtoul(value, &eov, 10); | 233 | mp->m_logbsize = suffix_strtoul(value, &eov, 10); |
225 | } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { | 234 | } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { |
226 | if (!value || !*value) { | 235 | if (!value || !*value) { |
227 | cmn_err(CE_WARN, | 236 | xfs_warn(mp, "%s option requires an argument", |
228 | "XFS: %s option requires an argument", | ||
229 | this_char); | 237 | this_char); |
230 | return EINVAL; | 238 | return EINVAL; |
231 | } | 239 | } |
@@ -233,14 +241,12 @@ xfs_parseargs( | |||
233 | if (!mp->m_logname) | 241 | if (!mp->m_logname) |
234 | return ENOMEM; | 242 | return ENOMEM; |
235 | } else if (!strcmp(this_char, MNTOPT_MTPT)) { | 243 | } else if (!strcmp(this_char, MNTOPT_MTPT)) { |
236 | cmn_err(CE_WARN, | 244 | xfs_warn(mp, "%s option not allowed on this system", |
237 | "XFS: %s option not allowed on this system", | ||
238 | this_char); | 245 | this_char); |
239 | return EINVAL; | 246 | return EINVAL; |
240 | } else if (!strcmp(this_char, MNTOPT_RTDEV)) { | 247 | } else if (!strcmp(this_char, MNTOPT_RTDEV)) { |
241 | if (!value || !*value) { | 248 | if (!value || !*value) { |
242 | cmn_err(CE_WARN, | 249 | xfs_warn(mp, "%s option requires an argument", |
243 | "XFS: %s option requires an argument", | ||
244 | this_char); | 250 | this_char); |
245 | return EINVAL; | 251 | return EINVAL; |
246 | } | 252 | } |
@@ -249,8 +255,7 @@ xfs_parseargs( | |||
249 | return ENOMEM; | 255 | return ENOMEM; |
250 | } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { | 256 | } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { |
251 | if (!value || !*value) { | 257 | if (!value || !*value) { |
252 | cmn_err(CE_WARN, | 258 | xfs_warn(mp, "%s option requires an argument", |
253 | "XFS: %s option requires an argument", | ||
254 | this_char); | 259 | this_char); |
255 | return EINVAL; | 260 | return EINVAL; |
256 | } | 261 | } |
@@ -258,8 +263,7 @@ xfs_parseargs( | |||
258 | iosizelog = ffs(iosize) - 1; | 263 | iosizelog = ffs(iosize) - 1; |
259 | } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { | 264 | } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { |
260 | if (!value || !*value) { | 265 | if (!value || !*value) { |
261 | cmn_err(CE_WARN, | 266 | xfs_warn(mp, "%s option requires an argument", |
262 | "XFS: %s option requires an argument", | ||
263 | this_char); | 267 | this_char); |
264 | return EINVAL; | 268 | return EINVAL; |
265 | } | 269 | } |
@@ -281,16 +285,14 @@ xfs_parseargs( | |||
281 | mp->m_flags |= XFS_MOUNT_SWALLOC; | 285 | mp->m_flags |= XFS_MOUNT_SWALLOC; |
282 | } else if (!strcmp(this_char, MNTOPT_SUNIT)) { | 286 | } else if (!strcmp(this_char, MNTOPT_SUNIT)) { |
283 | if (!value || !*value) { | 287 | if (!value || !*value) { |
284 | cmn_err(CE_WARN, | 288 | xfs_warn(mp, "%s option requires an argument", |
285 | "XFS: %s option requires an argument", | ||
286 | this_char); | 289 | this_char); |
287 | return EINVAL; | 290 | return EINVAL; |
288 | } | 291 | } |
289 | dsunit = simple_strtoul(value, &eov, 10); | 292 | dsunit = simple_strtoul(value, &eov, 10); |
290 | } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { | 293 | } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { |
291 | if (!value || !*value) { | 294 | if (!value || !*value) { |
292 | cmn_err(CE_WARN, | 295 | xfs_warn(mp, "%s option requires an argument", |
293 | "XFS: %s option requires an argument", | ||
294 | this_char); | 296 | this_char); |
295 | return EINVAL; | 297 | return EINVAL; |
296 | } | 298 | } |
@@ -298,8 +300,7 @@ xfs_parseargs( | |||
298 | } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { | 300 | } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { |
299 | mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; | 301 | mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; |
300 | #if !XFS_BIG_INUMS | 302 | #if !XFS_BIG_INUMS |
301 | cmn_err(CE_WARN, | 303 | xfs_warn(mp, "%s option not allowed on this system", |
302 | "XFS: %s option not allowed on this system", | ||
303 | this_char); | 304 | this_char); |
304 | return EINVAL; | 305 | return EINVAL; |
305 | #endif | 306 | #endif |
@@ -354,26 +355,26 @@ xfs_parseargs( | |||
354 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; | 355 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; |
355 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { | 356 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { |
356 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | 357 | mp->m_flags |= XFS_MOUNT_DELAYLOG; |
357 | cmn_err(CE_WARN, | ||
358 | "Enabling EXPERIMENTAL delayed logging feature " | ||
359 | "- use at your own risk.\n"); | ||
360 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | 358 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { |
361 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; | 359 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; |
360 | } else if (!strcmp(this_char, MNTOPT_DISCARD)) { | ||
361 | mp->m_flags |= XFS_MOUNT_DISCARD; | ||
362 | } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { | ||
363 | mp->m_flags &= ~XFS_MOUNT_DISCARD; | ||
362 | } else if (!strcmp(this_char, "ihashsize")) { | 364 | } else if (!strcmp(this_char, "ihashsize")) { |
363 | cmn_err(CE_WARN, | 365 | xfs_warn(mp, |
364 | "XFS: ihashsize no longer used, option is deprecated."); | 366 | "ihashsize no longer used, option is deprecated."); |
365 | } else if (!strcmp(this_char, "osyncisdsync")) { | 367 | } else if (!strcmp(this_char, "osyncisdsync")) { |
366 | cmn_err(CE_WARN, | 368 | xfs_warn(mp, |
367 | "XFS: osyncisdsync has no effect, option is deprecated."); | 369 | "osyncisdsync has no effect, option is deprecated."); |
368 | } else if (!strcmp(this_char, "osyncisosync")) { | 370 | } else if (!strcmp(this_char, "osyncisosync")) { |
369 | cmn_err(CE_WARN, | 371 | xfs_warn(mp, |
370 | "XFS: osyncisosync has no effect, option is deprecated."); | 372 | "osyncisosync has no effect, option is deprecated."); |
371 | } else if (!strcmp(this_char, "irixsgid")) { | 373 | } else if (!strcmp(this_char, "irixsgid")) { |
372 | cmn_err(CE_WARN, | 374 | xfs_warn(mp, |
373 | "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); | 375 | "irixsgid is now a sysctl(2) variable, option is deprecated."); |
374 | } else { | 376 | } else { |
375 | cmn_err(CE_WARN, | 377 | xfs_warn(mp, "unknown mount option [%s].", this_char); |
376 | "XFS: unknown mount option [%s].", this_char); | ||
377 | return EINVAL; | 378 | return EINVAL; |
378 | } | 379 | } |
379 | } | 380 | } |
@@ -383,40 +384,44 @@ xfs_parseargs( | |||
383 | */ | 384 | */ |
384 | if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && | 385 | if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && |
385 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { | 386 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
386 | cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); | 387 | xfs_warn(mp, "no-recovery mounts must be read-only."); |
387 | return EINVAL; | 388 | return EINVAL; |
388 | } | 389 | } |
389 | 390 | ||
390 | if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { | 391 | if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { |
391 | cmn_err(CE_WARN, | 392 | xfs_warn(mp, |
392 | "XFS: sunit and swidth options incompatible with the noalign option"); | 393 | "sunit and swidth options incompatible with the noalign option"); |
394 | return EINVAL; | ||
395 | } | ||
396 | |||
397 | if ((mp->m_flags & XFS_MOUNT_DISCARD) && | ||
398 | !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { | ||
399 | xfs_warn(mp, | ||
400 | "the discard option is incompatible with the nodelaylog option"); | ||
393 | return EINVAL; | 401 | return EINVAL; |
394 | } | 402 | } |
395 | 403 | ||
396 | #ifndef CONFIG_XFS_QUOTA | 404 | #ifndef CONFIG_XFS_QUOTA |
397 | if (XFS_IS_QUOTA_RUNNING(mp)) { | 405 | if (XFS_IS_QUOTA_RUNNING(mp)) { |
398 | cmn_err(CE_WARN, | 406 | xfs_warn(mp, "quota support not available in this kernel."); |
399 | "XFS: quota support not available in this kernel."); | ||
400 | return EINVAL; | 407 | return EINVAL; |
401 | } | 408 | } |
402 | #endif | 409 | #endif |
403 | 410 | ||
404 | if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && | 411 | if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && |
405 | (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { | 412 | (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { |
406 | cmn_err(CE_WARN, | 413 | xfs_warn(mp, "cannot mount with both project and group quota"); |
407 | "XFS: cannot mount with both project and group quota"); | ||
408 | return EINVAL; | 414 | return EINVAL; |
409 | } | 415 | } |
410 | 416 | ||
411 | if ((dsunit && !dswidth) || (!dsunit && dswidth)) { | 417 | if ((dsunit && !dswidth) || (!dsunit && dswidth)) { |
412 | cmn_err(CE_WARN, | 418 | xfs_warn(mp, "sunit and swidth must be specified together"); |
413 | "XFS: sunit and swidth must be specified together"); | ||
414 | return EINVAL; | 419 | return EINVAL; |
415 | } | 420 | } |
416 | 421 | ||
417 | if (dsunit && (dswidth % dsunit != 0)) { | 422 | if (dsunit && (dswidth % dsunit != 0)) { |
418 | cmn_err(CE_WARN, | 423 | xfs_warn(mp, |
419 | "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", | 424 | "stripe width (%d) must be a multiple of the stripe unit (%d)", |
420 | dswidth, dsunit); | 425 | dswidth, dsunit); |
421 | return EINVAL; | 426 | return EINVAL; |
422 | } | 427 | } |
@@ -442,8 +447,7 @@ done: | |||
442 | mp->m_logbufs != 0 && | 447 | mp->m_logbufs != 0 && |
443 | (mp->m_logbufs < XLOG_MIN_ICLOGS || | 448 | (mp->m_logbufs < XLOG_MIN_ICLOGS || |
444 | mp->m_logbufs > XLOG_MAX_ICLOGS)) { | 449 | mp->m_logbufs > XLOG_MAX_ICLOGS)) { |
445 | cmn_err(CE_WARN, | 450 | xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", |
446 | "XFS: invalid logbufs value: %d [not %d-%d]", | ||
447 | mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); | 451 | mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); |
448 | return XFS_ERROR(EINVAL); | 452 | return XFS_ERROR(EINVAL); |
449 | } | 453 | } |
@@ -452,22 +456,16 @@ done: | |||
452 | (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || | 456 | (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || |
453 | mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || | 457 | mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || |
454 | !is_power_of_2(mp->m_logbsize))) { | 458 | !is_power_of_2(mp->m_logbsize))) { |
455 | cmn_err(CE_WARN, | 459 | xfs_warn(mp, |
456 | "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", | 460 | "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", |
457 | mp->m_logbsize); | 461 | mp->m_logbsize); |
458 | return XFS_ERROR(EINVAL); | 462 | return XFS_ERROR(EINVAL); |
459 | } | 463 | } |
460 | 464 | ||
461 | mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); | ||
462 | if (!mp->m_fsname) | ||
463 | return ENOMEM; | ||
464 | mp->m_fsname_len = strlen(mp->m_fsname) + 1; | ||
465 | |||
466 | if (iosizelog) { | 465 | if (iosizelog) { |
467 | if (iosizelog > XFS_MAX_IO_LOG || | 466 | if (iosizelog > XFS_MAX_IO_LOG || |
468 | iosizelog < XFS_MIN_IO_LOG) { | 467 | iosizelog < XFS_MIN_IO_LOG) { |
469 | cmn_err(CE_WARN, | 468 | xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", |
470 | "XFS: invalid log iosize: %d [not %d-%d]", | ||
471 | iosizelog, XFS_MIN_IO_LOG, | 469 | iosizelog, XFS_MIN_IO_LOG, |
472 | XFS_MAX_IO_LOG); | 470 | XFS_MAX_IO_LOG); |
473 | return XFS_ERROR(EINVAL); | 471 | return XFS_ERROR(EINVAL); |
@@ -503,6 +501,7 @@ xfs_showargs( | |||
503 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, | 501 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, |
504 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, | 502 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, |
505 | { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, | 503 | { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, |
504 | { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, | ||
506 | { 0, NULL } | 505 | { 0, NULL } |
507 | }; | 506 | }; |
508 | static struct proc_xfs_info xfs_info_unset[] = { | 507 | static struct proc_xfs_info xfs_info_unset[] = { |
@@ -577,7 +576,7 @@ xfs_max_file_offset( | |||
577 | 576 | ||
578 | /* Figure out maximum filesize, on Linux this can depend on | 577 | /* Figure out maximum filesize, on Linux this can depend on |
579 | * the filesystem blocksize (on 32 bit platforms). | 578 | * the filesystem blocksize (on 32 bit platforms). |
580 | * __block_prepare_write does this in an [unsigned] long... | 579 | * __block_write_begin does this in an [unsigned] long... |
581 | * page->index << (PAGE_CACHE_SHIFT - bbits) | 580 | * page->index << (PAGE_CACHE_SHIFT - bbits) |
582 | * So, for page sized blocks (4K on 32 bit platforms), | 581 | * So, for page sized blocks (4K on 32 bit platforms), |
583 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is | 582 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is |
@@ -610,10 +609,11 @@ xfs_blkdev_get( | |||
610 | { | 609 | { |
611 | int error = 0; | 610 | int error = 0; |
612 | 611 | ||
613 | *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); | 612 | *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, |
613 | mp); | ||
614 | if (IS_ERR(*bdevp)) { | 614 | if (IS_ERR(*bdevp)) { |
615 | error = PTR_ERR(*bdevp); | 615 | error = PTR_ERR(*bdevp); |
616 | printk("XFS: Invalid device [%s], error=%d\n", name, error); | 616 | xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); |
617 | } | 617 | } |
618 | 618 | ||
619 | return -error; | 619 | return -error; |
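open_bdev_exclusive()/close_bdev_exclusive() are replaced here by blkdev_get_by_path() with FMODE_EXCL and a matching blkdev_put() in the xfs_blkdev_put() hunk below, with the mount (mp) acting as the exclusive holder. A hedged sketch of the open/close pairing (helper names are invented):

	#include <linux/fs.h>
	#include <linux/blkdev.h>

	/* Sketch: exclusive open/close pairing; 'holder' tags the claim. */
	static struct block_device *example_open_bdev(const char *path, void *holder)
	{
		return blkdev_get_by_path(path,
				FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder);
	}

	static void example_close_bdev(struct block_device *bdev)
	{
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
	}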
@@ -624,77 +624,14 @@ xfs_blkdev_put( | |||
624 | struct block_device *bdev) | 624 | struct block_device *bdev) |
625 | { | 625 | { |
626 | if (bdev) | 626 | if (bdev) |
627 | close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); | 627 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
628 | } | ||
629 | |||
630 | /* | ||
631 | * Try to write out the superblock using barriers. | ||
632 | */ | ||
633 | STATIC int | ||
634 | xfs_barrier_test( | ||
635 | xfs_mount_t *mp) | ||
636 | { | ||
637 | xfs_buf_t *sbp = xfs_getsb(mp, 0); | ||
638 | int error; | ||
639 | |||
640 | XFS_BUF_UNDONE(sbp); | ||
641 | XFS_BUF_UNREAD(sbp); | ||
642 | XFS_BUF_UNDELAYWRITE(sbp); | ||
643 | XFS_BUF_WRITE(sbp); | ||
644 | XFS_BUF_UNASYNC(sbp); | ||
645 | XFS_BUF_ORDERED(sbp); | ||
646 | |||
647 | xfsbdstrat(mp, sbp); | ||
648 | error = xfs_iowait(sbp); | ||
649 | |||
650 | /* | ||
651 | * Clear all the flags we set and possible error state in the | ||
652 | * buffer. We only did the write to try out whether barriers | ||
653 | * worked and shouldn't leave any traces in the superblock | ||
654 | * buffer. | ||
655 | */ | ||
656 | XFS_BUF_DONE(sbp); | ||
657 | XFS_BUF_ERROR(sbp, 0); | ||
658 | XFS_BUF_UNORDERED(sbp); | ||
659 | |||
660 | xfs_buf_relse(sbp); | ||
661 | return error; | ||
662 | } | ||
663 | |||
664 | STATIC void | ||
665 | xfs_mountfs_check_barriers(xfs_mount_t *mp) | ||
666 | { | ||
667 | int error; | ||
668 | |||
669 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | ||
670 | xfs_fs_cmn_err(CE_NOTE, mp, | ||
671 | "Disabling barriers, not supported with external log device"); | ||
672 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
673 | return; | ||
674 | } | ||
675 | |||
676 | if (xfs_readonly_buftarg(mp->m_ddev_targp)) { | ||
677 | xfs_fs_cmn_err(CE_NOTE, mp, | ||
678 | "Disabling barriers, underlying device is readonly"); | ||
679 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
680 | return; | ||
681 | } | ||
682 | |||
683 | error = xfs_barrier_test(mp); | ||
684 | if (error) { | ||
685 | xfs_fs_cmn_err(CE_NOTE, mp, | ||
686 | "Disabling barriers, trial barrier write failed"); | ||
687 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
688 | return; | ||
689 | } | ||
690 | } | 628 | } |
691 | 629 | ||
692 | void | 630 | void |
693 | xfs_blkdev_issue_flush( | 631 | xfs_blkdev_issue_flush( |
694 | xfs_buftarg_t *buftarg) | 632 | xfs_buftarg_t *buftarg) |
695 | { | 633 | { |
696 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, | 634 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); |
697 | BLKDEV_IFL_WAIT); | ||
698 | } | 635 | } |
699 | 636 | ||
700 | STATIC void | 637 | STATIC void |
@@ -747,8 +684,8 @@ xfs_open_devices( | |||
747 | goto out_close_logdev; | 684 | goto out_close_logdev; |
748 | 685 | ||
749 | if (rtdev == ddev || rtdev == logdev) { | 686 | if (rtdev == ddev || rtdev == logdev) { |
750 | cmn_err(CE_WARN, | 687 | xfs_warn(mp, |
751 | "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); | 688 | "Cannot mount filesystem with identical rtdev and ddev/logdev."); |
752 | error = EINVAL; | 689 | error = EINVAL; |
753 | goto out_close_rtdev; | 690 | goto out_close_rtdev; |
754 | } | 691 | } |
@@ -758,18 +695,20 @@ xfs_open_devices( | |||
758 | * Setup xfs_mount buffer target pointers | 695 | * Setup xfs_mount buffer target pointers |
759 | */ | 696 | */ |
760 | error = ENOMEM; | 697 | error = ENOMEM; |
761 | mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); | 698 | mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); |
762 | if (!mp->m_ddev_targp) | 699 | if (!mp->m_ddev_targp) |
763 | goto out_close_rtdev; | 700 | goto out_close_rtdev; |
764 | 701 | ||
765 | if (rtdev) { | 702 | if (rtdev) { |
766 | mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); | 703 | mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, |
704 | mp->m_fsname); | ||
767 | if (!mp->m_rtdev_targp) | 705 | if (!mp->m_rtdev_targp) |
768 | goto out_free_ddev_targ; | 706 | goto out_free_ddev_targ; |
769 | } | 707 | } |
770 | 708 | ||
771 | if (logdev && logdev != ddev) { | 709 | if (logdev && logdev != ddev) { |
772 | mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); | 710 | mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, |
711 | mp->m_fsname); | ||
773 | if (!mp->m_logdev_targp) | 712 | if (!mp->m_logdev_targp) |
774 | goto out_free_rtdev_targ; | 713 | goto out_free_rtdev_targ; |
775 | } else { | 714 | } else { |
@@ -829,63 +768,6 @@ xfs_setup_devices( | |||
829 | return 0; | 768 | return 0; |
830 | } | 769 | } |
831 | 770 | ||
832 | /* | ||
833 | * XFS AIL push thread support | ||
834 | */ | ||
835 | void | ||
836 | xfsaild_wakeup( | ||
837 | struct xfs_ail *ailp, | ||
838 | xfs_lsn_t threshold_lsn) | ||
839 | { | ||
840 | ailp->xa_target = threshold_lsn; | ||
841 | wake_up_process(ailp->xa_task); | ||
842 | } | ||
843 | |||
844 | STATIC int | ||
845 | xfsaild( | ||
846 | void *data) | ||
847 | { | ||
848 | struct xfs_ail *ailp = data; | ||
849 | xfs_lsn_t last_pushed_lsn = 0; | ||
850 | long tout = 0; /* milliseconds */ | ||
851 | |||
852 | while (!kthread_should_stop()) { | ||
853 | schedule_timeout_interruptible(tout ? | ||
854 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | ||
855 | |||
856 | /* swsusp */ | ||
857 | try_to_freeze(); | ||
858 | |||
859 | ASSERT(ailp->xa_mount->m_log); | ||
860 | if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) | ||
861 | continue; | ||
862 | |||
863 | tout = xfsaild_push(ailp, &last_pushed_lsn); | ||
864 | } | ||
865 | |||
866 | return 0; | ||
867 | } /* xfsaild */ | ||
868 | |||
869 | int | ||
870 | xfsaild_start( | ||
871 | struct xfs_ail *ailp) | ||
872 | { | ||
873 | ailp->xa_target = 0; | ||
874 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | ||
875 | ailp->xa_mount->m_fsname); | ||
876 | if (IS_ERR(ailp->xa_task)) | ||
877 | return -PTR_ERR(ailp->xa_task); | ||
878 | return 0; | ||
879 | } | ||
880 | |||
881 | void | ||
882 | xfsaild_stop( | ||
883 | struct xfs_ail *ailp) | ||
884 | { | ||
885 | kthread_stop(ailp->xa_task); | ||
886 | } | ||
887 | |||
888 | |||
889 | /* Catch misguided souls that try to use this interface on XFS */ | 771 | /* Catch misguided souls that try to use this interface on XFS */ |
890 | STATIC struct inode * | 772 | STATIC struct inode * |
891 | xfs_fs_alloc_inode( | 773 | xfs_fs_alloc_inode( |
@@ -938,7 +820,7 @@ out_reclaim: | |||
938 | * Slab object creation initialisation for the XFS inode. | 820 | * Slab object creation initialisation for the XFS inode. |
939 | * This covers only the idempotent fields in the XFS inode; | 821 | * This covers only the idempotent fields in the XFS inode; |
940 | * all other fields need to be initialised on allocation | 822 | * all other fields need to be initialised on allocation |
941 | * from the slab. This avoids the need to repeatedly intialise | 823 | * from the slab. This avoids the need to repeatedly initialise |
942 | * fields in the xfs inode that left in the initialise state | 824 | * fields in the xfs inode that left in the initialise state |
943 | * when freeing the inode. | 825 | * when freeing the inode. |
944 | */ | 826 | */ |
@@ -972,12 +854,7 @@ xfs_fs_inode_init_once( | |||
972 | 854 | ||
973 | /* | 855 | /* |
974 | * Dirty the XFS inode when mark_inode_dirty_sync() is called so that | 856 | * Dirty the XFS inode when mark_inode_dirty_sync() is called so that |
975 | * we catch unlogged VFS level updates to the inode. Care must be taken | 857 | * we catch unlogged VFS level updates to the inode. |
976 | * here - the transaction code calls mark_inode_dirty_sync() to mark the | ||
977 | * VFS inode dirty in a transaction and clears the i_update_core field; | ||
978 | * it must clear the field after calling mark_inode_dirty_sync() to | ||
979 | * correctly indicate that the dirty state has been propagated into the | ||
980 | * inode log item. | ||
981 | * | 858 | * |
982 | * We need the barrier() to maintain correct ordering between unlogged | 859 | * We need the barrier() to maintain correct ordering between unlogged |
983 | * updates and the transaction commit code that clears the i_update_core | 860 | * updates and the transaction commit code that clears the i_update_core |
@@ -986,7 +863,8 @@ xfs_fs_inode_init_once( | |||
986 | */ | 863 | */ |
987 | STATIC void | 864 | STATIC void |
988 | xfs_fs_dirty_inode( | 865 | xfs_fs_dirty_inode( |
989 | struct inode *inode) | 866 | struct inode *inode, |
867 | int flags) | ||
990 | { | 868 | { |
991 | barrier(); | 869 | barrier(); |
992 | XFS_I(inode)->i_update_core = 1; | 870 | XFS_I(inode)->i_update_core = 1; |
@@ -1084,7 +962,7 @@ xfs_fs_write_inode( | |||
1084 | error = 0; | 962 | error = 0; |
1085 | goto out_unlock; | 963 | goto out_unlock; |
1086 | } | 964 | } |
1087 | error = xfs_iflush(ip, 0); | 965 | error = xfs_iflush(ip, SYNC_TRYLOCK); |
1088 | } | 966 | } |
1089 | 967 | ||
1090 | out_unlock: | 968 | out_unlock: |
@@ -1126,6 +1004,8 @@ xfs_fs_evict_inode( | |||
1126 | */ | 1004 | */ |
1127 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | 1005 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); |
1128 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 1006 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
1007 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
1008 | &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); | ||
1129 | 1009 | ||
1130 | xfs_inactive(ip); | 1010 | xfs_inactive(ip); |
1131 | } | 1011 | } |
@@ -1195,22 +1075,12 @@ xfs_fs_sync_fs( | |||
1195 | return -error; | 1075 | return -error; |
1196 | 1076 | ||
1197 | if (laptop_mode) { | 1077 | if (laptop_mode) { |
1198 | int prev_sync_seq = mp->m_sync_seq; | ||
1199 | |||
1200 | /* | 1078 | /* |
1201 | * The disk must be active because we're syncing. | 1079 | * The disk must be active because we're syncing. |
1202 | * We schedule xfssyncd now (now that the disk is | 1080 | * We schedule xfssyncd now (now that the disk is |
1203 | * active) instead of later (when it might not be). | 1081 | * active) instead of later (when it might not be). |
1204 | */ | 1082 | */ |
1205 | wake_up_process(mp->m_sync_task); | 1083 | flush_delayed_work_sync(&mp->m_sync_work); |
1206 | /* | ||
1207 | * We have to wait for the sync iteration to complete. | ||
1208 | * If we don't, the disk activity caused by the sync | ||
1209 | * will come after the sync is completed, and that | ||
1210 | * triggers another sync from laptop mode. | ||
1211 | */ | ||
1212 | wait_event(mp->m_wait_single_sync_task, | ||
1213 | mp->m_sync_seq != prev_sync_seq); | ||
1214 | } | 1084 | } |
1215 | 1085 | ||
1216 | return 0; | 1086 | return 0; |
@@ -1308,14 +1178,6 @@ xfs_fs_remount( | |||
1308 | switch (token) { | 1178 | switch (token) { |
1309 | case Opt_barrier: | 1179 | case Opt_barrier: |
1310 | mp->m_flags |= XFS_MOUNT_BARRIER; | 1180 | mp->m_flags |= XFS_MOUNT_BARRIER; |
1311 | |||
1312 | /* | ||
1313 | * Test if barriers are actually working if we can, | ||
1314 | * else delay this check until the filesystem is | ||
1315 | * marked writeable. | ||
1316 | */ | ||
1317 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) | ||
1318 | xfs_mountfs_check_barriers(mp); | ||
1319 | break; | 1181 | break; |
1320 | case Opt_nobarrier: | 1182 | case Opt_nobarrier: |
1321 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1183 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
@@ -1338,8 +1200,8 @@ xfs_fs_remount( | |||
1338 | * options that we can't actually change. | 1200 | * options that we can't actually change. |
1339 | */ | 1201 | */ |
1340 | #if 0 | 1202 | #if 0 |
1341 | printk(KERN_INFO | 1203 | xfs_info(mp, |
1342 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1204 | "mount option \"%s\" not supported for remount\n", p); |
1343 | return -EINVAL; | 1205 | return -EINVAL; |
1344 | #else | 1206 | #else |
1345 | break; | 1207 | break; |
@@ -1350,8 +1212,6 @@ xfs_fs_remount( | |||
1350 | /* ro -> rw */ | 1212 | /* ro -> rw */ |
1351 | if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { | 1213 | if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { |
1352 | mp->m_flags &= ~XFS_MOUNT_RDONLY; | 1214 | mp->m_flags &= ~XFS_MOUNT_RDONLY; |
1353 | if (mp->m_flags & XFS_MOUNT_BARRIER) | ||
1354 | xfs_mountfs_check_barriers(mp); | ||
1355 | 1215 | ||
1356 | /* | 1216 | /* |
1357 | * If this is the first remount to writeable state we | 1217 | * If this is the first remount to writeable state we |
@@ -1360,8 +1220,7 @@ xfs_fs_remount( | |||
1360 | if (mp->m_update_flags) { | 1220 | if (mp->m_update_flags) { |
1361 | error = xfs_mount_log_sb(mp, mp->m_update_flags); | 1221 | error = xfs_mount_log_sb(mp, mp->m_update_flags); |
1362 | if (error) { | 1222 | if (error) { |
1363 | cmn_err(CE_WARN, | 1223 | xfs_warn(mp, "failed to write sb changes"); |
1364 | "XFS: failed to write sb changes"); | ||
1365 | return error; | 1224 | return error; |
1366 | } | 1225 | } |
1367 | mp->m_update_flags = 0; | 1226 | mp->m_update_flags = 0; |
@@ -1407,7 +1266,7 @@ xfs_fs_freeze( | |||
1407 | 1266 | ||
1408 | xfs_save_resvblks(mp); | 1267 | xfs_save_resvblks(mp); |
1409 | xfs_quiesce_attr(mp); | 1268 | xfs_quiesce_attr(mp); |
1410 | return -xfs_fs_log_dummy(mp, SYNC_WAIT); | 1269 | return -xfs_fs_log_dummy(mp); |
1411 | } | 1270 | } |
1412 | 1271 | ||
1413 | STATIC int | 1272 | STATIC int |
@@ -1445,15 +1304,15 @@ xfs_finish_flags( | |||
1445 | mp->m_logbsize = mp->m_sb.sb_logsunit; | 1304 | mp->m_logbsize = mp->m_sb.sb_logsunit; |
1446 | } else if (mp->m_logbsize > 0 && | 1305 | } else if (mp->m_logbsize > 0 && |
1447 | mp->m_logbsize < mp->m_sb.sb_logsunit) { | 1306 | mp->m_logbsize < mp->m_sb.sb_logsunit) { |
1448 | cmn_err(CE_WARN, | 1307 | xfs_warn(mp, |
1449 | "XFS: logbuf size must be greater than or equal to log stripe size"); | 1308 | "logbuf size must be greater than or equal to log stripe size"); |
1450 | return XFS_ERROR(EINVAL); | 1309 | return XFS_ERROR(EINVAL); |
1451 | } | 1310 | } |
1452 | } else { | 1311 | } else { |
1453 | /* Fail a mount if the logbuf is larger than 32K */ | 1312 | /* Fail a mount if the logbuf is larger than 32K */ |
1454 | if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { | 1313 | if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { |
1455 | cmn_err(CE_WARN, | 1314 | xfs_warn(mp, |
1456 | "XFS: logbuf size for version 1 logs must be 16K or 32K"); | 1315 | "logbuf size for version 1 logs must be 16K or 32K"); |
1457 | return XFS_ERROR(EINVAL); | 1316 | return XFS_ERROR(EINVAL); |
1458 | } | 1317 | } |
1459 | } | 1318 | } |
@@ -1470,8 +1329,8 @@ xfs_finish_flags( | |||
1470 | * prohibit r/w mounts of read-only filesystems | 1329 | * prohibit r/w mounts of read-only filesystems |
1471 | */ | 1330 | */ |
1472 | if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { | 1331 | if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { |
1473 | cmn_err(CE_WARN, | 1332 | xfs_warn(mp, |
1474 | "XFS: cannot mount a read-only filesystem as read-write"); | 1333 | "cannot mount a read-only filesystem as read-write"); |
1475 | return XFS_ERROR(EROFS); | 1334 | return XFS_ERROR(EROFS); |
1476 | } | 1335 | } |
1477 | 1336 | ||
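The hunks above and below convert bare printk()/cmn_err() calls to the xfs_warn()/xfs_err()/xfs_info() helpers, which take the mount as their first argument (NULL when no mount is available) and a printf-style format without a trailing newline. The real helpers live in xfs_message.[ch] and are not part of this diff; a purely hypothetical stand-in of the same shape, shown only to illustrate the calling convention, might look like:

	#include <linux/kernel.h>

	/* demo_mount and demo_warn() are made-up stand-ins, not the XFS API */
	struct demo_mount {
		char	*m_fsname;	/* filesystem name used as message prefix */
	};

	#define demo_warn(mp, fmt, ...)					\
		printk(KERN_WARNING "demo (%s): " fmt "\n",		\
		       (mp) ? (mp)->m_fsname : "?", ##__VA_ARGS__)

The practical effect of the conversion is that every message carries the filesystem identity and severity consistently, instead of each call site hand-rolling an "XFS: " prefix and newline.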
@@ -1495,9 +1354,6 @@ xfs_fs_fill_super( | |||
1495 | spin_lock_init(&mp->m_sb_lock); | 1354 | spin_lock_init(&mp->m_sb_lock); |
1496 | mutex_init(&mp->m_growlock); | 1355 | mutex_init(&mp->m_growlock); |
1497 | atomic_set(&mp->m_active_trans, 0); | 1356 | atomic_set(&mp->m_active_trans, 0); |
1498 | INIT_LIST_HEAD(&mp->m_sync_list); | ||
1499 | spin_lock_init(&mp->m_sync_lock); | ||
1500 | init_waitqueue_head(&mp->m_wait_single_sync_task); | ||
1501 | 1357 | ||
1502 | mp->m_super = sb; | 1358 | mp->m_super = sb; |
1503 | sb->s_fs_info = mp; | 1359 | sb->s_fs_info = mp; |
@@ -1521,8 +1377,9 @@ xfs_fs_fill_super( | |||
1521 | if (error) | 1377 | if (error) |
1522 | goto out_free_fsname; | 1378 | goto out_free_fsname; |
1523 | 1379 | ||
1524 | if (xfs_icsb_init_counters(mp)) | 1380 | error = xfs_icsb_init_counters(mp); |
1525 | mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; | 1381 | if (error) |
1382 | goto out_close_devices; | ||
1526 | 1383 | ||
1527 | error = xfs_readsb(mp, flags); | 1384 | error = xfs_readsb(mp, flags); |
1528 | if (error) | 1385 | if (error) |
@@ -1536,17 +1393,18 @@ xfs_fs_fill_super( | |||
1536 | if (error) | 1393 | if (error) |
1537 | goto out_free_sb; | 1394 | goto out_free_sb; |
1538 | 1395 | ||
1539 | if (mp->m_flags & XFS_MOUNT_BARRIER) | ||
1540 | xfs_mountfs_check_barriers(mp); | ||
1541 | |||
1542 | error = xfs_filestream_mount(mp); | 1396 | error = xfs_filestream_mount(mp); |
1543 | if (error) | 1397 | if (error) |
1544 | goto out_free_sb; | 1398 | goto out_free_sb; |
1545 | 1399 | ||
1546 | error = xfs_mountfs(mp); | 1400 | /* |
1547 | if (error) | 1401 | * we must configure the block size in the superblock before we run the |
1548 | goto out_filestream_unmount; | 1402 | * full mount process as the mount process can lookup and cache inodes. |
1549 | 1403 | * For the same reason we must also initialise the syncd and register | |
1404 | * the inode cache shrinker so that inodes can be reclaimed during | ||
1405 | * operations like a quotacheck that iterate all inodes in the | ||
1406 | * filesystem. | ||
1407 | */ | ||
1550 | sb->s_magic = XFS_SB_MAGIC; | 1408 | sb->s_magic = XFS_SB_MAGIC; |
1551 | sb->s_blocksize = mp->m_sb.sb_blocksize; | 1409 | sb->s_blocksize = mp->m_sb.sb_blocksize; |
1552 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; | 1410 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; |
@@ -1554,6 +1412,16 @@ xfs_fs_fill_super( | |||
1554 | sb->s_time_gran = 1; | 1412 | sb->s_time_gran = 1; |
1555 | set_posix_acl_flag(sb); | 1413 | set_posix_acl_flag(sb); |
1556 | 1414 | ||
1415 | error = xfs_syncd_init(mp); | ||
1416 | if (error) | ||
1417 | goto out_filestream_unmount; | ||
1418 | |||
1419 | xfs_inode_shrinker_register(mp); | ||
1420 | |||
1421 | error = xfs_mountfs(mp); | ||
1422 | if (error) | ||
1423 | goto out_syncd_stop; | ||
1424 | |||
1557 | root = igrab(VFS_I(mp->m_rootip)); | 1425 | root = igrab(VFS_I(mp->m_rootip)); |
1558 | if (!root) { | 1426 | if (!root) { |
1559 | error = ENOENT; | 1427 | error = ENOENT; |
@@ -1569,20 +1437,18 @@ xfs_fs_fill_super( | |||
1569 | goto fail_vnrele; | 1437 | goto fail_vnrele; |
1570 | } | 1438 | } |
1571 | 1439 | ||
1572 | error = xfs_syncd_init(mp); | ||
1573 | if (error) | ||
1574 | goto fail_vnrele; | ||
1575 | |||
1576 | xfs_inode_shrinker_register(mp); | ||
1577 | |||
1578 | return 0; | 1440 | return 0; |
1579 | 1441 | ||
1442 | out_syncd_stop: | ||
1443 | xfs_inode_shrinker_unregister(mp); | ||
1444 | xfs_syncd_stop(mp); | ||
1580 | out_filestream_unmount: | 1445 | out_filestream_unmount: |
1581 | xfs_filestream_unmount(mp); | 1446 | xfs_filestream_unmount(mp); |
1582 | out_free_sb: | 1447 | out_free_sb: |
1583 | xfs_freesb(mp); | 1448 | xfs_freesb(mp); |
1584 | out_destroy_counters: | 1449 | out_destroy_counters: |
1585 | xfs_icsb_destroy_counters(mp); | 1450 | xfs_icsb_destroy_counters(mp); |
1451 | out_close_devices: | ||
1586 | xfs_close_devices(mp); | 1452 | xfs_close_devices(mp); |
1587 | out_free_fsname: | 1453 | out_free_fsname: |
1588 | xfs_free_fsname(mp); | 1454 | xfs_free_fsname(mp); |
@@ -1599,6 +1465,9 @@ xfs_fs_fill_super( | |||
1599 | } | 1465 | } |
1600 | 1466 | ||
1601 | fail_unmount: | 1467 | fail_unmount: |
1468 | xfs_inode_shrinker_unregister(mp); | ||
1469 | xfs_syncd_stop(mp); | ||
1470 | |||
1602 | /* | 1471 | /* |
1603 | * Blow away any referenced inode in the filestreams cache. | 1472 | * Blow away any referenced inode in the filestreams cache. |
1604 | * This can and will cause log traffic as inodes go inactive | 1473 | * This can and will cause log traffic as inodes go inactive |
@@ -1612,16 +1481,14 @@ xfs_fs_fill_super( | |||
1612 | goto out_free_sb; | 1481 | goto out_free_sb; |
1613 | } | 1482 | } |
1614 | 1483 | ||
1615 | STATIC int | 1484 | STATIC struct dentry * |
1616 | xfs_fs_get_sb( | 1485 | xfs_fs_mount( |
1617 | struct file_system_type *fs_type, | 1486 | struct file_system_type *fs_type, |
1618 | int flags, | 1487 | int flags, |
1619 | const char *dev_name, | 1488 | const char *dev_name, |
1620 | void *data, | 1489 | void *data) |
1621 | struct vfsmount *mnt) | ||
1622 | { | 1490 | { |
1623 | return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, | 1491 | return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); |
1624 | mnt); | ||
1625 | } | 1492 | } |
1626 | 1493 | ||
1627 | static const struct super_operations xfs_super_operations = { | 1494 | static const struct super_operations xfs_super_operations = { |
@@ -1642,7 +1509,7 @@ static const struct super_operations xfs_super_operations = { | |||
1642 | static struct file_system_type xfs_fs_type = { | 1509 | static struct file_system_type xfs_fs_type = { |
1643 | .owner = THIS_MODULE, | 1510 | .owner = THIS_MODULE, |
1644 | .name = "xfs", | 1511 | .name = "xfs", |
1645 | .get_sb = xfs_fs_get_sb, | 1512 | .mount = xfs_fs_mount, |
1646 | .kill_sb = kill_block_super, | 1513 | .kill_sb = kill_block_super, |
1647 | .fs_flags = FS_REQUIRES_DEV, | 1514 | .fs_flags = FS_REQUIRES_DEV, |
1648 | }; | 1515 | }; |
@@ -1790,6 +1657,38 @@ xfs_destroy_zones(void) | |||
1790 | } | 1657 | } |
1791 | 1658 | ||
1792 | STATIC int __init | 1659 | STATIC int __init |
1660 | xfs_init_workqueues(void) | ||
1661 | { | ||
1662 | /* | ||
1663 | * max_active is set to 8 to give enough concurrency to allow | ||
1664 | * multiple work operations on each CPU to run. This allows multiple | ||
1665 | * filesystems to be running sync work concurrently, and scales with | ||
1666 | * the number of CPUs in the system. | ||
1667 | */ | ||
1668 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); | ||
1669 | if (!xfs_syncd_wq) | ||
1670 | goto out; | ||
1671 | |||
1672 | xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); | ||
1673 | if (!xfs_ail_wq) | ||
1674 | goto out_destroy_syncd; | ||
1675 | |||
1676 | return 0; | ||
1677 | |||
1678 | out_destroy_syncd: | ||
1679 | destroy_workqueue(xfs_syncd_wq); | ||
1680 | out: | ||
1681 | return -ENOMEM; | ||
1682 | } | ||
1683 | |||
1684 | STATIC void | ||
1685 | xfs_destroy_workqueues(void) | ||
1686 | { | ||
1687 | destroy_workqueue(xfs_ail_wq); | ||
1688 | destroy_workqueue(xfs_syncd_wq); | ||
1689 | } | ||
1690 | |||
1691 | STATIC int __init | ||
1793 | init_xfs_fs(void) | 1692 | init_xfs_fs(void) |
1794 | { | 1693 | { |
1795 | int error; | 1694 | int error; |
@@ -1804,10 +1703,14 @@ init_xfs_fs(void) | |||
1804 | if (error) | 1703 | if (error) |
1805 | goto out; | 1704 | goto out; |
1806 | 1705 | ||
1807 | error = xfs_mru_cache_init(); | 1706 | error = xfs_init_workqueues(); |
1808 | if (error) | 1707 | if (error) |
1809 | goto out_destroy_zones; | 1708 | goto out_destroy_zones; |
1810 | 1709 | ||
1710 | error = xfs_mru_cache_init(); | ||
1711 | if (error) | ||
1712 | goto out_destroy_wq; | ||
1713 | |||
1811 | error = xfs_filestream_init(); | 1714 | error = xfs_filestream_init(); |
1812 | if (error) | 1715 | if (error) |
1813 | goto out_mru_cache_uninit; | 1716 | goto out_mru_cache_uninit; |
@@ -1841,6 +1744,8 @@ init_xfs_fs(void) | |||
1841 | xfs_filestream_uninit(); | 1744 | xfs_filestream_uninit(); |
1842 | out_mru_cache_uninit: | 1745 | out_mru_cache_uninit: |
1843 | xfs_mru_cache_uninit(); | 1746 | xfs_mru_cache_uninit(); |
1747 | out_destroy_wq: | ||
1748 | xfs_destroy_workqueues(); | ||
1844 | out_destroy_zones: | 1749 | out_destroy_zones: |
1845 | xfs_destroy_zones(); | 1750 | xfs_destroy_zones(); |
1846 | out: | 1751 | out: |
@@ -1857,6 +1762,7 @@ exit_xfs_fs(void) | |||
1857 | xfs_buf_terminate(); | 1762 | xfs_buf_terminate(); |
1858 | xfs_filestream_uninit(); | 1763 | xfs_filestream_uninit(); |
1859 | xfs_mru_cache_uninit(); | 1764 | xfs_mru_cache_uninit(); |
1765 | xfs_destroy_workqueues(); | ||
1860 | xfs_destroy_zones(); | 1766 | xfs_destroy_zones(); |
1861 | } | 1767 | } |
1862 | 1768 | ||
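The same file also converts the filesystem registration from the old .get_sb/get_sb_bdev() interface to the newer .mount()/mount_bdev() interface: xfs_fs_mount() now returns the root dentry instead of filling in a vfsmount. A minimal sketch of that shape, using hypothetical demofs_* names rather than the XFS code itself, would be:

	#include <linux/fs.h>
	#include <linux/module.h>

	/* Sketch only: fill_super parses options, reads the on-disk
	 * superblock, and sets sb->s_op and sb->s_root. */
	static int demofs_fill_super(struct super_block *sb, void *data, int silent)
	{
		return -EINVAL;		/* placeholder */
	}

	static struct dentry *demofs_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
	{
		/* mount_bdev() opens the block device and calls fill_super */
		return mount_bdev(fs_type, flags, dev_name, data,
				  demofs_fill_super);
	}

	static struct file_system_type demofs_fs_type = {
		.owner		= THIS_MODULE,
		.name		= "demofs",
		.mount		= demofs_mount,		/* replaces .get_sb */
		.kill_sb	= kill_block_super,
		.fs_flags	= FS_REQUIRES_DEV,
	};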
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 1ef4a4d2d997..50a3266c999e 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -62,6 +62,7 @@ extern void xfs_qm_exit(void); | |||
62 | # define XFS_DBG_STRING "no debug" | 62 | # define XFS_DBG_STRING "no debug" |
63 | #endif | 63 | #endif |
64 | 64 | ||
65 | #define XFS_VERSION_STRING "SGI XFS" | ||
65 | #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ | 66 | #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ |
66 | XFS_SECURITY_STRING \ | 67 | XFS_SECURITY_STRING \ |
67 | XFS_REALTIME_STRING \ | 68 | XFS_REALTIME_STRING \ |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 81976ffed7d6..8ecad5ff9f9b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_trans_priv.h" | ||
25 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
@@ -39,42 +40,61 @@ | |||
39 | #include <linux/kthread.h> | 40 | #include <linux/kthread.h> |
40 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
41 | 42 | ||
43 | struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
42 | 44 | ||
43 | STATIC xfs_inode_t * | 45 | /* |
44 | xfs_inode_ag_lookup( | 46 | * The inode lookup is done in batches to keep the amount of lock traffic and |
45 | struct xfs_mount *mp, | 47 | * radix tree lookups to a minimum. The batch size is a trade off between |
46 | struct xfs_perag *pag, | 48 | * lookup reduction and stack usage. This is in the reclaim path, so we can't |
47 | uint32_t *first_index, | 49 | * be too greedy. |
48 | int tag) | 50 | */ |
51 | #define XFS_LOOKUP_BATCH 32 | ||
52 | |||
53 | STATIC int | ||
54 | xfs_inode_ag_walk_grab( | ||
55 | struct xfs_inode *ip) | ||
49 | { | 56 | { |
50 | int nr_found; | 57 | struct inode *inode = VFS_I(ip); |
51 | struct xfs_inode *ip; | 58 | |
59 | ASSERT(rcu_read_lock_held()); | ||
52 | 60 | ||
53 | /* | 61 | /* |
54 | * use a gang lookup to find the next inode in the tree | 62 | * check for stale RCU freed inode |
55 | * as the tree is sparse and a gang lookup walks to find | 63 | * |
56 | * the number of objects requested. | 64 | * If the inode has been reallocated, it doesn't matter if it's not in |
65 | * the AG we are walking - we are walking for writeback, so if it | ||
66 | * passes all the "valid inode" checks and is dirty, then we'll write | ||
67 | * it back anyway. If it has been reallocated and still being | ||
68 | * initialised, the XFS_INEW check below will catch it. | ||
57 | */ | 69 | */ |
58 | if (tag == XFS_ICI_NO_TAG) { | 70 | spin_lock(&ip->i_flags_lock); |
59 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 71 | if (!ip->i_ino) |
60 | (void **)&ip, *first_index, 1); | 72 | goto out_unlock_noent; |
61 | } else { | 73 | |
62 | nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, | 74 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ |
63 | (void **)&ip, *first_index, 1, tag); | 75 | if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) |
76 | goto out_unlock_noent; | ||
77 | spin_unlock(&ip->i_flags_lock); | ||
78 | |||
79 | /* nothing to sync during shutdown */ | ||
80 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
81 | return EFSCORRUPTED; | ||
82 | |||
83 | /* If we can't grab the inode, it must be on its way to reclaim. */ | ||
84 | if (!igrab(inode)) | ||
85 | return ENOENT; | ||
86 | |||
87 | if (is_bad_inode(inode)) { | ||
88 | IRELE(ip); | ||
89 | return ENOENT; | ||
64 | } | 90 | } |
65 | if (!nr_found) | ||
66 | return NULL; | ||
67 | 91 | ||
68 | /* | 92 | /* inode is valid */ |
69 | * Update the index for the next lookup. Catch overflows | 93 | return 0; |
70 | * into the next AG range which can occur if we have inodes | 94 | |
71 | * in the last block of the AG and we are currently | 95 | out_unlock_noent: |
72 | * pointing to the last inode. | 96 | spin_unlock(&ip->i_flags_lock); |
73 | */ | 97 | return ENOENT; |
74 | *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
75 | if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
76 | return NULL; | ||
77 | return ip; | ||
78 | } | 98 | } |
79 | 99 | ||
80 | STATIC int | 100 | STATIC int |
@@ -83,49 +103,83 @@ xfs_inode_ag_walk( | |||
83 | struct xfs_perag *pag, | 103 | struct xfs_perag *pag, |
84 | int (*execute)(struct xfs_inode *ip, | 104 | int (*execute)(struct xfs_inode *ip, |
85 | struct xfs_perag *pag, int flags), | 105 | struct xfs_perag *pag, int flags), |
86 | int flags, | 106 | int flags) |
87 | int tag, | ||
88 | int exclusive, | ||
89 | int *nr_to_scan) | ||
90 | { | 107 | { |
91 | uint32_t first_index; | 108 | uint32_t first_index; |
92 | int last_error = 0; | 109 | int last_error = 0; |
93 | int skipped; | 110 | int skipped; |
111 | int done; | ||
112 | int nr_found; | ||
94 | 113 | ||
95 | restart: | 114 | restart: |
115 | done = 0; | ||
96 | skipped = 0; | 116 | skipped = 0; |
97 | first_index = 0; | 117 | first_index = 0; |
118 | nr_found = 0; | ||
98 | do { | 119 | do { |
120 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | ||
99 | int error = 0; | 121 | int error = 0; |
100 | xfs_inode_t *ip; | 122 | int i; |
101 | 123 | ||
102 | if (exclusive) | 124 | rcu_read_lock(); |
103 | write_lock(&pag->pag_ici_lock); | 125 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
104 | else | 126 | (void **)batch, first_index, |
105 | read_lock(&pag->pag_ici_lock); | 127 | XFS_LOOKUP_BATCH); |
106 | ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); | 128 | if (!nr_found) { |
107 | if (!ip) { | 129 | rcu_read_unlock(); |
108 | if (exclusive) | ||
109 | write_unlock(&pag->pag_ici_lock); | ||
110 | else | ||
111 | read_unlock(&pag->pag_ici_lock); | ||
112 | break; | 130 | break; |
113 | } | 131 | } |
114 | 132 | ||
115 | /* execute releases pag->pag_ici_lock */ | 133 | /* |
116 | error = execute(ip, pag, flags); | 134 | * Grab the inodes before we drop the lock. if we found |
117 | if (error == EAGAIN) { | 135 | * nothing, nr == 0 and the loop will be skipped. |
118 | skipped++; | 136 | */ |
119 | continue; | 137 | for (i = 0; i < nr_found; i++) { |
138 | struct xfs_inode *ip = batch[i]; | ||
139 | |||
140 | if (done || xfs_inode_ag_walk_grab(ip)) | ||
141 | batch[i] = NULL; | ||
142 | |||
143 | /* | ||
144 | * Update the index for the next lookup. Catch | ||
145 | * overflows into the next AG range which can occur if | ||
146 | * we have inodes in the last block of the AG and we | ||
147 | * are currently pointing to the last inode. | ||
148 | * | ||
149 | * Because we may see inodes that are from the wrong AG | ||
150 | * due to RCU freeing and reallocation, only update the | ||
151 | * index if it lies in this AG. It was a race that led | ||
152 | * us to see this inode, so another lookup from the | ||
153 | * same index will not find it again. | ||
154 | */ | ||
155 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) | ||
156 | continue; | ||
157 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
158 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
159 | done = 1; | ||
160 | } | ||
161 | |||
162 | /* unlock now we've grabbed the inodes. */ | ||
163 | rcu_read_unlock(); | ||
164 | |||
165 | for (i = 0; i < nr_found; i++) { | ||
166 | if (!batch[i]) | ||
167 | continue; | ||
168 | error = execute(batch[i], pag, flags); | ||
169 | IRELE(batch[i]); | ||
170 | if (error == EAGAIN) { | ||
171 | skipped++; | ||
172 | continue; | ||
173 | } | ||
174 | if (error && last_error != EFSCORRUPTED) | ||
175 | last_error = error; | ||
120 | } | 176 | } |
121 | if (error) | ||
122 | last_error = error; | ||
123 | 177 | ||
124 | /* bail out if the filesystem is corrupted. */ | 178 | /* bail out if the filesystem is corrupted. */ |
125 | if (error == EFSCORRUPTED) | 179 | if (error == EFSCORRUPTED) |
126 | break; | 180 | break; |
127 | 181 | ||
128 | } while ((*nr_to_scan)--); | 182 | } while (nr_found && !done); |
129 | 183 | ||
130 | if (skipped) { | 184 | if (skipped) { |
131 | delay(1); | 185 | delay(1); |
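The rewritten xfs_inode_ag_walk() above replaces the one-at-a-time, rwlock-protected lookup with a batched walk: up to XFS_LOOKUP_BATCH inodes are found with a single radix_tree_gang_lookup() under rcu_read_lock(), references are taken and the next lookup index advanced while still inside the RCU critical section, and only then is the lock dropped so the execute() callback can block. A stripped-down sketch of the same pattern on a generic RCU-protected radix tree, with hypothetical demo_* names, is:

	#include <linux/radix-tree.h>
	#include <linux/rcupdate.h>

	#define DEMO_BATCH	32

	struct demo_item {
		unsigned long	index;	/* key the item is inserted under */
	};

	/*
	 * grab() takes a reference (non-zero return means stale/busy, skip it);
	 * process() may block. Both are supplied by the caller.
	 */
	static void demo_walk(struct radix_tree_root *root,
			      int (*grab)(struct demo_item *),
			      void (*process)(struct demo_item *))
	{
		struct demo_item *batch[DEMO_BATCH];
		unsigned long first_index = 0;
		unsigned int nr_found;
		int i;

		do {
			rcu_read_lock();
			nr_found = radix_tree_gang_lookup(root, (void **)batch,
							  first_index, DEMO_BATCH);
			if (!nr_found) {
				rcu_read_unlock();
				break;
			}

			/* take references before dropping the RCU read lock */
			for (i = 0; i < nr_found; i++) {
				struct demo_item *item = batch[i];

				if (grab(item))
					batch[i] = NULL;
				first_index = item->index + 1;
			}
			rcu_read_unlock();

			/* now safe to block while processing the batch */
			for (i = 0; i < nr_found; i++) {
				if (batch[i])
					process(batch[i]);
			}
		} while (nr_found);
	}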
@@ -134,110 +188,32 @@ restart: | |||
134 | return last_error; | 188 | return last_error; |
135 | } | 189 | } |
136 | 190 | ||
137 | /* | ||
138 | * Select the next per-ag structure to iterate during the walk. The reclaim | ||
139 | * walk is optimised only to walk AGs with reclaimable inodes in them. | ||
140 | */ | ||
141 | static struct xfs_perag * | ||
142 | xfs_inode_ag_iter_next_pag( | ||
143 | struct xfs_mount *mp, | ||
144 | xfs_agnumber_t *first, | ||
145 | int tag) | ||
146 | { | ||
147 | struct xfs_perag *pag = NULL; | ||
148 | |||
149 | if (tag == XFS_ICI_RECLAIM_TAG) { | ||
150 | int found; | ||
151 | int ref; | ||
152 | |||
153 | spin_lock(&mp->m_perag_lock); | ||
154 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
155 | (void **)&pag, *first, 1, tag); | ||
156 | if (found <= 0) { | ||
157 | spin_unlock(&mp->m_perag_lock); | ||
158 | return NULL; | ||
159 | } | ||
160 | *first = pag->pag_agno + 1; | ||
161 | /* open coded pag reference increment */ | ||
162 | ref = atomic_inc_return(&pag->pag_ref); | ||
163 | spin_unlock(&mp->m_perag_lock); | ||
164 | trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_); | ||
165 | } else { | ||
166 | pag = xfs_perag_get(mp, *first); | ||
167 | (*first)++; | ||
168 | } | ||
169 | return pag; | ||
170 | } | ||
171 | |||
172 | int | 191 | int |
173 | xfs_inode_ag_iterator( | 192 | xfs_inode_ag_iterator( |
174 | struct xfs_mount *mp, | 193 | struct xfs_mount *mp, |
175 | int (*execute)(struct xfs_inode *ip, | 194 | int (*execute)(struct xfs_inode *ip, |
176 | struct xfs_perag *pag, int flags), | 195 | struct xfs_perag *pag, int flags), |
177 | int flags, | 196 | int flags) |
178 | int tag, | ||
179 | int exclusive, | ||
180 | int *nr_to_scan) | ||
181 | { | 197 | { |
182 | struct xfs_perag *pag; | 198 | struct xfs_perag *pag; |
183 | int error = 0; | 199 | int error = 0; |
184 | int last_error = 0; | 200 | int last_error = 0; |
185 | xfs_agnumber_t ag; | 201 | xfs_agnumber_t ag; |
186 | int nr; | ||
187 | 202 | ||
188 | nr = nr_to_scan ? *nr_to_scan : INT_MAX; | ||
189 | ag = 0; | 203 | ag = 0; |
190 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { | 204 | while ((pag = xfs_perag_get(mp, ag))) { |
191 | error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, | 205 | ag = pag->pag_agno + 1; |
192 | exclusive, &nr); | 206 | error = xfs_inode_ag_walk(mp, pag, execute, flags); |
193 | xfs_perag_put(pag); | 207 | xfs_perag_put(pag); |
194 | if (error) { | 208 | if (error) { |
195 | last_error = error; | 209 | last_error = error; |
196 | if (error == EFSCORRUPTED) | 210 | if (error == EFSCORRUPTED) |
197 | break; | 211 | break; |
198 | } | 212 | } |
199 | if (nr <= 0) | ||
200 | break; | ||
201 | } | 213 | } |
202 | if (nr_to_scan) | ||
203 | *nr_to_scan = nr; | ||
204 | return XFS_ERROR(last_error); | 214 | return XFS_ERROR(last_error); |
205 | } | 215 | } |
206 | 216 | ||
207 | /* must be called with pag_ici_lock held and releases it */ | ||
208 | int | ||
209 | xfs_sync_inode_valid( | ||
210 | struct xfs_inode *ip, | ||
211 | struct xfs_perag *pag) | ||
212 | { | ||
213 | struct inode *inode = VFS_I(ip); | ||
214 | int error = EFSCORRUPTED; | ||
215 | |||
216 | /* nothing to sync during shutdown */ | ||
217 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
218 | goto out_unlock; | ||
219 | |||
220 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
221 | error = ENOENT; | ||
222 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
223 | goto out_unlock; | ||
224 | |||
225 | /* If we can't grab the inode, it must on it's way to reclaim. */ | ||
226 | if (!igrab(inode)) | ||
227 | goto out_unlock; | ||
228 | |||
229 | if (is_bad_inode(inode)) { | ||
230 | IRELE(ip); | ||
231 | goto out_unlock; | ||
232 | } | ||
233 | |||
234 | /* inode is valid */ | ||
235 | error = 0; | ||
236 | out_unlock: | ||
237 | read_unlock(&pag->pag_ici_lock); | ||
238 | return error; | ||
239 | } | ||
240 | |||
241 | STATIC int | 217 | STATIC int |
242 | xfs_sync_inode_data( | 218 | xfs_sync_inode_data( |
243 | struct xfs_inode *ip, | 219 | struct xfs_inode *ip, |
@@ -248,10 +224,6 @@ xfs_sync_inode_data( | |||
248 | struct address_space *mapping = inode->i_mapping; | 224 | struct address_space *mapping = inode->i_mapping; |
249 | int error = 0; | 225 | int error = 0; |
250 | 226 | ||
251 | error = xfs_sync_inode_valid(ip, pag); | ||
252 | if (error) | ||
253 | return error; | ||
254 | |||
255 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 227 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
256 | goto out_wait; | 228 | goto out_wait; |
257 | 229 | ||
@@ -268,7 +240,6 @@ xfs_sync_inode_data( | |||
268 | out_wait: | 240 | out_wait: |
269 | if (flags & SYNC_WAIT) | 241 | if (flags & SYNC_WAIT) |
270 | xfs_ioend_wait(ip); | 242 | xfs_ioend_wait(ip); |
271 | IRELE(ip); | ||
272 | return error; | 243 | return error; |
273 | } | 244 | } |
274 | 245 | ||
@@ -280,10 +251,6 @@ xfs_sync_inode_attr( | |||
280 | { | 251 | { |
281 | int error = 0; | 252 | int error = 0; |
282 | 253 | ||
283 | error = xfs_sync_inode_valid(ip, pag); | ||
284 | if (error) | ||
285 | return error; | ||
286 | |||
287 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 254 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
288 | if (xfs_inode_clean(ip)) | 255 | if (xfs_inode_clean(ip)) |
289 | goto out_unlock; | 256 | goto out_unlock; |
@@ -300,9 +267,18 @@ xfs_sync_inode_attr( | |||
300 | 267 | ||
301 | error = xfs_iflush(ip, flags); | 268 | error = xfs_iflush(ip, flags); |
302 | 269 | ||
270 | /* | ||
271 | * We don't want to try again on non-blocking flushes that can't run | ||
272 | * again immediately. If an inode really must be written, then that's | ||
273 | * what the SYNC_WAIT flag is for. | ||
274 | */ | ||
275 | if (error == EAGAIN) { | ||
276 | ASSERT(!(flags & SYNC_WAIT)); | ||
277 | error = 0; | ||
278 | } | ||
279 | |||
303 | out_unlock: | 280 | out_unlock: |
304 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 281 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
305 | IRELE(ip); | ||
306 | return error; | 282 | return error; |
307 | } | 283 | } |
308 | 284 | ||
@@ -318,8 +294,7 @@ xfs_sync_data( | |||
318 | 294 | ||
319 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); | 295 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); |
320 | 296 | ||
321 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, | 297 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); |
322 | XFS_ICI_NO_TAG, 0, NULL); | ||
323 | if (error) | 298 | if (error) |
324 | return XFS_ERROR(error); | 299 | return XFS_ERROR(error); |
325 | 300 | ||
@@ -337,8 +312,7 @@ xfs_sync_attr( | |||
337 | { | 312 | { |
338 | ASSERT((flags & ~SYNC_WAIT) == 0); | 313 | ASSERT((flags & ~SYNC_WAIT) == 0); |
339 | 314 | ||
340 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, | 315 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); |
341 | XFS_ICI_NO_TAG, 0, NULL); | ||
342 | } | 316 | } |
343 | 317 | ||
344 | STATIC int | 318 | STATIC int |
@@ -401,7 +375,7 @@ xfs_quiesce_data( | |||
401 | 375 | ||
402 | /* mark the log as covered if needed */ | 376 | /* mark the log as covered if needed */ |
403 | if (xfs_log_need_covered(mp)) | 377 | if (xfs_log_need_covered(mp)) |
404 | error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); | 378 | error2 = xfs_fs_log_dummy(mp); |
405 | 379 | ||
406 | /* flush data-only devices */ | 380 | /* flush data-only devices */ |
407 | if (mp->m_rtdev_targp) | 381 | if (mp->m_rtdev_targp) |
@@ -440,7 +414,7 @@ xfs_quiesce_fs( | |||
440 | /* | 414 | /* |
441 | * Second stage of a quiesce. The data is already synced, now we have to take | 415 | * Second stage of a quiesce. The data is already synced, now we have to take |
442 | * care of the metadata. New transactions are already blocked, so we need to | 416 | * care of the metadata. New transactions are already blocked, so we need to |
443 | * wait for any remaining transactions to drain out before proceding. | 417 | * wait for any remaining transactions to drain out before proceeding. |
444 | */ | 418 | */ |
445 | void | 419 | void |
446 | xfs_quiesce_attr( | 420 | xfs_quiesce_attr( |
@@ -464,69 +438,18 @@ xfs_quiesce_attr( | |||
464 | /* Push the superblock and write an unmount record */ | 438 | /* Push the superblock and write an unmount record */ |
465 | error = xfs_log_sbcount(mp, 1); | 439 | error = xfs_log_sbcount(mp, 1); |
466 | if (error) | 440 | if (error) |
467 | xfs_fs_cmn_err(CE_WARN, mp, | 441 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " |
468 | "xfs_attr_quiesce: failed to log sb changes. " | ||
469 | "Frozen image may not be consistent."); | 442 | "Frozen image may not be consistent."); |
470 | xfs_log_unmount_write(mp); | 443 | xfs_log_unmount_write(mp); |
471 | xfs_unmountfs_writesb(mp); | 444 | xfs_unmountfs_writesb(mp); |
472 | } | 445 | } |
473 | 446 | ||
474 | /* | 447 | static void |
475 | * Enqueue a work item to be picked up by the vfs xfssyncd thread. | 448 | xfs_syncd_queue_sync( |
476 | * Doing this has two advantages: | 449 | struct xfs_mount *mp) |
477 | * - It saves on stack space, which is tight in certain situations | ||
478 | * - It can be used (with care) as a mechanism to avoid deadlocks. | ||
479 | * Flushing while allocating in a full filesystem requires both. | ||
480 | */ | ||
481 | STATIC void | ||
482 | xfs_syncd_queue_work( | ||
483 | struct xfs_mount *mp, | ||
484 | void *data, | ||
485 | void (*syncer)(struct xfs_mount *, void *), | ||
486 | struct completion *completion) | ||
487 | { | ||
488 | struct xfs_sync_work *work; | ||
489 | |||
490 | work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); | ||
491 | INIT_LIST_HEAD(&work->w_list); | ||
492 | work->w_syncer = syncer; | ||
493 | work->w_data = data; | ||
494 | work->w_mount = mp; | ||
495 | work->w_completion = completion; | ||
496 | spin_lock(&mp->m_sync_lock); | ||
497 | list_add_tail(&work->w_list, &mp->m_sync_list); | ||
498 | spin_unlock(&mp->m_sync_lock); | ||
499 | wake_up_process(mp->m_sync_task); | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Flush delayed allocate data, attempting to free up reserved space | ||
504 | * from existing allocations. At this point a new allocation attempt | ||
505 | * has failed with ENOSPC and we are in the process of scratching our | ||
506 | * heads, looking about for more room... | ||
507 | */ | ||
508 | STATIC void | ||
509 | xfs_flush_inodes_work( | ||
510 | struct xfs_mount *mp, | ||
511 | void *arg) | ||
512 | { | ||
513 | struct inode *inode = arg; | ||
514 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
515 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
516 | iput(inode); | ||
517 | } | ||
518 | |||
519 | void | ||
520 | xfs_flush_inodes( | ||
521 | xfs_inode_t *ip) | ||
522 | { | 450 | { |
523 | struct inode *inode = VFS_I(ip); | 451 | queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, |
524 | DECLARE_COMPLETION_ONSTACK(completion); | 452 | msecs_to_jiffies(xfs_syncd_centisecs * 10)); |
525 | |||
526 | igrab(inode); | ||
527 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); | ||
528 | wait_for_completion(&completion); | ||
529 | xfs_log_force(ip->i_mount, XFS_LOG_SYNC); | ||
530 | } | 453 | } |
531 | 454 | ||
532 | /* | 455 | /* |
@@ -536,84 +459,119 @@ xfs_flush_inodes( | |||
536 | */ | 459 | */ |
537 | STATIC void | 460 | STATIC void |
538 | xfs_sync_worker( | 461 | xfs_sync_worker( |
539 | struct xfs_mount *mp, | 462 | struct work_struct *work) |
540 | void *unused) | ||
541 | { | 463 | { |
464 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
465 | struct xfs_mount, m_sync_work); | ||
542 | int error; | 466 | int error; |
543 | 467 | ||
544 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | 468 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { |
545 | xfs_log_force(mp, 0); | ||
546 | xfs_reclaim_inodes(mp, 0); | ||
547 | /* dgc: errors ignored here */ | 469 | /* dgc: errors ignored here */ |
548 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
549 | if (mp->m_super->s_frozen == SB_UNFROZEN && | 470 | if (mp->m_super->s_frozen == SB_UNFROZEN && |
550 | xfs_log_need_covered(mp)) | 471 | xfs_log_need_covered(mp)) |
551 | error = xfs_fs_log_dummy(mp, 0); | 472 | error = xfs_fs_log_dummy(mp); |
473 | else | ||
474 | xfs_log_force(mp, 0); | ||
475 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
476 | |||
477 | /* start pushing all the metadata that is currently dirty */ | ||
478 | xfs_ail_push_all(mp->m_ail); | ||
552 | } | 479 | } |
553 | mp->m_sync_seq++; | 480 | |
554 | wake_up(&mp->m_wait_single_sync_task); | 481 | /* queue us up again */ |
482 | xfs_syncd_queue_sync(mp); | ||
555 | } | 483 | } |
556 | 484 | ||
557 | STATIC int | 485 | /* |
558 | xfssyncd( | 486 | * Queue a new inode reclaim pass if there are reclaimable inodes and there |
559 | void *arg) | 487 | * isn't a reclaim pass already in progress. By default it runs every 5s based |
488 | * on the xfs syncd work default of 30s. Perhaps this should have its own | ||
489 | * tunable, but that can be done if this method proves to be ineffective or too | ||
490 | * aggressive. | ||
491 | */ | ||
492 | static void | ||
493 | xfs_syncd_queue_reclaim( | ||
494 | struct xfs_mount *mp) | ||
560 | { | 495 | { |
561 | struct xfs_mount *mp = arg; | ||
562 | long timeleft; | ||
563 | xfs_sync_work_t *work, *n; | ||
564 | LIST_HEAD (tmp); | ||
565 | |||
566 | set_freezable(); | ||
567 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); | ||
568 | for (;;) { | ||
569 | if (list_empty(&mp->m_sync_list)) | ||
570 | timeleft = schedule_timeout_interruptible(timeleft); | ||
571 | /* swsusp */ | ||
572 | try_to_freeze(); | ||
573 | if (kthread_should_stop() && list_empty(&mp->m_sync_list)) | ||
574 | break; | ||
575 | 496 | ||
576 | spin_lock(&mp->m_sync_lock); | 497 | /* |
577 | /* | 498 | * We can have inodes enter reclaim after we've shut down the syncd |
578 | * We can get woken by laptop mode, to do a sync - | 499 | * workqueue during unmount, so don't allow reclaim work to be queued |
579 | * that's the (only!) case where the list would be | 500 | * during unmount. |
580 | * empty with time remaining. | 501 | */ |
581 | */ | 502 | if (!(mp->m_super->s_flags & MS_ACTIVE)) |
582 | if (!timeleft || list_empty(&mp->m_sync_list)) { | 503 | return; |
583 | if (!timeleft) | ||
584 | timeleft = xfs_syncd_centisecs * | ||
585 | msecs_to_jiffies(10); | ||
586 | INIT_LIST_HEAD(&mp->m_sync_work.w_list); | ||
587 | list_add_tail(&mp->m_sync_work.w_list, | ||
588 | &mp->m_sync_list); | ||
589 | } | ||
590 | list_splice_init(&mp->m_sync_list, &tmp); | ||
591 | spin_unlock(&mp->m_sync_lock); | ||
592 | 504 | ||
593 | list_for_each_entry_safe(work, n, &tmp, w_list) { | 505 | rcu_read_lock(); |
594 | (*work->w_syncer)(mp, work->w_data); | 506 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { |
595 | list_del(&work->w_list); | 507 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, |
596 | if (work == &mp->m_sync_work) | 508 | msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); |
597 | continue; | ||
598 | if (work->w_completion) | ||
599 | complete(work->w_completion); | ||
600 | kmem_free(work); | ||
601 | } | ||
602 | } | 509 | } |
510 | rcu_read_unlock(); | ||
511 | } | ||
603 | 512 | ||
604 | return 0; | 513 | /* |
514 | * This is a fast pass over the inode cache to try to get reclaim moving on as | ||
515 | * many inodes as possible in a short period of time. It kicks itself every few | ||
516 | * seconds, as well as being kicked by the inode cache shrinker when memory | ||
517 | * goes low. It scans as quickly as possible avoiding locked inodes or those | ||
518 | * already being flushed, and once done schedules a future pass. | ||
519 | */ | ||
520 | STATIC void | ||
521 | xfs_reclaim_worker( | ||
522 | struct work_struct *work) | ||
523 | { | ||
524 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
525 | struct xfs_mount, m_reclaim_work); | ||
526 | |||
527 | xfs_reclaim_inodes(mp, SYNC_TRYLOCK); | ||
528 | xfs_syncd_queue_reclaim(mp); | ||
529 | } | ||
530 | |||
531 | /* | ||
532 | * Flush delayed allocate data, attempting to free up reserved space | ||
533 | * from existing allocations. At this point a new allocation attempt | ||
534 | * has failed with ENOSPC and we are in the process of scratching our | ||
535 | * heads, looking about for more room. | ||
536 | * | ||
537 | * Queue a new data flush if there isn't one already in progress and | ||
538 | * wait for completion of the flush. This means that we only ever have one | ||
539 | * inode flush in progress no matter how many ENOSPC events are occurring and | ||
540 | * so will prevent the system from bogging down due to every concurrent | ||
541 | * ENOSPC event scanning all the active inodes in the system for writeback. | ||
542 | */ | ||
543 | void | ||
544 | xfs_flush_inodes( | ||
545 | struct xfs_inode *ip) | ||
546 | { | ||
547 | struct xfs_mount *mp = ip->i_mount; | ||
548 | |||
549 | queue_work(xfs_syncd_wq, &mp->m_flush_work); | ||
550 | flush_work_sync(&mp->m_flush_work); | ||
551 | } | ||
552 | |||
553 | STATIC void | ||
554 | xfs_flush_worker( | ||
555 | struct work_struct *work) | ||
556 | { | ||
557 | struct xfs_mount *mp = container_of(work, | ||
558 | struct xfs_mount, m_flush_work); | ||
559 | |||
560 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
561 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
605 | } | 562 | } |
606 | 563 | ||
607 | int | 564 | int |
608 | xfs_syncd_init( | 565 | xfs_syncd_init( |
609 | struct xfs_mount *mp) | 566 | struct xfs_mount *mp) |
610 | { | 567 | { |
611 | mp->m_sync_work.w_syncer = xfs_sync_worker; | 568 | INIT_WORK(&mp->m_flush_work, xfs_flush_worker); |
612 | mp->m_sync_work.w_mount = mp; | 569 | INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); |
613 | mp->m_sync_work.w_completion = NULL; | 570 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); |
614 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); | 571 | |
615 | if (IS_ERR(mp->m_sync_task)) | 572 | xfs_syncd_queue_sync(mp); |
616 | return -PTR_ERR(mp->m_sync_task); | 573 | xfs_syncd_queue_reclaim(mp); |
574 | |||
617 | return 0; | 575 | return 0; |
618 | } | 576 | } |
619 | 577 | ||
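With this rework the per-mount xfssyncd thread is gone: xfs_syncd_init() above arms three work items on the shared xfs_syncd_wq workqueue (a self-rearming delayed sync work, a self-rearming delayed reclaim work, and a plain flush work that xfs_flush_inodes() queues and then flushes synchronously on ENOSPC), and xfs_syncd_stop() below cancels them. The core idiom is recovering the owning mount inside the worker with container_of()/to_delayed_work(). A hedged sketch of that structure, with hypothetical demo_* names standing in for struct xfs_mount and the XFS workers, and assuming the same era of the workqueue API used in this tree:

	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	struct demo_mount {
		struct workqueue_struct	*wq;
		struct delayed_work	sync_work;
		struct work_struct	flush_work;
	};

	static void demo_sync_worker(struct work_struct *work)
	{
		struct demo_mount *m = container_of(to_delayed_work(work),
						    struct demo_mount, sync_work);

		/* ... periodic sync of *m ... then re-arm ourselves */
		queue_delayed_work(m->wq, &m->sync_work, msecs_to_jiffies(30000));
	}

	static void demo_flush_worker(struct work_struct *work)
	{
		struct demo_mount *m = container_of(work, struct demo_mount,
						    flush_work);
		/* ... flush delayed allocations for *m ... */
	}

	static void demo_syncd_init(struct demo_mount *m)
	{
		INIT_WORK(&m->flush_work, demo_flush_worker);
		INIT_DELAYED_WORK(&m->sync_work, demo_sync_worker);
		queue_delayed_work(m->wq, &m->sync_work, msecs_to_jiffies(30000));
	}

	static void demo_flush_now(struct demo_mount *m)
	{
		/* queue and wait, as xfs_flush_inodes() does on ENOSPC */
		queue_work(m->wq, &m->flush_work);
		flush_work_sync(&m->flush_work);
	}

	static void demo_syncd_stop(struct demo_mount *m)
	{
		cancel_delayed_work_sync(&m->sync_work);
		cancel_work_sync(&m->flush_work);
	}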
@@ -621,7 +579,9 @@ void | |||
621 | xfs_syncd_stop( | 579 | xfs_syncd_stop( |
622 | struct xfs_mount *mp) | 580 | struct xfs_mount *mp) |
623 | { | 581 | { |
624 | kthread_stop(mp->m_sync_task); | 582 | cancel_delayed_work_sync(&mp->m_sync_work); |
583 | cancel_delayed_work_sync(&mp->m_reclaim_work); | ||
584 | cancel_work_sync(&mp->m_flush_work); | ||
625 | } | 585 | } |
626 | 586 | ||
627 | void | 587 | void |
@@ -640,6 +600,10 @@ __xfs_inode_set_reclaim_tag( | |||
640 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), | 600 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), |
641 | XFS_ICI_RECLAIM_TAG); | 601 | XFS_ICI_RECLAIM_TAG); |
642 | spin_unlock(&ip->i_mount->m_perag_lock); | 602 | spin_unlock(&ip->i_mount->m_perag_lock); |
603 | |||
604 | /* schedule periodic background inode reclaim */ | ||
605 | xfs_syncd_queue_reclaim(ip->i_mount); | ||
606 | |||
643 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, | 607 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, |
644 | -1, _RET_IP_); | 608 | -1, _RET_IP_); |
645 | } | 609 | } |
@@ -659,12 +623,12 @@ xfs_inode_set_reclaim_tag( | |||
659 | struct xfs_perag *pag; | 623 | struct xfs_perag *pag; |
660 | 624 | ||
661 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | 625 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
662 | write_lock(&pag->pag_ici_lock); | 626 | spin_lock(&pag->pag_ici_lock); |
663 | spin_lock(&ip->i_flags_lock); | 627 | spin_lock(&ip->i_flags_lock); |
664 | __xfs_inode_set_reclaim_tag(pag, ip); | 628 | __xfs_inode_set_reclaim_tag(pag, ip); |
665 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 629 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
666 | spin_unlock(&ip->i_flags_lock); | 630 | spin_unlock(&ip->i_flags_lock); |
667 | write_unlock(&pag->pag_ici_lock); | 631 | spin_unlock(&pag->pag_ici_lock); |
668 | xfs_perag_put(pag); | 632 | xfs_perag_put(pag); |
669 | } | 633 | } |
670 | 634 | ||
@@ -698,6 +662,53 @@ __xfs_inode_clear_reclaim_tag( | |||
698 | } | 662 | } |
699 | 663 | ||
700 | /* | 664 | /* |
665 | * Grab the inode for reclaim exclusively. | ||
666 | * Return 0 if we grabbed it, non-zero otherwise. | ||
667 | */ | ||
668 | STATIC int | ||
669 | xfs_reclaim_inode_grab( | ||
670 | struct xfs_inode *ip, | ||
671 | int flags) | ||
672 | { | ||
673 | ASSERT(rcu_read_lock_held()); | ||
674 | |||
675 | /* quick check for stale RCU freed inode */ | ||
676 | if (!ip->i_ino) | ||
677 | return 1; | ||
678 | |||
679 | /* | ||
680 | * do some unlocked checks first to avoid unnecessary lock traffic. | ||
681 | * The first is a flush lock check, the second is an already-in-reclaim | ||
682 | * check. Only do these checks if we are not going to block on locks. | ||
683 | */ | ||
684 | if ((flags & SYNC_TRYLOCK) && | ||
685 | (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { | ||
686 | return 1; | ||
687 | } | ||
688 | |||
689 | /* | ||
690 | * The radix tree lock here protects a thread in xfs_iget from racing | ||
691 | * with us starting reclaim on the inode. Once we have the | ||
692 | * XFS_IRECLAIM flag set it will not touch us. | ||
693 | * | ||
694 | * Due to RCU lookup, we may find inodes that have been freed and only | ||
695 | * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that | ||
696 | * aren't candidates for reclaim at all, so we must check that | ||
697 | * XFS_IRECLAIMABLE is set first before proceeding to reclaim. | ||
698 | */ | ||
699 | spin_lock(&ip->i_flags_lock); | ||
700 | if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || | ||
701 | __xfs_iflags_test(ip, XFS_IRECLAIM)) { | ||
702 | /* not a reclaim candidate. */ | ||
703 | spin_unlock(&ip->i_flags_lock); | ||
704 | return 1; | ||
705 | } | ||
706 | __xfs_iflags_set(ip, XFS_IRECLAIM); | ||
707 | spin_unlock(&ip->i_flags_lock); | ||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | /* | ||
701 | * Inodes in different states need to be treated differently, and the return | 712 | * Inodes in different states need to be treated differently, and the return |
702 | * value of xfs_iflush is not sufficient to get this right. The following table | 713 | * value of xfs_iflush is not sufficient to get this right. The following table |
703 | * lists the inode states and the reclaim actions necessary for non-blocking | 714 | * lists the inode states and the reclaim actions necessary for non-blocking |
@@ -753,25 +764,10 @@ xfs_reclaim_inode( | |||
753 | struct xfs_perag *pag, | 764 | struct xfs_perag *pag, |
754 | int sync_mode) | 765 | int sync_mode) |
755 | { | 766 | { |
756 | int error = 0; | 767 | int error; |
757 | |||
758 | /* | ||
759 | * The radix tree lock here protects a thread in xfs_iget from racing | ||
760 | * with us starting reclaim on the inode. Once we have the | ||
761 | * XFS_IRECLAIM flag set it will not touch us. | ||
762 | */ | ||
763 | spin_lock(&ip->i_flags_lock); | ||
764 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
765 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | ||
766 | /* ignore as it is already under reclaim */ | ||
767 | spin_unlock(&ip->i_flags_lock); | ||
768 | write_unlock(&pag->pag_ici_lock); | ||
769 | return 0; | ||
770 | } | ||
771 | __xfs_iflags_set(ip, XFS_IRECLAIM); | ||
772 | spin_unlock(&ip->i_flags_lock); | ||
773 | write_unlock(&pag->pag_ici_lock); | ||
774 | 768 | ||
769 | restart: | ||
770 | error = 0; | ||
775 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 771 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
776 | if (!xfs_iflock_nowait(ip)) { | 772 | if (!xfs_iflock_nowait(ip)) { |
777 | if (!(sync_mode & SYNC_WAIT)) | 773 | if (!(sync_mode & SYNC_WAIT)) |
@@ -797,9 +793,31 @@ xfs_reclaim_inode( | |||
797 | if (xfs_inode_clean(ip)) | 793 | if (xfs_inode_clean(ip)) |
798 | goto reclaim; | 794 | goto reclaim; |
799 | 795 | ||
800 | /* Now we have an inode that needs flushing */ | 796 | /* |
801 | error = xfs_iflush(ip, sync_mode); | 797 | * Now we have an inode that needs flushing. |
798 | * | ||
799 | * We do a nonblocking flush here even if we are doing a SYNC_WAIT | ||
800 | * reclaim as we can deadlock with inode cluster removal. | ||
801 | * xfs_ifree_cluster() can lock the inode buffer before it locks the | ||
802 | * ip->i_lock, and we are doing the exact opposite here. As a result, | ||
803 | * doing a blocking xfs_itobp() to get the cluster buffer will result | ||
804 | * in an ABBA deadlock with xfs_ifree_cluster(). | ||
805 | * | ||
806 | * As xfs_ifree_cluster() must gather all inodes that are active in the | ||
807 | * cache to mark them stale, if we hit this case we don't actually want | ||
808 | * to do IO here - we want the inode marked stale so we can simply | ||
809 | * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, | ||
810 | * just unlock the inode, back off and try again. Hopefully the next | ||
811 | * pass through will see the stale flag set on the inode. | ||
812 | */ | ||
813 | error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); | ||
802 | if (sync_mode & SYNC_WAIT) { | 814 | if (sync_mode & SYNC_WAIT) { |
815 | if (error == EAGAIN) { | ||
816 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
817 | /* backoff longer than in xfs_ifree_cluster */ | ||
818 | delay(2); | ||
819 | goto restart; | ||
820 | } | ||
803 | xfs_iflock(ip); | 821 | xfs_iflock(ip); |
804 | goto reclaim; | 822 | goto reclaim; |
805 | } | 823 | } |
@@ -814,7 +832,7 @@ xfs_reclaim_inode( | |||
814 | * pass on the error. | 832 | * pass on the error. |
815 | */ | 833 | */ |
816 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 834 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
817 | xfs_fs_cmn_err(CE_WARN, ip->i_mount, | 835 | xfs_warn(ip->i_mount, |
818 | "inode 0x%llx background reclaim flush failed with %d", | 836 | "inode 0x%llx background reclaim flush failed with %d", |
819 | (long long)ip->i_ino, error); | 837 | (long long)ip->i_ino, error); |
820 | } | 838 | } |
@@ -842,12 +860,12 @@ reclaim: | |||
842 | * added to the tree assert that it's been there before to catch | 860 | * added to the tree assert that it's been there before to catch |
843 | * problems with the inode life time early on. | 861 | * problems with the inode life time early on. |
844 | */ | 862 | */ |
845 | write_lock(&pag->pag_ici_lock); | 863 | spin_lock(&pag->pag_ici_lock); |
846 | if (!radix_tree_delete(&pag->pag_ici_root, | 864 | if (!radix_tree_delete(&pag->pag_ici_root, |
847 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 865 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
848 | ASSERT(0); | 866 | ASSERT(0); |
849 | __xfs_inode_clear_reclaim(pag, ip); | 867 | __xfs_inode_clear_reclaim(pag, ip); |
850 | write_unlock(&pag->pag_ici_lock); | 868 | spin_unlock(&pag->pag_ici_lock); |
851 | 869 | ||
852 | /* | 870 | /* |
853 | * Here we do an (almost) spurious inode lock in order to coordinate | 871 | * Here we do an (almost) spurious inode lock in order to coordinate |
@@ -868,45 +886,181 @@ reclaim: | |||
868 | 886 | ||
869 | } | 887 | } |
870 | 888 | ||
889 | /* | ||
890 | * Walk the AGs and reclaim the inodes in them. Even if the filesystem is | ||
891 | * corrupted, we still want to try to reclaim all the inodes. If we don't, | ||
892 | * then a shutdown during the filesystem unmount reclaim walk will leak all the | ||
893 | * unreclaimed inodes. | ||
894 | */ | ||
895 | int | ||
896 | xfs_reclaim_inodes_ag( | ||
897 | struct xfs_mount *mp, | ||
898 | int flags, | ||
899 | int *nr_to_scan) | ||
900 | { | ||
901 | struct xfs_perag *pag; | ||
902 | int error = 0; | ||
903 | int last_error = 0; | ||
904 | xfs_agnumber_t ag; | ||
905 | int trylock = flags & SYNC_TRYLOCK; | ||
906 | int skipped; | ||
907 | |||
908 | restart: | ||
909 | ag = 0; | ||
910 | skipped = 0; | ||
911 | while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { | ||
912 | unsigned long first_index = 0; | ||
913 | int done = 0; | ||
914 | int nr_found = 0; | ||
915 | |||
916 | ag = pag->pag_agno + 1; | ||
917 | |||
918 | if (trylock) { | ||
919 | if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { | ||
920 | skipped++; | ||
921 | xfs_perag_put(pag); | ||
922 | continue; | ||
923 | } | ||
924 | first_index = pag->pag_ici_reclaim_cursor; | ||
925 | } else | ||
926 | mutex_lock(&pag->pag_ici_reclaim_lock); | ||
927 | |||
928 | do { | ||
929 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | ||
930 | int i; | ||
931 | |||
932 | rcu_read_lock(); | ||
933 | nr_found = radix_tree_gang_lookup_tag( | ||
934 | &pag->pag_ici_root, | ||
935 | (void **)batch, first_index, | ||
936 | XFS_LOOKUP_BATCH, | ||
937 | XFS_ICI_RECLAIM_TAG); | ||
938 | if (!nr_found) { | ||
939 | done = 1; | ||
940 | rcu_read_unlock(); | ||
941 | break; | ||
942 | } | ||
943 | |||
944 | /* | ||
945 | * Grab the inodes before we drop the lock. if we found | ||
946 | * nothing, nr == 0 and the loop will be skipped. | ||
947 | */ | ||
948 | for (i = 0; i < nr_found; i++) { | ||
949 | struct xfs_inode *ip = batch[i]; | ||
950 | |||
951 | if (done || xfs_reclaim_inode_grab(ip, flags)) | ||
952 | batch[i] = NULL; | ||
953 | |||
954 | /* | ||
955 | * Update the index for the next lookup. Catch | ||
956 | * overflows into the next AG range which can | ||
957 | * occur if we have inodes in the last block of | ||
958 | * the AG and we are currently pointing to the | ||
959 | * last inode. | ||
960 | * | ||
961 | * Because we may see inodes that are from the | ||
962 | * wrong AG due to RCU freeing and | ||
963 | * reallocation, only update the index if it | ||
964 | * lies in this AG. It was a race that led us | ||
965 | * to see this inode, so another lookup from | ||
966 | * the same index will not find it again. | ||
967 | */ | ||
968 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != | ||
969 | pag->pag_agno) | ||
970 | continue; | ||
971 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
972 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
973 | done = 1; | ||
974 | } | ||
975 | |||
976 | /* unlock now we've grabbed the inodes. */ | ||
977 | rcu_read_unlock(); | ||
978 | |||
979 | for (i = 0; i < nr_found; i++) { | ||
980 | if (!batch[i]) | ||
981 | continue; | ||
982 | error = xfs_reclaim_inode(batch[i], pag, flags); | ||
983 | if (error && last_error != EFSCORRUPTED) | ||
984 | last_error = error; | ||
985 | } | ||
986 | |||
987 | *nr_to_scan -= XFS_LOOKUP_BATCH; | ||
988 | |||
989 | } while (nr_found && !done && *nr_to_scan > 0); | ||
990 | |||
991 | if (trylock && !done) | ||
992 | pag->pag_ici_reclaim_cursor = first_index; | ||
993 | else | ||
994 | pag->pag_ici_reclaim_cursor = 0; | ||
995 | mutex_unlock(&pag->pag_ici_reclaim_lock); | ||
996 | xfs_perag_put(pag); | ||
997 | } | ||
998 | |||
999 | /* | ||
1000 | * if we skipped any AG, and we still have scan count remaining, do | ||
1001 | * another pass this time using blocking reclaim semantics (i.e. | ||
1002 | * waiting on the reclaim locks and ignoring the reclaim cursors). This | ||
1003 | * ensures that when we get more reclaimers than AGs we block rather | ||
1004 | * than spin trying to execute reclaim. | ||
1005 | */ | ||
1006 | if (trylock && skipped && *nr_to_scan > 0) { | ||
1007 | trylock = 0; | ||
1008 | goto restart; | ||
1009 | } | ||
1010 | return XFS_ERROR(last_error); | ||
1011 | } | ||
1012 | |||
871 | int | 1013 | int |
872 | xfs_reclaim_inodes( | 1014 | xfs_reclaim_inodes( |
873 | xfs_mount_t *mp, | 1015 | xfs_mount_t *mp, |
874 | int mode) | 1016 | int mode) |
875 | { | 1017 | { |
876 | return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, | 1018 | int nr_to_scan = INT_MAX; |
877 | XFS_ICI_RECLAIM_TAG, 1, NULL); | 1019 | |
1020 | return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); | ||
878 | } | 1021 | } |
879 | 1022 | ||
880 | /* | 1023 | /* |
881 | * Shrinker infrastructure. | 1024 | * Inode cache shrinker. |
1025 | * | ||
1026 | * When called we make sure that there is a background (fast) inode reclaim in | ||
1027 | * progress, while we throttle the speed of reclaim by doing synchronous | ||
1028 | * reclaim of inodes. That means if we come across dirty inodes, we wait for | ||
1029 | * them to be cleaned, which we hope will not be very long due to the | ||
1030 | * background walker having already kicked the IO off on those dirty inodes. | ||
882 | */ | 1031 | */ |
883 | static int | 1032 | static int |
884 | xfs_reclaim_inode_shrink( | 1033 | xfs_reclaim_inode_shrink( |
885 | struct shrinker *shrink, | 1034 | struct shrinker *shrink, |
886 | int nr_to_scan, | 1035 | struct shrink_control *sc) |
887 | gfp_t gfp_mask) | ||
888 | { | 1036 | { |
889 | struct xfs_mount *mp; | 1037 | struct xfs_mount *mp; |
890 | struct xfs_perag *pag; | 1038 | struct xfs_perag *pag; |
891 | xfs_agnumber_t ag; | 1039 | xfs_agnumber_t ag; |
892 | int reclaimable; | 1040 | int reclaimable; |
1041 | int nr_to_scan = sc->nr_to_scan; | ||
1042 | gfp_t gfp_mask = sc->gfp_mask; | ||
893 | 1043 | ||
894 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); | 1044 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); |
895 | if (nr_to_scan) { | 1045 | if (nr_to_scan) { |
1046 | /* kick background reclaimer and push the AIL */ | ||
1047 | xfs_syncd_queue_reclaim(mp); | ||
1048 | xfs_ail_push_all(mp->m_ail); | ||
1049 | |||
896 | if (!(gfp_mask & __GFP_FS)) | 1050 | if (!(gfp_mask & __GFP_FS)) |
897 | return -1; | 1051 | return -1; |
898 | 1052 | ||
899 | xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, | 1053 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, |
900 | XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); | 1054 | &nr_to_scan); |
901 | /* if we don't exhaust the scan, don't bother coming back */ | 1055 | /* terminate if we don't exhaust the scan */ |
902 | if (nr_to_scan > 0) | 1056 | if (nr_to_scan > 0) |
903 | return -1; | 1057 | return -1; |
904 | } | 1058 | } |
905 | 1059 | ||
906 | reclaimable = 0; | 1060 | reclaimable = 0; |
907 | ag = 0; | 1061 | ag = 0; |
908 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, | 1062 | while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { |
909 | XFS_ICI_RECLAIM_TAG))) { | 1063 | ag = pag->pag_agno + 1; |
910 | reclaimable += pag->pag_ici_reclaimable; | 1064 | reclaimable += pag->pag_ici_reclaimable; |
911 | xfs_perag_put(pag); | 1065 | xfs_perag_put(pag); |
912 | } | 1066 | } |
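The shrinker callback now receives a struct shrink_control carrying nr_to_scan and gfp_mask rather than taking them as separate arguments. The registration side is not visible in this hunk; as a hedged reconstruction (the bodies below are an assumption, not part of this diff), xfs_inode_shrinker_register()/unregister(), declared in xfs_sync.h just below, would wire the callback into the VM using the register_shrinker() API of this kernel generation:

/* hedged reconstruction -- not taken from this diff */
void
xfs_inode_shrinker_register(
	struct xfs_mount	*mp)
{
	mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
	mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
	register_shrinker(&mp->m_inode_shrink);
}

void
xfs_inode_shrinker_unregister(
	struct xfs_mount	*mp)
{
	unregister_shrinker(&mp->m_inode_shrink);
}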
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index fe78726196f8..e3a6ad27415f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work { | |||
32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | 32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ | 33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ |
34 | 34 | ||
35 | extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
36 | |||
35 | int xfs_syncd_init(struct xfs_mount *mp); | 37 | int xfs_syncd_init(struct xfs_mount *mp); |
36 | void xfs_syncd_stop(struct xfs_mount *mp); | 38 | void xfs_syncd_stop(struct xfs_mount *mp); |
37 | 39 | ||
@@ -47,10 +49,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | |||
47 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 49 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
48 | struct xfs_inode *ip); | 50 | struct xfs_inode *ip); |
49 | 51 | ||
50 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); | 52 | int xfs_sync_inode_grab(struct xfs_inode *ip); |
51 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 53 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
52 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), | 54 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), |
53 | int flags, int tag, int write_lock, int *nr_to_scan); | 55 | int flags); |
54 | 56 | ||
55 | void xfs_inode_shrinker_register(struct xfs_mount *mp); | 57 | void xfs_inode_shrinker_register(struct xfs_mount *mp); |
56 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); | 58 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); |
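With the reclaim-specific arguments (tag, write_lock, nr_to_scan) dropped, xfs_inode_ag_iterator() now takes only a per-inode callback and the sync flags. A hedged usage sketch; demo_sync_inode() is a hypothetical callback, not part of this diff:

/* hypothetical callback passed to the simplified iterator */
STATIC int
demo_sync_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			flags)
{
	/* per-inode work; any error returned here propagates back out */
	return 0;
}

	/* walk every cached inode in every AG with the callback */
	error = xfs_inode_ag_iterator(mp, demo_sync_inode, SYNC_WAIT);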
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7bb5092d6ae4..ee2d2adaa438 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include <linux/sysctl.h> | 19 | #include <linux/sysctl.h> |
20 | #include <linux/proc_fs.h> | 20 | #include <linux/proc_fs.h> |
21 | #include "xfs_error.h" | ||
21 | 22 | ||
22 | static struct ctl_table_header *xfs_table_header; | 23 | static struct ctl_table_header *xfs_table_header; |
23 | 24 | ||
@@ -36,7 +37,7 @@ xfs_stats_clear_proc_handler( | |||
36 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); | 37 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); |
37 | 38 | ||
38 | if (!ret && write && *valp) { | 39 | if (!ret && write && *valp) { |
39 | printk("XFS Clearing xfsstats\n"); | 40 | xfs_notice(NULL, "Clearing xfsstats"); |
40 | for_each_possible_cpu(c) { | 41 | for_each_possible_cpu(c) { |
41 | preempt_disable(); | 42 | preempt_disable(); |
42 | /* save vn_active, it's a universal truth! */ | 43 | /* save vn_active, it's a universal truth! */ |
@@ -51,6 +52,26 @@ xfs_stats_clear_proc_handler( | |||
51 | 52 | ||
52 | return ret; | 53 | return ret; |
53 | } | 54 | } |
55 | |||
56 | STATIC int | ||
57 | xfs_panic_mask_proc_handler( | ||
58 | ctl_table *ctl, | ||
59 | int write, | ||
60 | void __user *buffer, | ||
61 | size_t *lenp, | ||
62 | loff_t *ppos) | ||
63 | { | ||
64 | int ret, *valp = ctl->data; | ||
65 | |||
66 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); | ||
67 | if (!ret && write) { | ||
68 | xfs_panic_mask = *valp; | ||
69 | #ifdef DEBUG | ||
70 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); | ||
71 | #endif | ||
72 | } | ||
73 | return ret; | ||
74 | } | ||
54 | #endif /* CONFIG_PROC_FS */ | 75 | #endif /* CONFIG_PROC_FS */ |
55 | 76 | ||
56 | static ctl_table xfs_table[] = { | 77 | static ctl_table xfs_table[] = { |
@@ -77,7 +98,7 @@ static ctl_table xfs_table[] = { | |||
77 | .data = &xfs_params.panic_mask.val, | 98 | .data = &xfs_params.panic_mask.val, |
78 | .maxlen = sizeof(int), | 99 | .maxlen = sizeof(int), |
79 | .mode = 0644, | 100 | .mode = 0644, |
80 | .proc_handler = proc_dointvec_minmax, | 101 | .proc_handler = xfs_panic_mask_proc_handler, |
81 | .extra1 = &xfs_params.panic_mask.min, | 102 | .extra1 = &xfs_params.panic_mask.min, |
82 | .extra2 = &xfs_params.panic_mask.max | 103 | .extra2 = &xfs_params.panic_mask.max |
83 | }, | 104 | }, |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index be5dffd282a1..d48b7a579ae1 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \ | |||
124 | unsigned long caller_ip), \ | 124 | unsigned long caller_ip), \ |
125 | TP_ARGS(mp, agno, refcount, caller_ip)) | 125 | TP_ARGS(mp, agno, refcount, caller_ip)) |
126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); | 126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); |
127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); | 127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); |
128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); | 128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); |
129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); | 129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); |
130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); | 130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); |
@@ -325,13 +325,12 @@ DEFINE_BUF_EVENT(xfs_buf_lock); | |||
325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); | 325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); |
326 | DEFINE_BUF_EVENT(xfs_buf_cond_lock); | 326 | DEFINE_BUF_EVENT(xfs_buf_cond_lock); |
327 | DEFINE_BUF_EVENT(xfs_buf_unlock); | 327 | DEFINE_BUF_EVENT(xfs_buf_unlock); |
328 | DEFINE_BUF_EVENT(xfs_buf_ordered_retry); | ||
329 | DEFINE_BUF_EVENT(xfs_buf_iowait); | 328 | DEFINE_BUF_EVENT(xfs_buf_iowait); |
330 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); | 329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); |
331 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); | 330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); |
332 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); | 331 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); |
333 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); | 332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); |
334 | DEFINE_BUF_EVENT(xfs_buf_get_noaddr); | 333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); |
335 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); | 334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); |
336 | DEFINE_BUF_EVENT(xfs_buf_item_relse); | 335 | DEFINE_BUF_EVENT(xfs_buf_item_relse); |
337 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); | 336 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); |
@@ -767,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
767 | __field(int, curr_res) | 766 | __field(int, curr_res) |
768 | __field(int, unit_res) | 767 | __field(int, unit_res) |
769 | __field(unsigned int, flags) | 768 | __field(unsigned int, flags) |
770 | __field(void *, reserve_headq) | 769 | __field(int, reserveq) |
771 | __field(void *, write_headq) | 770 | __field(int, writeq) |
772 | __field(int, grant_reserve_cycle) | 771 | __field(int, grant_reserve_cycle) |
773 | __field(int, grant_reserve_bytes) | 772 | __field(int, grant_reserve_bytes) |
774 | __field(int, grant_write_cycle) | 773 | __field(int, grant_write_cycle) |
@@ -785,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
785 | __entry->curr_res = tic->t_curr_res; | 784 | __entry->curr_res = tic->t_curr_res; |
786 | __entry->unit_res = tic->t_unit_res; | 785 | __entry->unit_res = tic->t_unit_res; |
787 | __entry->flags = tic->t_flags; | 786 | __entry->flags = tic->t_flags; |
788 | __entry->reserve_headq = log->l_reserve_headq; | 787 | __entry->reserveq = list_empty(&log->l_reserveq); |
789 | __entry->write_headq = log->l_write_headq; | 788 | __entry->writeq = list_empty(&log->l_writeq); |
790 | __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; | 789 | xlog_crack_grant_head(&log->l_grant_reserve_head, |
791 | __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; | 790 | &__entry->grant_reserve_cycle, |
792 | __entry->grant_write_cycle = log->l_grant_write_cycle; | 791 | &__entry->grant_reserve_bytes); |
793 | __entry->grant_write_bytes = log->l_grant_write_bytes; | 792 | xlog_crack_grant_head(&log->l_grant_write_head, |
793 | &__entry->grant_write_cycle, | ||
794 | &__entry->grant_write_bytes); | ||
794 | __entry->curr_cycle = log->l_curr_cycle; | 795 | __entry->curr_cycle = log->l_curr_cycle; |
795 | __entry->curr_block = log->l_curr_block; | 796 | __entry->curr_block = log->l_curr_block; |
796 | __entry->tail_lsn = log->l_tail_lsn; | 797 | __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); |
797 | ), | 798 | ), |
798 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " | 799 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " |
799 | "t_unit_res %u t_flags %s reserve_headq 0x%p " | 800 | "t_unit_res %u t_flags %s reserveq %s " |
800 | "write_headq 0x%p grant_reserve_cycle %d " | 801 | "writeq %s grant_reserve_cycle %d " |
801 | "grant_reserve_bytes %d grant_write_cycle %d " | 802 | "grant_reserve_bytes %d grant_write_cycle %d " |
802 | "grant_write_bytes %d curr_cycle %d curr_block %d " | 803 | "grant_write_bytes %d curr_cycle %d curr_block %d " |
803 | "tail_cycle %d tail_block %d", | 804 | "tail_cycle %d tail_block %d", |
@@ -808,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
808 | __entry->curr_res, | 809 | __entry->curr_res, |
809 | __entry->unit_res, | 810 | __entry->unit_res, |
810 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), | 811 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), |
811 | __entry->reserve_headq, | 812 | __entry->reserveq ? "empty" : "active", |
812 | __entry->write_headq, | 813 | __entry->writeq ? "empty" : "active", |
813 | __entry->grant_reserve_cycle, | 814 | __entry->grant_reserve_cycle, |
814 | __entry->grant_reserve_bytes, | 815 | __entry->grant_reserve_bytes, |
815 | __entry->grant_write_cycle, | 816 | __entry->grant_write_cycle, |
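The grant heads are no longer read as separate cycle/bytes fields under the grant lock: the trace code now reads a single 64-bit atomic and cracks it via xlog_crack_grant_head(), and the tail LSN via atomic64_read(). A runnable userspace sketch of the cracking idea, under the assumption that the head packs the cycle number in the upper 32 bits and the byte offset in the lower 32 bits (the demo_* names are hypothetical):

#include <stdint.h>
#include <stdio.h>

/* assumed layout: cycle in the high 32 bits, byte offset in the low 32 */
static void demo_crack_grant_head(int64_t head, int *cycle, int *bytes)
{
	*cycle = head >> 32;
	*bytes = head & 0xffffffff;
}

int main(void)
{
	int64_t head = ((int64_t)7 << 32) | 4096;	/* cycle 7, 4096 bytes */
	int cycle, bytes;

	demo_crack_grant_head(head, &cycle, &bytes);
	printf("cycle %d bytes %d\n", cycle, bytes);
	return 0;
}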
@@ -836,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); | |||
836 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); | 837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); |
837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); | 838 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); |
838 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); | 839 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); | ||
839 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); | 841 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); | 842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); |
841 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); | 843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); |
@@ -843,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); | |||
843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); | 845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); |
844 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); | 846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); |
845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); | 847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); |
848 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); | ||
846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); | 849 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); |
847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); | 850 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); |
848 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); | 851 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); |
@@ -936,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage); | |||
936 | DEFINE_PAGE_EVENT(xfs_releasepage); | 939 | DEFINE_PAGE_EVENT(xfs_releasepage); |
937 | DEFINE_PAGE_EVENT(xfs_invalidatepage); | 940 | DEFINE_PAGE_EVENT(xfs_invalidatepage); |
938 | 941 | ||
939 | DECLARE_EVENT_CLASS(xfs_iomap_class, | 942 | DECLARE_EVENT_CLASS(xfs_imap_class, |
940 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, | 943 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, |
941 | int flags, struct xfs_bmbt_irec *irec), | 944 | int type, struct xfs_bmbt_irec *irec), |
942 | TP_ARGS(ip, offset, count, flags, irec), | 945 | TP_ARGS(ip, offset, count, type, irec), |
943 | TP_STRUCT__entry( | 946 | TP_STRUCT__entry( |
944 | __field(dev_t, dev) | 947 | __field(dev_t, dev) |
945 | __field(xfs_ino_t, ino) | 948 | __field(xfs_ino_t, ino) |
@@ -947,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
947 | __field(loff_t, new_size) | 950 | __field(loff_t, new_size) |
948 | __field(loff_t, offset) | 951 | __field(loff_t, offset) |
949 | __field(size_t, count) | 952 | __field(size_t, count) |
950 | __field(int, flags) | 953 | __field(int, type) |
951 | __field(xfs_fileoff_t, startoff) | 954 | __field(xfs_fileoff_t, startoff) |
952 | __field(xfs_fsblock_t, startblock) | 955 | __field(xfs_fsblock_t, startblock) |
953 | __field(xfs_filblks_t, blockcount) | 956 | __field(xfs_filblks_t, blockcount) |
@@ -959,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
959 | __entry->new_size = ip->i_new_size; | 962 | __entry->new_size = ip->i_new_size; |
960 | __entry->offset = offset; | 963 | __entry->offset = offset; |
961 | __entry->count = count; | 964 | __entry->count = count; |
962 | __entry->flags = flags; | 965 | __entry->type = type; |
963 | __entry->startoff = irec ? irec->br_startoff : 0; | 966 | __entry->startoff = irec ? irec->br_startoff : 0; |
964 | __entry->startblock = irec ? irec->br_startblock : 0; | 967 | __entry->startblock = irec ? irec->br_startblock : 0; |
965 | __entry->blockcount = irec ? irec->br_blockcount : 0; | 968 | __entry->blockcount = irec ? irec->br_blockcount : 0; |
966 | ), | 969 | ), |
967 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " | 970 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " |
968 | "offset 0x%llx count %zd flags %s " | 971 | "offset 0x%llx count %zd type %s " |
969 | "startoff 0x%llx startblock %lld blockcount 0x%llx", | 972 | "startoff 0x%llx startblock %lld blockcount 0x%llx", |
970 | MAJOR(__entry->dev), MINOR(__entry->dev), | 973 | MAJOR(__entry->dev), MINOR(__entry->dev), |
971 | __entry->ino, | 974 | __entry->ino, |
@@ -973,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
973 | __entry->new_size, | 976 | __entry->new_size, |
974 | __entry->offset, | 977 | __entry->offset, |
975 | __entry->count, | 978 | __entry->count, |
976 | __print_flags(__entry->flags, "|", BMAPI_FLAGS), | 979 | __print_symbolic(__entry->type, XFS_IO_TYPES), |
977 | __entry->startoff, | 980 | __entry->startoff, |
978 | (__int64_t)__entry->startblock, | 981 | (__int64_t)__entry->startblock, |
979 | __entry->blockcount) | 982 | __entry->blockcount) |
980 | ) | 983 | ) |
981 | 984 | ||
982 | #define DEFINE_IOMAP_EVENT(name) \ | 985 | #define DEFINE_IOMAP_EVENT(name) \ |
983 | DEFINE_EVENT(xfs_iomap_class, name, \ | 986 | DEFINE_EVENT(xfs_imap_class, name, \ |
984 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ | 987 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ |
985 | int flags, struct xfs_bmbt_irec *irec), \ | 988 | int type, struct xfs_bmbt_irec *irec), \ |
986 | TP_ARGS(ip, offset, count, flags, irec)) | 989 | TP_ARGS(ip, offset, count, type, irec)) |
987 | DEFINE_IOMAP_EVENT(xfs_iomap_enter); | 990 | DEFINE_IOMAP_EVENT(xfs_map_blocks_found); |
988 | DEFINE_IOMAP_EVENT(xfs_iomap_found); | 991 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); |
989 | DEFINE_IOMAP_EVENT(xfs_iomap_alloc); | 992 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); |
993 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); | ||
990 | 994 | ||
991 | DECLARE_EVENT_CLASS(xfs_simple_io_class, | 995 | DECLARE_EVENT_CLASS(xfs_simple_io_class, |
992 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), | 996 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), |
@@ -1023,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ | |||
1023 | TP_ARGS(ip, offset, count)) | 1027 | TP_ARGS(ip, offset, count)) |
1024 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); | 1028 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); |
1025 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); | 1029 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); |
1030 | DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); | ||
1026 | 1031 | ||
1027 | 1032 | ||
1028 | TRACE_EVENT(xfs_itruncate_start, | 1033 | TRACE_EVENT(xfs_itruncate_start, |
@@ -1146,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap, | |||
1146 | 1151 | ||
1147 | ); | 1152 | ); |
1148 | 1153 | ||
1149 | #define XFS_BUSY_SYNC \ | 1154 | DECLARE_EVENT_CLASS(xfs_busy_class, |
1150 | { 0, "async" }, \ | ||
1151 | { 1, "sync" } | ||
1152 | |||
1153 | TRACE_EVENT(xfs_alloc_busy, | ||
1154 | TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, | ||
1155 | xfs_agblock_t agbno, xfs_extlen_t len, int sync), | ||
1156 | TP_ARGS(trans, agno, agbno, len, sync), | ||
1157 | TP_STRUCT__entry( | ||
1158 | __field(dev_t, dev) | ||
1159 | __field(struct xfs_trans *, tp) | ||
1160 | __field(int, tid) | ||
1161 | __field(xfs_agnumber_t, agno) | ||
1162 | __field(xfs_agblock_t, agbno) | ||
1163 | __field(xfs_extlen_t, len) | ||
1164 | __field(int, sync) | ||
1165 | ), | ||
1166 | TP_fast_assign( | ||
1167 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
1168 | __entry->tp = trans; | ||
1169 | __entry->tid = trans->t_ticket->t_tid; | ||
1170 | __entry->agno = agno; | ||
1171 | __entry->agbno = agbno; | ||
1172 | __entry->len = len; | ||
1173 | __entry->sync = sync; | ||
1174 | ), | ||
1175 | TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", | ||
1176 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1177 | __entry->tp, | ||
1178 | __entry->tid, | ||
1179 | __entry->agno, | ||
1180 | __entry->agbno, | ||
1181 | __entry->len, | ||
1182 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) | ||
1183 | |||
1184 | ); | ||
1185 | |||
1186 | TRACE_EVENT(xfs_alloc_unbusy, | ||
1187 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1155 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1188 | xfs_agblock_t agbno, xfs_extlen_t len), | 1156 | xfs_agblock_t agbno, xfs_extlen_t len), |
1189 | TP_ARGS(mp, agno, agbno, len), | 1157 | TP_ARGS(mp, agno, agbno, len), |
@@ -1205,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy, | |||
1205 | __entry->agbno, | 1173 | __entry->agbno, |
1206 | __entry->len) | 1174 | __entry->len) |
1207 | ); | 1175 | ); |
1208 | 1176 | #define DEFINE_BUSY_EVENT(name) \ | |
1209 | #define XFS_BUSY_STATES \ | 1177 | DEFINE_EVENT(xfs_busy_class, name, \ |
1210 | { 0, "missing" }, \ | 1178 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ |
1211 | { 1, "found" } | 1179 | xfs_agblock_t agbno, xfs_extlen_t len), \ |
1212 | 1180 | TP_ARGS(mp, agno, agbno, len)) | |
1213 | TRACE_EVENT(xfs_alloc_busysearch, | 1181 | DEFINE_BUSY_EVENT(xfs_alloc_busy); |
1182 | DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); | ||
1183 | DEFINE_BUSY_EVENT(xfs_alloc_busy_force); | ||
1184 | DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); | ||
1185 | DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); | ||
1186 | |||
1187 | TRACE_EVENT(xfs_alloc_busy_trim, | ||
1214 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1188 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1215 | xfs_agblock_t agbno, xfs_extlen_t len, int found), | 1189 | xfs_agblock_t agbno, xfs_extlen_t len, |
1216 | TP_ARGS(mp, agno, agbno, len, found), | 1190 | xfs_agblock_t tbno, xfs_extlen_t tlen), |
1191 | TP_ARGS(mp, agno, agbno, len, tbno, tlen), | ||
1217 | TP_STRUCT__entry( | 1192 | TP_STRUCT__entry( |
1218 | __field(dev_t, dev) | 1193 | __field(dev_t, dev) |
1219 | __field(xfs_agnumber_t, agno) | 1194 | __field(xfs_agnumber_t, agno) |
1220 | __field(xfs_agblock_t, agbno) | 1195 | __field(xfs_agblock_t, agbno) |
1221 | __field(xfs_extlen_t, len) | 1196 | __field(xfs_extlen_t, len) |
1222 | __field(int, found) | 1197 | __field(xfs_agblock_t, tbno) |
1198 | __field(xfs_extlen_t, tlen) | ||
1223 | ), | 1199 | ), |
1224 | TP_fast_assign( | 1200 | TP_fast_assign( |
1225 | __entry->dev = mp->m_super->s_dev; | 1201 | __entry->dev = mp->m_super->s_dev; |
1226 | __entry->agno = agno; | 1202 | __entry->agno = agno; |
1227 | __entry->agbno = agbno; | 1203 | __entry->agbno = agbno; |
1228 | __entry->len = len; | 1204 | __entry->len = len; |
1229 | __entry->found = found; | 1205 | __entry->tbno = tbno; |
1206 | __entry->tlen = tlen; | ||
1230 | ), | 1207 | ), |
1231 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", | 1208 | TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", |
1232 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1209 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1233 | __entry->agno, | 1210 | __entry->agno, |
1234 | __entry->agbno, | 1211 | __entry->agbno, |
1235 | __entry->len, | 1212 | __entry->len, |
1236 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1213 | __entry->tbno, |
1214 | __entry->tlen) | ||
1237 | ); | 1215 | ); |
1238 | 1216 | ||
1239 | TRACE_EVENT(xfs_trans_commit_lsn, | 1217 | TRACE_EVENT(xfs_trans_commit_lsn, |
@@ -1413,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, | |||
1413 | __entry->wasfromfl, | 1391 | __entry->wasfromfl, |
1414 | __entry->isfl, | 1392 | __entry->isfl, |
1415 | __entry->userdata, | 1393 | __entry->userdata, |
1416 | __entry->firstblock) | 1394 | (unsigned long long)__entry->firstblock) |
1417 | ) | 1395 | ) |
1418 | 1396 | ||
1419 | #define DEFINE_ALLOC_EVENT(name) \ | 1397 | #define DEFINE_ALLOC_EVENT(name) \ |
@@ -1421,17 +1399,21 @@ DEFINE_EVENT(xfs_alloc_class, name, \ | |||
1421 | TP_PROTO(struct xfs_alloc_arg *args), \ | 1399 | TP_PROTO(struct xfs_alloc_arg *args), \ |
1422 | TP_ARGS(args)) | 1400 | TP_ARGS(args)) |
1423 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); | 1401 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); |
1402 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); | ||
1424 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); | 1403 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); |
1425 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); | 1404 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); |
1426 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); | 1405 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); |
1427 | DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); | 1406 | DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); |
1428 | DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); | 1407 | DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); |
1429 | DEFINE_ALLOC_EVENT(xfs_alloc_near_error); | 1408 | DEFINE_ALLOC_EVENT(xfs_alloc_near_error); |
1409 | DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); | ||
1410 | DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); | ||
1430 | DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); | 1411 | DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); |
1431 | DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); | 1412 | DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); |
1432 | DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); | 1413 | DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); |
1433 | DEFINE_ALLOC_EVENT(xfs_alloc_size_done); | 1414 | DEFINE_ALLOC_EVENT(xfs_alloc_size_done); |
1434 | DEFINE_ALLOC_EVENT(xfs_alloc_size_error); | 1415 | DEFINE_ALLOC_EVENT(xfs_alloc_size_error); |
1416 | DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); | ||
1435 | DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); | 1417 | DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); |
1436 | DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); | 1418 | DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); |
1437 | DEFINE_ALLOC_EVENT(xfs_alloc_small_done); | 1419 | DEFINE_ALLOC_EVENT(xfs_alloc_small_done); |
@@ -1753,6 +1735,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); | |||
1753 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); | 1735 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); |
1754 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); | 1736 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); |
1755 | 1737 | ||
1738 | DECLARE_EVENT_CLASS(xfs_discard_class, | ||
1739 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
1740 | xfs_agblock_t agbno, xfs_extlen_t len), | ||
1741 | TP_ARGS(mp, agno, agbno, len), | ||
1742 | TP_STRUCT__entry( | ||
1743 | __field(dev_t, dev) | ||
1744 | __field(xfs_agnumber_t, agno) | ||
1745 | __field(xfs_agblock_t, agbno) | ||
1746 | __field(xfs_extlen_t, len) | ||
1747 | ), | ||
1748 | TP_fast_assign( | ||
1749 | __entry->dev = mp->m_super->s_dev; | ||
1750 | __entry->agno = agno; | ||
1751 | __entry->agbno = agbno; | ||
1752 | __entry->len = len; | ||
1753 | ), | ||
1754 | TP_printk("dev %d:%d agno %u agbno %u len %u", | ||
1755 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1756 | __entry->agno, | ||
1757 | __entry->agbno, | ||
1758 | __entry->len) | ||
1759 | ) | ||
1760 | |||
1761 | #define DEFINE_DISCARD_EVENT(name) \ | ||
1762 | DEFINE_EVENT(xfs_discard_class, name, \ | ||
1763 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | ||
1764 | xfs_agblock_t agbno, xfs_extlen_t len), \ | ||
1765 | TP_ARGS(mp, agno, agbno, len)) | ||
1766 | DEFINE_DISCARD_EVENT(xfs_discard_extent); | ||
1767 | DEFINE_DISCARD_EVENT(xfs_discard_toosmall); | ||
1768 | DEFINE_DISCARD_EVENT(xfs_discard_exclude); | ||
1769 | DEFINE_DISCARD_EVENT(xfs_discard_busy); | ||
1770 | |||
1756 | #endif /* _TRACE_XFS_H */ | 1771 | #endif /* _TRACE_XFS_H */ |
1757 | 1772 | ||
1758 | #undef TRACE_INCLUDE_PATH | 1773 | #undef TRACE_INCLUDE_PATH |
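Each DEFINE_DISCARD_EVENT(name) above generates a trace_<name>() hook that the discard path calls with the same (mp, agno, agbno, len) arguments. A hedged usage sketch of how such tracepoints are typically emitted; the surrounding condition and the local names (fbno, flen, minlen, busy) are illustrative only, not taken from this diff:

	/* illustrative only: classify each candidate extent as it is examined */
	if (busy)
		trace_xfs_discard_busy(mp, agno, fbno, flen);
	else if (flen < minlen)
		trace_xfs_discard_toosmall(mp, agno, fbno, flen);
	else
		trace_xfs_discard_extent(mp, agno, fbno, flen);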
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h deleted file mode 100644 index f8d279d7563a..000000000000 --- a/fs/xfs/linux-2.6/xfs_version.h +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_VERSION_H__ | ||
19 | #define __XFS_VERSION_H__ | ||
20 | |||
21 | /* | ||
22 | * Dummy file that can contain a timestamp to put into the | ||
23 | * XFS init string, to help users keep track of what they're | ||
24 | * running | ||
25 | */ | ||
26 | |||
27 | #define XFS_VERSION_STRING "SGI XFS" | ||
28 | |||
29 | #endif /* __XFS_VERSION_H__ */ | ||