author		Stefan Richter <stefanr@s5r6.in-berlin.de>	2011-05-10 14:52:07 -0400
committer	Stefan Richter <stefanr@s5r6.in-berlin.de>	2011-05-10 16:50:41 -0400
commit		020abf03cd659388f94cb328e1e1df0656e0d7ff (patch)
tree		40d05011708ad1b4a05928d167eb120420581aa6 /fs/xfs
parent		0ff8fbc61727c926883eec381fbd3d32d1fab504 (diff)
parent		693d92a1bbc9e42681c42ed190bd42b636ca876f (diff)
Merge tag 'v2.6.39-rc7'
in order to pull in changes in drivers/media/dvb/firewire/ and
sound/firewire/.
Diffstat (limited to 'fs/xfs')
80 files changed, 4461 insertions, 4282 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 0dce969d6cad..284a7c89697e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,14 +16,11 @@
 # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
-EXTRA_CFLAGS +=	 -I$(src) -I$(src)/linux-2.6
+ccflags-y := -I$(src) -I$(src)/linux-2.6
+ccflags-$(CONFIG_XFS_DEBUG) += -g
 
 XFS_LINUX := linux-2.6
 
-ifeq ($(CONFIG_XFS_DEBUG),y)
-	EXTRA_CFLAGS += -g
-endif
-
 obj-$(CONFIG_XFS_FS)		+= xfs.o
 
 xfs-y				+= linux-2.6/xfs_trace.o
@@ -98,17 +95,17 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
 				   kmem.o \
 				   xfs_aops.o \
 				   xfs_buf.o \
+				   xfs_discard.o \
 				   xfs_export.o \
 				   xfs_file.o \
 				   xfs_fs_subr.o \
 				   xfs_globals.o \
 				   xfs_ioctl.o \
 				   xfs_iops.o \
+				   xfs_message.o \
 				   xfs_super.o \
 				   xfs_sync.o \
 				   xfs_xattr.o)
 
 # Objects in support/
-xfs-y				+= $(addprefix support/, \
-				   debug.o \
-				   uuid.o)
+xfs-y				+= support/uuid.o
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 666c9db48eb6..a907de565db3 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -23,6 +23,7 @@
 #include <linux/backing-dev.h>
 #include "time.h"
 #include "kmem.h"
+#include "xfs_message.h"
 
 /*
  * Greedy allocation. May fail and may return vmalloced memory.
@@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
-			printk(KERN_ERR "XFS: possible memory allocation "
-					"deadlock in %s (mode:0x%x)\n",
+			xfs_err(NULL,
+	"possible memory allocation deadlock in %s (mode:0x%x)",
 					__func__, lflags);
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
@@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
-			printk(KERN_ERR "XFS: possible memory allocation "
-					"deadlock in %s (mode:0x%x)\n",
+			xfs_err(NULL,
+	"possible memory allocation deadlock in %s (mode:0x%x)",
 					__func__, lflags);
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
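
Both kmem.c hunks only swap the open-coded printk(KERN_ERR "XFS: ...") for the new xfs_err() helper from xfs_message.h, which is why that header is now included. The surrounding retry loop is untouched; distilled into a standalone sketch (the function name is hypothetical, the calls are the same kernel APIs used above, and the KM_MAYFAIL early-return path is omitted):

/*
 * Sketch of the greedy-allocation retry pattern kmem_alloc() uses:
 * keep retrying a failable allocation, warn every 100 attempts (a
 * likely allocation deadlock), and back off on the async block-device
 * queue before trying again.
 */
static void *
greedy_alloc_sketch(size_t size, gfp_t lflags)
{
	int	retries = 0;
	void	*ptr;

	do {
		ptr = kmalloc(size, lflags);
		if (ptr)
			return ptr;
		if (!(++retries % 100))
			xfs_err(NULL,
	"possible memory allocation deadlock in %s (mode:0x%x)",
				__func__, lflags);
		congestion_wait(BLK_RW_ASYNC, HZ/50);
	} while (1);
}
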
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c370819..000000000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_SV_H__
-#define __XFS_SUPPORT_SV_H__
-
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-
-/*
- * Synchronisation variables.
- *
- * (Parameters "pri", "svf" and "rts" are not implemented)
- */
-
-typedef struct sv_s {
-	wait_queue_head_t waiters;
-} sv_t;
-
-static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue_exclusive(&sv->waiters, &wait);
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	spin_unlock(lock);
-
-	schedule();
-
-	remove_wait_queue(&sv->waiters, &wait);
-}
-
-#define sv_init(sv,flag,name) \
-	init_waitqueue_head(&(sv)->waiters)
-#define sv_destroy(sv) \
-	/*NOTHING*/
-#define sv_wait(sv, pri, lock, s) \
-	_sv_wait(sv, lock)
-#define sv_signal(sv) \
-	wake_up(&(sv)->waiters)
-#define sv_broadcast(sv) \
-	wake_up_all(&(sv)->waiters)
-
-#endif /* __XFS_SUPPORT_SV_H__ */
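
The deleted sv.h was a thin IRIX-compatibility wrapper around a bare wait_queue_head_t; removing it implies its remaining users were converted to native wait queues. For reference, a hypothetical caller (not from this patch; names are illustrative) would have paired the primitives like this — note that _sv_wait() returns with the spinlock already dropped, so the waiter must retake it before rechecking its condition:

static sv_t		done_sv;
static DEFINE_SPINLOCK(done_lock);
static int		done_flag;

static void wait_for_done(void)
{
	spin_lock(&done_lock);
	while (!done_flag) {
		_sv_wait(&done_sv, &done_lock);	/* drops done_lock, sleeps */
		spin_lock(&done_lock);		/* returns unlocked; retake */
	}
	spin_unlock(&done_lock);
}

static void mark_done(void)
{
	spin_lock(&done_lock);
	done_flag = 1;
	spin_unlock(&done_lock);
	sv_signal(&done_sv);			/* wake one exclusive waiter */
}
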
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3d..39f4f809bb68 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 }
 
 int
-xfs_check_acl(struct inode *inode, int mask)
+xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_inode *ip;
 	struct posix_acl *acl;
 	int error = -EAGAIN;
 
+	ip = XFS_I(inode);
 	trace_xfs_check_acl(ip);
 
 	/*
@@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask)
 	if (!XFS_IFORK_Q(ip))
 		return -EAGAIN;
 
+	if (flags & IPERM_FLAG_RCU) {
+		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+			return -ECHILD;
+		return -EAGAIN;
+	}
+
 	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
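
The xfs_check_acl() change tracks the VFS's RCU-walk ("store-free") path lookup: ->check_acl() now receives a flags argument, and under IPERM_FLAG_RCU the callee may not sleep. XFS answers -ECHILD (retry in ref-walk mode) unless the ACL cache already holds a negative entry, in which case -EAGAIN tells the VFS to fall back to mode-bit checks. A sketch of the same convention for a hypothetical filesystem (the myfs_* names are illustrative; the other calls are the generic POSIX ACL API of this era):

int
myfs_check_acl(struct inode *inode, int mask, unsigned int flags)
{
	struct posix_acl	*acl;
	int			error;

	if (flags & IPERM_FLAG_RCU) {
		/* only a cached "no ACL" answer is usable locklessly */
		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
			return -ECHILD;		/* force ref-walk retry */
		return -EAGAIN;			/* no ACL: use mode bits */
	}

	acl = myfs_get_acl(inode, ACL_TYPE_ACCESS);	/* may block on I/O */
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (acl) {
		error = posix_acl_permission(inode, acl, mask);
		posix_acl_release(acl);
		return error;
	}
	return -EAGAIN;
}
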
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 691f61223ed6..79ce38be15a1 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,15 +38,6 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
-	IO_READ,	/* mapping for a read */
-	IO_DELAY,	/* mapping covers delalloc region */
-	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
-	IO_NEW		/* just allocated */
-};
 
 /*
  * Prime number of hash buckets since address is used as the key.
@@ -182,9 +173,6 @@ xfs_setfilesize(
 	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
 	xfs_fsize_t		isize;
 
-	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-	ASSERT(ioend->io_type != IO_READ);
-
 	if (unlikely(ioend->io_error))
 		return 0;
 
@@ -244,10 +232,8 @@ xfs_end_io(
 	 * We might have to update the on-disk file size after extending
 	 * writes.
 	 */
-	if (ioend->io_type != IO_READ) {
-		error = xfs_setfilesize(ioend);
-		ASSERT(!error || error == EAGAIN);
-	}
+	error = xfs_setfilesize(ioend);
+	ASSERT(!error || error == EAGAIN);
 
 	/*
 	 * If we didn't complete processing of the ioend, requeue it to the
@@ -318,14 +304,63 @@ STATIC int
 xfs_map_blocks(
 	struct inode		*inode,
 	loff_t			offset,
-	ssize_t			count,
 	struct xfs_bmbt_irec	*imap,
-	int			flags)
+	int			type,
+	int			nonblocking)
 {
-	int			nmaps = 1;
-	int			new = 0;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	ssize_t			count = 1 << inode->i_blkbits;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			error = 0;
+	int			bmapi_flags = XFS_BMAPI_ENTIRE;
+	int			nimaps = 1;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	if (type == IO_UNWRITTEN)
+		bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+		if (nonblocking)
+			return -XFS_ERROR(EAGAIN);
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+	}
 
-	return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
+	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+	       (ip->i_df.if_flags & XFS_IFEXTENTS));
+	ASSERT(offset <= mp->m_maxioffset);
+
+	if (offset + count > mp->m_maxioffset)
+		count = mp->m_maxioffset - offset;
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+			  bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	if (error)
+		return -XFS_ERROR(error);
+
+	if (type == IO_DELALLOC &&
+	    (!nimaps || isnullstartblock(imap->br_startblock))) {
+		error = xfs_iomap_write_allocate(ip, offset, count, imap);
+		if (!error)
+			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+		return -XFS_ERROR(error);
+	}
+
+#ifdef DEBUG
+	if (type == IO_UNWRITTEN) {
+		ASSERT(nimaps);
+		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+	}
+#endif
+	if (nimaps)
+		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+	return 0;
 }
 
 STATIC int
@@ -378,28 +413,19 @@ xfs_submit_ioend_bio(
 	if (xfs_ioend_new_eof(ioend))
 		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
 
-	submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
-		   WRITE_SYNC_PLUG : WRITE, bio);
-	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
-	bio_put(bio);
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
 
 STATIC struct bio *
 xfs_alloc_ioend_bio(
 	struct buffer_head	*bh)
 {
-	struct bio		*bio;
 	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
-
-	do {
-		bio = bio_alloc(GFP_NOIO, nvecs);
-		nvecs >>= 1;
-	} while (!bio);
+	struct bio		*bio = bio_alloc(GFP_NOIO, nvecs);
 
 	ASSERT(bio->bi_private == NULL);
 	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
-	bio_get(bio);
 	return bio;
 }
 
@@ -470,9 +496,8 @@ xfs_submit_ioend(
 	/* Pass 1 - start writeback */
 	do {
 		next = ioend->io_list;
-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
 			xfs_start_buffer_writeback(bh);
-		}
 	} while ((ioend = next) != NULL);
 
 	/* Pass 2 - submit I/O */
@@ -600,117 +625,13 @@ xfs_map_at_offset(
 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
-	lock_buffer(bh);
 	xfs_map_buffer(inode, bh, imap, offset);
-	bh->b_bdev = xfs_find_bdev_for_inode(inode);
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
 	clear_buffer_unwritten(bh);
 }
 
 /*
- * Look for a page at index that is suitable for clustering.
- */
-STATIC unsigned int
-xfs_probe_page(
-	struct page		*page,
-	unsigned int		pg_offset)
-{
-	struct buffer_head	*bh, *head;
-	int			ret = 0;
-
-	if (PageWriteback(page))
-		return 0;
-	if (!PageDirty(page))
-		return 0;
-	if (!page->mapping)
-		return 0;
-	if (!page_has_buffers(page))
-		return 0;
-
-	bh = head = page_buffers(page);
-	do {
-		if (!buffer_uptodate(bh))
-			break;
-		if (!buffer_mapped(bh))
-			break;
-		ret += bh->b_size;
-		if (ret >= pg_offset)
-			break;
-	} while ((bh = bh->b_this_page) != head);
-
-	return ret;
-}
-
-STATIC size_t
-xfs_probe_cluster(
-	struct inode		*inode,
-	struct page		*startpage,
-	struct buffer_head	*bh,
-	struct buffer_head	*head)
-{
-	struct pagevec		pvec;
-	pgoff_t			tindex, tlast, tloff;
-	size_t			total = 0;
-	int			done = 0, i;
-
-	/* First sum forwards in this page */
-	do {
-		if (!buffer_uptodate(bh) || !buffer_mapped(bh))
-			return total;
-		total += bh->b_size;
-	} while ((bh = bh->b_this_page) != head);
-
-	/* if we reached the end of the page, sum forwards in following pages */
-	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-	tindex = startpage->index + 1;
-
-	/* Prune this back to avoid pathological behavior */
-	tloff = min(tlast, startpage->index + 64);
-
-	pagevec_init(&pvec, 0);
-	while (!done && tindex <= tloff) {
-		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-			break;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			size_t pg_offset, pg_len = 0;
-
-			if (tindex == tlast) {
-				pg_offset =
-				    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
-				if (!pg_offset) {
-					done = 1;
-					break;
-				}
-			} else
-				pg_offset = PAGE_CACHE_SIZE;
-
-			if (page->index == tindex && trylock_page(page)) {
-				pg_len = xfs_probe_page(page, pg_offset);
-				unlock_page(page);
-			}
-
-			if (!pg_len) {
-				done = 1;
-				break;
-			}
-
-			total += pg_len;
-			tindex++;
-		}
-
-		pagevec_release(&pvec);
-		cond_resched();
-	}
-
-	return total;
-}
-
-/*
  * Test if a given page is suitable for writing as part of an unwritten
  * or delayed allocate extent.
  */
@@ -731,9 +652,9 @@ xfs_is_delayed_page(
 		if (buffer_unwritten(bh))
 			acceptable = (type == IO_UNWRITTEN);
 		else if (buffer_delay(bh))
-			acceptable = (type == IO_DELAY);
+			acceptable = (type == IO_DELALLOC);
 		else if (buffer_dirty(bh) && buffer_mapped(bh))
-			acceptable = (type == IO_NEW);
+			acceptable = (type == IO_OVERWRITE);
 		else
 			break;
 	} while ((bh = bh->b_this_page) != head);
@@ -758,8 +679,7 @@ xfs_convert_page(
 	loff_t			tindex,
 	struct xfs_bmbt_irec	*imap,
 	xfs_ioend_t		**ioendp,
-	struct writeback_control *wbc,
-	int			all_bh)
+	struct writeback_control *wbc)
 {
 	struct buffer_head	*bh, *head;
 	xfs_off_t		end_offset;
@@ -814,37 +734,30 @@ xfs_convert_page(
 			continue;
 		}
 
-		if (buffer_unwritten(bh) || buffer_delay(bh)) {
+		if (buffer_unwritten(bh) || buffer_delay(bh) ||
+		    buffer_mapped(bh)) {
 			if (buffer_unwritten(bh))
 				type = IO_UNWRITTEN;
+			else if (buffer_delay(bh))
+				type = IO_DELALLOC;
 			else
-				type = IO_DELAY;
+				type = IO_OVERWRITE;
 
 			if (!xfs_imap_valid(inode, imap, offset)) {
 				done = 1;
 				continue;
 			}
 
-			ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-			ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-			xfs_map_at_offset(inode, bh, imap, offset);
+			lock_buffer(bh);
+			if (type != IO_OVERWRITE)
+				xfs_map_at_offset(inode, bh, imap, offset);
 			xfs_add_to_ioend(inode, bh, offset, type,
 					 ioendp, done);
 
 			page_dirty--;
 			count++;
 		} else {
-			type = IO_NEW;
-			if (buffer_mapped(bh) && all_bh) {
-				lock_buffer(bh);
-				xfs_add_to_ioend(inode, bh, offset,
-						type, ioendp, done);
-				count++;
-				page_dirty--;
-			} else {
-				done = 1;
-			}
+			done = 1;
 		}
 	} while (offset += len, (bh = bh->b_this_page) != head);
 
@@ -876,7 +789,6 @@ xfs_cluster_write(
 	struct xfs_bmbt_irec	*imap,
 	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
-	int			all_bh,
 	pgoff_t			tlast)
 {
 	struct pagevec		pvec;
@@ -891,7 +803,7 @@ xfs_cluster_write(
 
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-					imap, ioendp, wbc, all_bh);
+					imap, ioendp, wbc);
 			if (done)
 				break;
 		}
@@ -935,13 +847,13 @@ xfs_aops_discard_page(
 	struct buffer_head	*bh, *head;
 	loff_t			offset = page_offset(page);
 
-	if (!xfs_is_delayed_page(page, IO_DELAY))
+	if (!xfs_is_delayed_page(page, IO_DELALLOC))
 		goto out_invalidate;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		goto out_invalidate;
 
-	xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+	xfs_alert(ip->i_mount,
 		"page discard on page %p, inode 0x%llx, offset %llu.",
 			page, ip->i_ino, offset);
 
@@ -959,7 +871,7 @@ xfs_aops_discard_page(
 		if (error) {
 			/* something screwed, just bail */
 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+				xfs_alert(ip->i_mount,
 			"page discard unable to remove delalloc mapping.");
 			}
 			break;
@@ -1002,10 +914,10 @@ xfs_vm_writepage(
 	unsigned int		type;
 	__uint64_t		end_offset;
 	pgoff_t			end_index, last_index;
-	ssize_t			size, len;
-	int			flags, err, imap_valid = 0, uptodate = 1;
+	ssize_t			len;
+	int			err, imap_valid = 0, uptodate = 1;
 	int			count = 0;
-	int			all_bh = 0;
+	int			nonblocking = 0;
 
 	trace_xfs_writepage(inode, page, 0);
 
@@ -1056,10 +968,14 @@ xfs_vm_writepage(
 
 	bh = head = page_buffers(page);
 	offset = page_offset(page);
-	flags = BMAPI_READ;
-	type = IO_NEW;
+	type = IO_OVERWRITE;
+
+	if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
+		nonblocking = 1;
 
 	do {
+		int new_ioend = 0;
+
 		if (offset >= end_offset)
 			break;
 		if (!buffer_uptodate(bh))
@@ -1076,90 +992,54 @@ xfs_vm_writepage(
 			continue;
 		}
 
-		if (imap_valid)
-			imap_valid = xfs_imap_valid(inode, &imap, offset);
-
-		if (buffer_unwritten(bh) || buffer_delay(bh)) {
-			int new_ioend = 0;
-
-			/*
-			 * Make sure we don't use a read-only iomap
-			 */
-			if (flags == BMAPI_READ)
-				imap_valid = 0;
-
-			if (buffer_unwritten(bh)) {
+		if (buffer_unwritten(bh)) {
+			if (type != IO_UNWRITTEN) {
 				type = IO_UNWRITTEN;
-				flags = BMAPI_WRITE | BMAPI_IGNSTATE;
-			} else if (buffer_delay(bh)) {
-				type = IO_DELAY;
-				flags = BMAPI_ALLOCATE;
-
-				if (wbc->sync_mode == WB_SYNC_NONE)
-					flags |= BMAPI_TRYLOCK;
-			}
-
-			if (!imap_valid) {
-				/*
-				 * If we didn't have a valid mapping then we
-				 * need to ensure that we put the new mapping
-				 * in a new ioend structure. This needs to be
-				 * done to ensure that the ioends correctly
-				 * reflect the block mappings at io completion
-				 * for unwritten extent conversion.
-				 */
-				new_ioend = 1;
-				err = xfs_map_blocks(inode, offset, len,
-						&imap, flags);
-				if (err)
-					goto error;
-				imap_valid = xfs_imap_valid(inode, &imap,
-						offset);
+				imap_valid = 0;
 			}
-			if (imap_valid) {
-				xfs_map_at_offset(inode, bh, &imap, offset);
-				xfs_add_to_ioend(inode, bh, offset, type,
-						&ioend, new_ioend);
-				count++;
+		} else if (buffer_delay(bh)) {
+			if (type != IO_DELALLOC) {
+				type = IO_DELALLOC;
+				imap_valid = 0;
 			}
 		} else if (buffer_uptodate(bh)) {
-			/*
-			 * we got here because the buffer is already mapped.
-			 * That means it must already have extents allocated
-			 * underneath it. Map the extent by reading it.
-			 */
-			if (!imap_valid || flags != BMAPI_READ) {
-				flags = BMAPI_READ;
-				size = xfs_probe_cluster(inode, page, bh, head);
-				err = xfs_map_blocks(inode, offset, size,
-						&imap, flags);
-				if (err)
-					goto error;
-				imap_valid = xfs_imap_valid(inode, &imap,
-						offset);
+			if (type != IO_OVERWRITE) {
+				type = IO_OVERWRITE;
+				imap_valid = 0;
 			}
+		} else {
+			if (PageUptodate(page)) {
+				ASSERT(buffer_mapped(bh));
+				imap_valid = 0;
+			}
+			continue;
+		}
 
+		if (imap_valid)
+			imap_valid = xfs_imap_valid(inode, &imap, offset);
+		if (!imap_valid) {
 			/*
-			 * We set the type to IO_NEW in case we are doing a
-			 * small write at EOF that is extending the file but
-			 * without needing an allocation. We need to update the
-			 * file size on I/O completion in this case so it is
-			 * the same case as having just allocated a new extent
-			 * that we are writing into for the first time.
+			 * If we didn't have a valid mapping then we need to
+			 * put the new mapping into a separate ioend structure.
+			 * This ensures non-contiguous extents always have
+			 * separate ioends, which is particularly important
+			 * for unwritten extent conversion at I/O completion
+			 * time.
 			 */
-			type = IO_NEW;
-			if (trylock_buffer(bh)) {
-				if (imap_valid)
-					all_bh = 1;
-				xfs_add_to_ioend(inode, bh, offset, type,
-						&ioend, !imap_valid);
-				count++;
-			} else {
-				imap_valid = 0;
-			}
-		} else if (PageUptodate(page)) {
-			ASSERT(buffer_mapped(bh));
-			imap_valid = 0;
+			new_ioend = 1;
+			err = xfs_map_blocks(inode, offset, &imap, type,
+					     nonblocking);
+			if (err)
+				goto error;
+			imap_valid = xfs_imap_valid(inode, &imap, offset);
+		}
+		if (imap_valid) {
+			lock_buffer(bh);
+			if (type != IO_OVERWRITE)
+				xfs_map_at_offset(inode, bh, &imap, offset);
+			xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+					 new_ioend);
+			count++;
 		}
 
 		if (!iohead)
@@ -1188,7 +1068,7 @@ xfs_vm_writepage(
 			end_index = last_index;
 
 		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-				  wbc, all_bh, end_index);
+				  wbc, end_index);
 	}
 
 	if (iohead)
@@ -1257,13 +1137,19 @@ __xfs_get_blocks(
 	int			create,
 	int			direct)
 {
-	int			flags = create ? BMAPI_WRITE : BMAPI_READ;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			error = 0;
+	int			lockmode = 0;
 	struct xfs_bmbt_irec	imap;
+	int			nimaps = 1;
 	xfs_off_t		offset;
 	ssize_t			size;
-	int			nimap = 1;
 	int			new = 0;
-	int			error;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
 
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1272,15 +1158,45 @@ __xfs_get_blocks(
 	if (!create && direct && offset >= i_size_read(inode))
 		return 0;
 
-	if (direct && create)
-		flags |= BMAPI_DIRECT;
+	if (create) {
+		lockmode = XFS_ILOCK_EXCL;
+		xfs_ilock(ip, lockmode);
+	} else {
+		lockmode = xfs_ilock_map_shared(ip);
+	}
+
+	ASSERT(offset <= mp->m_maxioffset);
+	if (offset + size > mp->m_maxioffset)
+		size = mp->m_maxioffset - offset;
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-	error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
-			  &new);
+	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+			  XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
 	if (error)
-		return -error;
-	if (nimap == 0)
-		return 0;
+		goto out_unlock;
+
+	if (create &&
+	    (!nimaps ||
+	     (imap.br_startblock == HOLESTARTBLOCK ||
+	      imap.br_startblock == DELAYSTARTBLOCK))) {
+		if (direct) {
+			error = xfs_iomap_write_direct(ip, offset, size,
+						       &imap, nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, size, &imap);
+		}
+		if (error)
+			goto out_unlock;
+
+		trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+	} else if (nimaps) {
+		trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+	} else {
+		trace_xfs_get_blocks_notfound(ip, offset, size);
+		goto out_unlock;
+	}
+	xfs_iunlock(ip, lockmode);
 
 	if (imap.br_startblock != HOLESTARTBLOCK &&
 	    imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1347,6 +1263,10 @@ __xfs_get_blocks(
 	}
 
 	return 0;
+
+out_unlock:
+	xfs_iunlock(ip, lockmode);
+	return -error;
 }
 
 int
@@ -1375,7 +1295,7 @@ xfs_get_blocks_direct(
  * If the private argument is non-NULL __xfs_get_blocks signals us that we
  * need to issue a transaction to convert the range from unwritten to written
  * extents.  In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done.  But in case this was a successfull AIO
+ * to do this and we are done.  But in case this was a successful AIO
  * request this handler is called from interrupt context, from which we
  * can't start transactions.  In that case offload the I/O completion to
  * the workqueues we also use for buffered I/O completion.
@@ -1434,7 +1354,7 @@ xfs_vm_direct_IO(
 	ssize_t			ret;
 
 	if (rw & WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IO_NEW);
+		iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
 
 		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
 					   offset, nr_segs,
@@ -1490,7 +1410,7 @@ xfs_vm_write_failed(
 		if (error) {
 			/* something screwed, just bail */
 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+				xfs_alert(ip->i_mount,
 					"xfs_vm_write_failed: unable to clean up ino %lld",
 					ip->i_ino);
 			}
@@ -1574,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
-	.sync_page		= block_sync_page,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.write_begin		= xfs_vm_write_begin,
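
Taken together, the xfs_aops.c hunks replace the old BMAPI_* flag juggling and the xfs_probe_cluster() heuristics with a simple per-buffer I/O type state machine: each buffer_head is binned as unwritten, delalloc, or overwrite; a type change invalidates the cached mapping; and xfs_map_blocks() is called lazily only when no valid mapping covers the current offset. The classification step, distilled into a standalone helper (hypothetical name; the predicates and IO_* constants are the ones used above):

static int
classify_buffer(struct buffer_head *bh, struct page *page,
		unsigned int *type, int *imap_valid)
{
	unsigned int	new_type;

	if (buffer_unwritten(bh))
		new_type = IO_UNWRITTEN;
	else if (buffer_delay(bh))
		new_type = IO_DELALLOC;
	else if (buffer_uptodate(bh))
		new_type = IO_OVERWRITE;
	else
		return 0;		/* not ready for writeback; skip */

	if (new_type != *type) {
		*type = new_type;
		*imap_valid = 0;	/* force a fresh xfs_map_blocks() */
	}
	return 1;
}
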
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index c5057fb6237a..71f721e1a71f 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue;
 extern mempool_t *xfs_ioend_pool;
 
 /*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+	IO_DIRECT = 0,	/* special case for direct I/O ioends */
+	IO_DELALLOC,	/* mapping covers delalloc region */
+	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
+	IO_OVERWRITE,	/* mapping covers already allocated extent */
+};
+
+#define XFS_IO_TYPES \
+	{ 0,			"" }, \
+	{ IO_DELALLOC,		"delalloc" }, \
+	{ IO_UNWRITTEN,		"unwritten" }, \
+	{ IO_OVERWRITE,		"overwrite" }
+
+/*
 * xfs_ioend struct manages large extent writes for XFS.
 * It can manage several multi-page bio's at once.
 */
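
The XFS_IO_TYPES table is the usual companion to such an enum: a value-to-string map consumed by __print_symbolic() when trace events are formatted. A sketch of a consumer (the event name here is hypothetical; the real users are the tracepoints in linux-2.6/xfs_trace.h):

TRACE_EVENT(sketch_ioend_type,
	TP_PROTO(int type),
	TP_ARGS(type),
	TP_STRUCT__entry(
		__field(int, type)		/* numeric IO_* value */
	),
	TP_fast_assign(
		__entry->type = type;
	),
	TP_printk("ioend type %s",
		  __print_symbolic(__entry->type, XFS_IO_TYPES))
);
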
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4c5deb6e9e31..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
 
 static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-static struct shrinker xfs_buf_shake = {
-	.shrink = xfsbufd_wakeup,
-	.seeks = DEFAULT_SEEKS,
-};
 
 static struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
@@ -99,77 +94,79 @@ xfs_buf_vmap_len(
 }
 
 /*
- * Page Region interfaces.
+ * xfs_buf_lru_add - add a buffer to the LRU.
  *
- * For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
- *
- * Each such region is "bytes per page / bits per long" bytes long.
- *
- * NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
  */
-#if (BITS_PER_LONG == 32)
-#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
-#elif (BITS_PER_LONG == 64)
-#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
-#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
-#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))
-
-STATIC unsigned long
-page_region_mask(
-	size_t		offset,
-	size_t		length)
+STATIC void
+xfs_buf_lru_add(
+	struct xfs_buf	*bp)
 {
-	unsigned long	mask;
-	int		first, final;
-
-	first = BTOPR(offset);
-	final = BTOPRT(offset + length - 1);
-	first = min(first, final);
-
-	mask = ~0UL;
-	mask <<= BITS_PER_LONG - (final - first);
-	mask >>= BITS_PER_LONG - (final);
-
-	ASSERT(offset + length <= PAGE_CACHE_SIZE);
-	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
+	struct xfs_buftarg *btp = bp->b_target;
 
-	return mask;
+	spin_lock(&btp->bt_lru_lock);
+	if (list_empty(&bp->b_lru)) {
+		atomic_inc(&bp->b_hold);
+		list_add_tail(&bp->b_lru, &btp->bt_lru);
+		btp->bt_lru_nr++;
+	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
 STATIC void
-set_page_region(
-	struct page	*page,
-	size_t		offset,
-	size_t		length)
+xfs_buf_lru_del(
+	struct xfs_buf	*bp)
 {
-	set_page_private(page,
-		page_private(page) | page_region_mask(offset, length));
-	if (page_private(page) == ~0UL)
-		SetPageUptodate(page);
-}
+	struct xfs_buftarg *btp = bp->b_target;
 
-STATIC int
-test_page_region(
-	struct page	*page,
-	size_t		offset,
-	size_t		length)
-{
-	unsigned long	mask = page_region_mask(offset, length);
+	if (list_empty(&bp->b_lru))
+		return;
 
-	return (mask && (page_private(page) & mask) == mask);
+	spin_lock(&btp->bt_lru_lock);
+	if (!list_empty(&bp->b_lru)) {
+		list_del_init(&bp->b_lru);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
 /*
- * Internal xfs_buf_t object manipulation
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
 */
+void
+xfs_buf_stale(
+	struct xfs_buf	*bp)
+{
+	bp->b_flags |= XBF_STALE;
+	atomic_set(&(bp)->b_lru_ref, 0);
+	if (!list_empty(&bp->b_lru)) {
+		struct xfs_buftarg *btp = bp->b_target;
+
+		spin_lock(&btp->bt_lru_lock);
+		if (!list_empty(&bp->b_lru)) {
+			list_del_init(&bp->b_lru);
+			btp->bt_lru_nr--;
+			atomic_dec(&bp->b_hold);
+		}
+		spin_unlock(&btp->bt_lru_lock);
+	}
+	ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
 
 STATIC void
 _xfs_buf_initialize(
@@ -186,7 +183,9 @@ _xfs_buf_initialize(
 
 	memset(bp, 0, sizeof(xfs_buf_t));
 	atomic_set(&bp->b_hold, 1);
+	atomic_set(&bp->b_lru_ref, 1);
 	init_completion(&bp->b_iowait);
+	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
 	RB_CLEAR_NODE(&bp->b_rbnode);
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
@@ -262,7 +261,9 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
-	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
+	ASSERT(list_empty(&bp->b_lru));
+
+	if (bp->b_flags & _XBF_PAGES) {
 		uint		i;
 
 		if (xfs_buf_is_vmapped(bp))
@@ -272,56 +273,77 @@ xfs_buf_free(
 		for (i = 0; i < bp->b_page_count; i++) {
 			struct page	*page = bp->b_pages[i];
 
-			if (bp->b_flags & _XBF_PAGE_CACHE)
-				ASSERT(!PagePrivate(page));
-			page_cache_release(page);
+			__free_page(page);
 		}
-	}
+	} else if (bp->b_flags & _XBF_KMEM)
+		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
 	xfs_buf_deallocate(bp);
 }
 
 /*
- * Finds all pages for buffer in question and builds it's page list.
+ * Allocates all the pages for buffer in question and builds it's page list.
 */
 STATIC int
-_xfs_buf_lookup_pages(
+xfs_buf_allocate_memory(
 	xfs_buf_t		*bp,
 	uint			flags)
 {
-	struct address_space	*mapping = bp->b_target->bt_mapping;
-	size_t			blocksize = bp->b_target->bt_bsize;
 	size_t			size = bp->b_count_desired;
 	size_t			nbytes, offset;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
-	pgoff_t			first;
 	xfs_off_t		end;
 	int			error;
 
+	/*
+	 * for buffers that are contained within a single page, just allocate
+	 * the memory from the heap - there's no need for the complexity of
+	 * page arrays to keep allocation down to order 0.
+	 */
+	if (bp->b_buffer_length < PAGE_SIZE) {
+		bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+		if (!bp->b_addr) {
+			/* low memory - use alloc_page loop instead */
+			goto use_alloc_page;
+		}
+
+		if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+								PAGE_MASK) !=
+		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
+			/* b_addr spans two pages - use alloc_page instead */
+			kmem_free(bp->b_addr);
+			bp->b_addr = NULL;
+			goto use_alloc_page;
+		}
+		bp->b_offset = offset_in_page(bp->b_addr);
+		bp->b_pages = bp->b_page_array;
+		bp->b_pages[0] = virt_to_page(bp->b_addr);
+		bp->b_page_count = 1;
+		bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+		return 0;
+	}
+
+use_alloc_page:
 	end = bp->b_file_offset + bp->b_buffer_length;
 	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-
 	error = _xfs_buf_get_pages(bp, page_count, flags);
 	if (unlikely(error))
 		return error;
-	bp->b_flags |= _XBF_PAGE_CACHE;
 
 	offset = bp->b_offset;
-	first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
+	bp->b_flags |= _XBF_PAGES;
 
 	for (i = 0; i < bp->b_page_count; i++) {
 		struct page	*page;
 		uint		retries = 0;
-
-	retry:
-		page = find_or_create_page(mapping, first + i, gfp_mask);
+retry:
+		page = alloc_page(gfp_mask);
 		if (unlikely(page == NULL)) {
 			if (flags & XBF_READ_AHEAD) {
 				bp->b_page_count = i;
-				for (i = 0; i < bp->b_page_count; i++)
-					unlock_page(bp->b_pages[i]);
-				return -ENOMEM;
+				error = ENOMEM;
+				goto out_free_pages;
 			}
 
 			/*
@@ -331,65 +353,55 @@ _xfs_buf_lookup_pages(
 			 * handle buffer allocation failures we can't do much.
 			 */
 			if (!(++retries % 100))
-				printk(KERN_ERR
-					"XFS: possible memory allocation "
-					"deadlock in %s (mode:0x%x)\n",
+				xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
 					__func__, gfp_mask);
 
 			XFS_STATS_INC(xb_page_retries);
-			xfsbufd_wakeup(NULL, 0, gfp_mask);
 			congestion_wait(BLK_RW_ASYNC, HZ/50);
 			goto retry;
 		}
 
 		XFS_STATS_INC(xb_page_found);
 
-		nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
+		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
 		size -= nbytes;
-
-		ASSERT(!PagePrivate(page));
-		if (!PageUptodate(page)) {
-			page_count--;
-			if (blocksize >= PAGE_CACHE_SIZE) {
-				if (flags & XBF_READ)
-					bp->b_flags |= _XBF_PAGE_LOCKED;
-			} else if (!PagePrivate(page)) {
-				if (test_page_region(page, offset, nbytes))
-					page_count++;
-			}
-		}
-
 		bp->b_pages[i] = page;
 		offset = 0;
 	}
+	return 0;
 
-	if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
-		for (i = 0; i < bp->b_page_count; i++)
-			unlock_page(bp->b_pages[i]);
-	}
-
-	if (page_count == bp->b_page_count)
-		bp->b_flags |= XBF_DONE;
-
+out_free_pages:
+	for (i = 0; i < bp->b_page_count; i++)
+		__free_page(bp->b_pages[i]);
 	return error;
 }
 
 /*
- * Map buffer into kernel address-space if nessecary.
+ * Map buffer into kernel address-space if necessary.
 */
 STATIC int
 _xfs_buf_map_pages(
 	xfs_buf_t		*bp,
 	uint			flags)
 {
-	/* A single page buffer is always mappable */
+	ASSERT(bp->b_flags & _XBF_PAGES);
 	if (bp->b_page_count == 1) {
+		/* A single page buffer is always mappable */
 		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
 		bp->b_flags |= XBF_MAPPED;
 	} else if (flags & XBF_MAPPED) {
-		bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-					-1, PAGE_KERNEL);
-		if (unlikely(bp->b_addr == NULL))
+		int retried = 0;
+
+		do {
+			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+						-1, PAGE_KERNEL);
+			if (bp->b_addr)
+				break;
+			vm_unmap_aliases();
+		} while (retried++ <= 1);
+
+		if (!bp->b_addr)
 			return -ENOMEM;
 		bp->b_addr += bp->b_offset;
 		bp->b_flags |= XBF_MAPPED;
@@ -500,9 +512,14 @@ found:
 		}
 	}
 
+	/*
+	 * if the buffer is stale, clear all the external state associated with
+	 * it. We need to keep flags such as how we allocated the buffer memory
+	 * intact here.
+	 */
 	if (bp->b_flags & XBF_STALE) {
 		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-		bp->b_flags &= XBF_MAPPED;
+		bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
 	}
 
 	trace_xfs_buf_find(bp, flags, _RET_IP_);
@@ -523,7 +540,7 @@ xfs_buf_get(
 	xfs_buf_flags_t		flags)
 {
 	xfs_buf_t		*bp, *new_bp;
-	int			error = 0, i;
+	int			error = 0;
 
 	new_bp = xfs_buf_allocate(flags);
 	if (unlikely(!new_bp))
@@ -531,7 +548,7 @@ xfs_buf_get(
 
 	bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
 	if (bp == new_bp) {
-		error = _xfs_buf_lookup_pages(bp, flags);
+		error = xfs_buf_allocate_memory(bp, flags);
 		if (error)
 			goto no_buffer;
 	} else {
@@ -540,14 +557,11 @@ xfs_buf_get(
 		return NULL;
 	}
 
-	for (i = 0; i < bp->b_page_count; i++)
-		mark_page_accessed(bp->b_pages[i]);
-
 	if (!(bp->b_flags & XBF_MAPPED)) {
 		error = _xfs_buf_map_pages(bp, flags);
 		if (unlikely(error)) {
-			printk(KERN_WARNING "%s: failed to map pages\n",
-					__func__);
+			xfs_warn(target->bt_mount,
+				"%s: failed to map pages\n", __func__);
 			goto no_buffer;
 		}
 	}
@@ -641,10 +655,7 @@ xfs_buf_readahead(
 	xfs_off_t		ioff,
 	size_t			isize)
 {
-	struct backing_dev_info *bdi;
-
-	bdi = target->bt_mapping->backing_dev_info;
-	if (bdi_read_congested(bdi))
+	if (bdi_read_congested(target->bt_bdi))
 		return;
 
 	xfs_buf_read(target, ioff, isize,
@@ -722,10 +733,10 @@ xfs_buf_associate_memory(
 	size_t			buflen;
 	int			page_count;
 
-	pageaddr = (unsigned long)mem & PAGE_CACHE_MASK;
+	pageaddr = (unsigned long)mem & PAGE_MASK;
 	offset = (unsigned long)mem - pageaddr;
-	buflen = PAGE_CACHE_ALIGN(len + offset);
-	page_count = buflen >> PAGE_CACHE_SHIFT;
+	buflen = PAGE_ALIGN(len + offset);
+	page_count = buflen >> PAGE_SHIFT;
 
 	/* Free any previous set of page pointers */
 	if (bp->b_pages)
@@ -742,13 +753,12 @@ xfs_buf_associate_memory(
 
 	for (i = 0; i < bp->b_page_count; i++) {
 		bp->b_pages[i] = mem_to_page((void *)pageaddr);
-		pageaddr += PAGE_CACHE_SIZE;
+		pageaddr += PAGE_SIZE;
 	}
 
 	bp->b_count_desired = len;
 	bp->b_buffer_length = buflen;
 	bp->b_flags |= XBF_MAPPED;
-	bp->b_flags &= ~_XBF_PAGE_LOCKED;
 
 	return 0;
 }
@@ -781,8 +791,8 @@ xfs_buf_get_uncached(
 
 	error = _xfs_buf_map_pages(bp, XBF_MAPPED);
 	if (unlikely(error)) {
-		printk(KERN_WARNING "%s: failed to map pages\n",
-				__func__);
+		xfs_warn(target->bt_mount,
+			"%s: failed to map pages\n", __func__);
 		goto fail_free_mem;
 	}
 
@@ -827,7 +837,7 @@ xfs_buf_rele( | |||
827 | trace_xfs_buf_rele(bp, _RET_IP_); | 837 | trace_xfs_buf_rele(bp, _RET_IP_); |
828 | 838 | ||
829 | if (!pag) { | 839 | if (!pag) { |
830 | ASSERT(!bp->b_relse); | 840 | ASSERT(list_empty(&bp->b_lru)); |
831 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | 841 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); |
832 | if (atomic_dec_and_test(&bp->b_hold)) | 842 | if (atomic_dec_and_test(&bp->b_hold)) |
833 | xfs_buf_free(bp); | 843 | xfs_buf_free(bp); |
@@ -835,13 +845,15 @@ xfs_buf_rele( | |||
835 | } | 845 | } |
836 | 846 | ||
837 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); | 847 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); |
848 | |||
838 | ASSERT(atomic_read(&bp->b_hold) > 0); | 849 | ASSERT(atomic_read(&bp->b_hold) > 0); |
839 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { | 850 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
840 | if (bp->b_relse) { | 851 | if (!(bp->b_flags & XBF_STALE) && |
841 | atomic_inc(&bp->b_hold); | 852 | atomic_read(&bp->b_lru_ref)) { |
853 | xfs_buf_lru_add(bp); | ||
842 | spin_unlock(&pag->pag_buf_lock); | 854 | spin_unlock(&pag->pag_buf_lock); |
843 | bp->b_relse(bp); | ||
844 | } else { | 855 | } else { |
856 | xfs_buf_lru_del(bp); | ||
845 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 857 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); |
846 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); | 858 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
847 | spin_unlock(&pag->pag_buf_lock); | 859 | spin_unlock(&pag->pag_buf_lock); |
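With b_relse gone, buffers that still carry a b_lru_ref are parked on a per-target LRU at release time instead of being handed to a callback. The xfs_buf_lru_add()/xfs_buf_lru_del() helpers are outside this hunk; a minimal sketch of what they plausibly do, inferred from the bt_lru/bt_lru_nr fields introduced further down in this diff:

	STATIC void
	xfs_buf_lru_add(
		struct xfs_buf	*bp)
	{
		struct xfs_buftarg *btp = bp->b_target;

		spin_lock(&btp->bt_lru_lock);
		if (list_empty(&bp->b_lru)) {
			atomic_inc(&bp->b_hold);	/* the LRU owns a reference */
			list_add_tail(&bp->b_lru, &btp->bt_lru);
			btp->bt_lru_nr++;
		}
		spin_unlock(&btp->bt_lru_lock);
	}

	STATIC void
	xfs_buf_lru_del(
		struct xfs_buf	*bp)
	{
		struct xfs_buftarg *btp = bp->b_target;

		if (list_empty(&bp->b_lru))
			return;

		spin_lock(&btp->bt_lru_lock);
		if (!list_empty(&bp->b_lru)) {
			list_del_init(&bp->b_lru);
			btp->bt_lru_nr--;
		}
		spin_unlock(&btp->bt_lru_lock);
	}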
@@ -853,20 +865,7 @@ xfs_buf_rele( | |||
853 | 865 | ||
854 | 866 | ||
855 | /* | 867 | /* |
856 | * Mutual exclusion on buffers. Locking model: | 868 | * Lock a buffer object, if it is not already locked. |
857 | * | ||
858 | * Buffers associated with inodes for which buffer locking | ||
859 | * is not enabled are not protected by semaphores, and are | ||
860 | * assumed to be exclusively owned by the caller. There is a | ||
861 | * spinlock in the buffer, used by the caller when concurrent | ||
862 | * access is possible. | ||
863 | */ | ||
864 | |||
865 | /* | ||
866 | * Locks a buffer object, if it is not already locked. Note that this in | ||
867 | * no way locks the underlying pages, so it is only useful for | ||
868 | * synchronizing concurrent use of buffer objects, not for synchronizing | ||
869 | * independent access to the underlying pages. | ||
870 | * | 869 | * |
871 | * If we come across a stale, pinned, locked buffer, we know that we are | 870 | * If we come across a stale, pinned, locked buffer, we know that we are |
872 | * being asked to lock a buffer that has been reallocated. Because it is | 871 | * being asked to lock a buffer that has been reallocated. Because it is |
@@ -900,10 +899,7 @@ xfs_buf_lock_value( | |||
900 | } | 899 | } |
901 | 900 | ||
902 | /* | 901 | /* |
903 | * Locks a buffer object. | 902 | * Lock a buffer object. |
904 | * Note that this in no way locks the underlying pages, so it is only | ||
905 | * useful for synchronizing concurrent use of buffer objects, not for | ||
906 | * synchronizing independent access to the underlying pages. | ||
907 | * | 903 | * |
908 | * If we come across a stale, pinned, locked buffer, we know that we | 904 | * If we come across a stale, pinned, locked buffer, we know that we |
909 | * are being asked to lock a buffer that has been reallocated. Because | 905 | * are being asked to lock a buffer that has been reallocated. Because |
@@ -919,8 +915,6 @@ xfs_buf_lock( | |||
919 | 915 | ||
920 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | 916 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) |
921 | xfs_log_force(bp->b_target->bt_mount, 0); | 917 | xfs_log_force(bp->b_target->bt_mount, 0); |
922 | if (atomic_read(&bp->b_io_remaining)) | ||
923 | blk_run_address_space(bp->b_target->bt_mapping); | ||
924 | down(&bp->b_sema); | 918 | down(&bp->b_sema); |
925 | XB_SET_OWNER(bp); | 919 | XB_SET_OWNER(bp); |
926 | 920 | ||
@@ -964,9 +958,7 @@ xfs_buf_wait_unpin( | |||
964 | set_current_state(TASK_UNINTERRUPTIBLE); | 958 | set_current_state(TASK_UNINTERRUPTIBLE); |
965 | if (atomic_read(&bp->b_pin_count) == 0) | 959 | if (atomic_read(&bp->b_pin_count) == 0) |
966 | break; | 960 | break; |
967 | if (atomic_read(&bp->b_io_remaining)) | 961 | io_schedule(); |
968 | blk_run_address_space(bp->b_target->bt_mapping); | ||
969 | schedule(); | ||
970 | } | 962 | } |
971 | remove_wait_queue(&bp->b_waiters, &wait); | 963 | remove_wait_queue(&bp->b_waiters, &wait); |
972 | set_current_state(TASK_RUNNING); | 964 | set_current_state(TASK_RUNNING); |
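There is no per-target address space left to unplug, so the waiter now simply io_schedule()s until the pin count drops. The wake-up half is not shown in this hunk; assuming the usual waitqueue pairing, it amounts to something like:

	/* hypothetical reconstruction of the counterpart unpin path */
	static inline void
	xfs_buf_unpin(
		struct xfs_buf	*bp)
	{
		if (atomic_dec_and_test(&bp->b_pin_count))
			wake_up_all(&bp->b_waiters);
	}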
@@ -1178,10 +1170,8 @@ _xfs_buf_ioend( | |||
1178 | xfs_buf_t *bp, | 1170 | xfs_buf_t *bp, |
1179 | int schedule) | 1171 | int schedule) |
1180 | { | 1172 | { |
1181 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { | 1173 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) |
1182 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
1183 | xfs_buf_ioend(bp, schedule); | 1174 | xfs_buf_ioend(bp, schedule); |
1184 | } | ||
1185 | } | 1175 | } |
1186 | 1176 | ||
1187 | STATIC void | 1177 | STATIC void |
@@ -1190,35 +1180,12 @@ xfs_buf_bio_end_io( | |||
1190 | int error) | 1180 | int error) |
1191 | { | 1181 | { |
1192 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; | 1182 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; |
1193 | unsigned int blocksize = bp->b_target->bt_bsize; | ||
1194 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
1195 | 1183 | ||
1196 | xfs_buf_ioerror(bp, -error); | 1184 | xfs_buf_ioerror(bp, -error); |
1197 | 1185 | ||
1198 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) | 1186 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) |
1199 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); | 1187 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); |
1200 | 1188 | ||
1201 | do { | ||
1202 | struct page *page = bvec->bv_page; | ||
1203 | |||
1204 | ASSERT(!PagePrivate(page)); | ||
1205 | if (unlikely(bp->b_error)) { | ||
1206 | if (bp->b_flags & XBF_READ) | ||
1207 | ClearPageUptodate(page); | ||
1208 | } else if (blocksize >= PAGE_CACHE_SIZE) { | ||
1209 | SetPageUptodate(page); | ||
1210 | } else if (!PagePrivate(page) && | ||
1211 | (bp->b_flags & _XBF_PAGE_CACHE)) { | ||
1212 | set_page_region(page, bvec->bv_offset, bvec->bv_len); | ||
1213 | } | ||
1214 | |||
1215 | if (--bvec >= bio->bi_io_vec) | ||
1216 | prefetchw(&bvec->bv_page->flags); | ||
1217 | |||
1218 | if (bp->b_flags & _XBF_PAGE_LOCKED) | ||
1219 | unlock_page(page); | ||
1220 | } while (bvec >= bio->bi_io_vec); | ||
1221 | |||
1222 | _xfs_buf_ioend(bp, 1); | 1189 | _xfs_buf_ioend(bp, 1); |
1223 | bio_put(bio); | 1190 | bio_put(bio); |
1224 | } | 1191 | } |
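Since buffers are no longer assembled from page-cache pages, IO completion no longer walks the bio_vec fixing up per-page uptodate state; the only cache maintenance left is for vmapped buffers, where vm_map_ram() aliases the backing pages. A sketch of the two helpers this relies on, assuming they keep their obvious definitions:

	static inline int
	xfs_buf_is_vmapped(
		struct xfs_buf	*bp)
	{
		/* only mapped multi-page buffers go through vm_map_ram() */
		return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
	}

	static inline int
	xfs_buf_vmap_len(
		struct xfs_buf	*bp)
	{
		return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
	}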
@@ -1232,7 +1199,6 @@ _xfs_buf_ioapply( | |||
1232 | int offset = bp->b_offset; | 1199 | int offset = bp->b_offset; |
1233 | int size = bp->b_count_desired; | 1200 | int size = bp->b_count_desired; |
1234 | sector_t sector = bp->b_bn; | 1201 | sector_t sector = bp->b_bn; |
1235 | unsigned int blocksize = bp->b_target->bt_bsize; | ||
1236 | 1202 | ||
1237 | total_nr_pages = bp->b_page_count; | 1203 | total_nr_pages = bp->b_page_count; |
1238 | map_i = 0; | 1204 | map_i = 0; |
@@ -1253,29 +1219,6 @@ _xfs_buf_ioapply( | |||
1253 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; | 1219 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; |
1254 | } | 1220 | } |
1255 | 1221 | ||
1256 | /* Special code path for reading a sub page size buffer in -- | ||
1257 | * we populate the whole page, and hence the other metadata | ||
1258 | * in the same page. This optimization is only valid when the | ||
1259 | * filesystem block size is not smaller than the page size. | ||
1260 | */ | ||
1261 | if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && | ||
1262 | ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == | ||
1263 | (XBF_READ|_XBF_PAGE_LOCKED)) && | ||
1264 | (blocksize >= PAGE_CACHE_SIZE)) { | ||
1265 | bio = bio_alloc(GFP_NOIO, 1); | ||
1266 | |||
1267 | bio->bi_bdev = bp->b_target->bt_bdev; | ||
1268 | bio->bi_sector = sector - (offset >> BBSHIFT); | ||
1269 | bio->bi_end_io = xfs_buf_bio_end_io; | ||
1270 | bio->bi_private = bp; | ||
1271 | |||
1272 | bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0); | ||
1273 | size = 0; | ||
1274 | |||
1275 | atomic_inc(&bp->b_io_remaining); | ||
1276 | |||
1277 | goto submit_io; | ||
1278 | } | ||
1279 | 1222 | ||
1280 | next_chunk: | 1223 | next_chunk: |
1281 | atomic_inc(&bp->b_io_remaining); | 1224 | atomic_inc(&bp->b_io_remaining); |
@@ -1289,8 +1232,9 @@ next_chunk: | |||
1289 | bio->bi_end_io = xfs_buf_bio_end_io; | 1232 | bio->bi_end_io = xfs_buf_bio_end_io; |
1290 | bio->bi_private = bp; | 1233 | bio->bi_private = bp; |
1291 | 1234 | ||
1235 | |||
1292 | for (; size && nr_pages; nr_pages--, map_i++) { | 1236 | for (; size && nr_pages; nr_pages--, map_i++) { |
1293 | int rbytes, nbytes = PAGE_CACHE_SIZE - offset; | 1237 | int rbytes, nbytes = PAGE_SIZE - offset; |
1294 | 1238 | ||
1295 | if (nbytes > size) | 1239 | if (nbytes > size) |
1296 | nbytes = size; | 1240 | nbytes = size; |
@@ -1305,7 +1249,6 @@ next_chunk: | |||
1305 | total_nr_pages--; | 1249 | total_nr_pages--; |
1306 | } | 1250 | } |
1307 | 1251 | ||
1308 | submit_io: | ||
1309 | if (likely(bio->bi_size)) { | 1252 | if (likely(bio->bi_size)) { |
1310 | if (xfs_buf_is_vmapped(bp)) { | 1253 | if (xfs_buf_is_vmapped(bp)) { |
1311 | flush_kernel_vmap_range(bp->b_addr, | 1254 | flush_kernel_vmap_range(bp->b_addr, |
@@ -1315,18 +1258,7 @@ submit_io: | |||
1315 | if (size) | 1258 | if (size) |
1316 | goto next_chunk; | 1259 | goto next_chunk; |
1317 | } else { | 1260 | } else { |
1318 | /* | ||
1319 | * if we get here, no pages were added to the bio. However, | ||
1320 | * we can't just error out here - if the pages are locked then | ||
1321 | * we have to unlock them, otherwise we can hang on a later | ||
1322 | * access to the page. | ||
1323 | */ | ||
1324 | xfs_buf_ioerror(bp, EIO); | 1261 | xfs_buf_ioerror(bp, EIO); |
1325 | if (bp->b_flags & _XBF_PAGE_LOCKED) { | ||
1326 | int i; | ||
1327 | for (i = 0; i < bp->b_page_count; i++) | ||
1328 | unlock_page(bp->b_pages[i]); | ||
1329 | } | ||
1330 | bio_put(bio); | 1262 | bio_put(bio); |
1331 | } | 1263 | } |
1332 | } | 1264 | } |
@@ -1371,8 +1303,6 @@ xfs_buf_iowait( | |||
1371 | { | 1303 | { |
1372 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1304 | trace_xfs_buf_iowait(bp, _RET_IP_); |
1373 | 1305 | ||
1374 | if (atomic_read(&bp->b_io_remaining)) | ||
1375 | blk_run_address_space(bp->b_target->bt_mapping); | ||
1376 | wait_for_completion(&bp->b_iowait); | 1306 | wait_for_completion(&bp->b_iowait); |
1377 | 1307 | ||
1378 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1308 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
@@ -1390,8 +1320,8 @@ xfs_buf_offset( | |||
1390 | return XFS_BUF_PTR(bp) + offset; | 1320 | return XFS_BUF_PTR(bp) + offset; |
1391 | 1321 | ||
1392 | offset += bp->b_offset; | 1322 | offset += bp->b_offset; |
1393 | page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; | 1323 | page = bp->b_pages[offset >> PAGE_SHIFT]; |
1394 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); | 1324 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); |
1395 | } | 1325 | } |
1396 | 1326 | ||
1397 | /* | 1327 | /* |
@@ -1413,9 +1343,9 @@ xfs_buf_iomove( | |||
1413 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; | 1343 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; |
1414 | cpoff = xfs_buf_poff(boff + bp->b_offset); | 1344 | cpoff = xfs_buf_poff(boff + bp->b_offset); |
1415 | csize = min_t(size_t, | 1345 | csize = min_t(size_t, |
1416 | PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); | 1346 | PAGE_SIZE-cpoff, bp->b_count_desired-boff); |
1417 | 1347 | ||
1418 | ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); | 1348 | ASSERT(((csize + cpoff) <= PAGE_SIZE)); |
1419 | 1349 | ||
1420 | switch (mode) { | 1350 | switch (mode) { |
1421 | case XBRW_ZERO: | 1351 | case XBRW_ZERO: |
@@ -1438,51 +1368,84 @@ xfs_buf_iomove( | |||
1438 | */ | 1368 | */ |
1439 | 1369 | ||
1440 | /* | 1370 | /* |
1441 | * Wait for any bufs with callbacks that have been submitted but | 1371 | * Wait for any bufs with callbacks that have been submitted but have not yet |
1442 | * have not yet returned... walk the hash list for the target. | 1372 | * returned. These buffers will have an elevated hold count, so wait on those |
1373 | * while freeing all the buffers only held by the LRU. | ||
1443 | */ | 1374 | */ |
1444 | void | 1375 | void |
1445 | xfs_wait_buftarg( | 1376 | xfs_wait_buftarg( |
1446 | struct xfs_buftarg *btp) | 1377 | struct xfs_buftarg *btp) |
1447 | { | 1378 | { |
1448 | struct xfs_perag *pag; | 1379 | struct xfs_buf *bp; |
1449 | uint i; | ||
1450 | 1380 | ||
1451 | for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { | 1381 | restart: |
1452 | pag = xfs_perag_get(btp->bt_mount, i); | 1382 | spin_lock(&btp->bt_lru_lock); |
1453 | spin_lock(&pag->pag_buf_lock); | 1383 | while (!list_empty(&btp->bt_lru)) { |
1454 | while (rb_first(&pag->pag_buf_tree)) { | 1384 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
1455 | spin_unlock(&pag->pag_buf_lock); | 1385 | if (atomic_read(&bp->b_hold) > 1) { |
1386 | spin_unlock(&btp->bt_lru_lock); | ||
1456 | delay(100); | 1387 | delay(100); |
1457 | spin_lock(&pag->pag_buf_lock); | 1388 | goto restart; |
1458 | } | 1389 | } |
1459 | spin_unlock(&pag->pag_buf_lock); | 1390 | /* |
1460 | xfs_perag_put(pag); | 1391 | * clear the LRU reference count so the buffer doesn't get | ||
1392 | * ignored in xfs_buf_rele(). | ||
1393 | */ | ||
1394 | atomic_set(&bp->b_lru_ref, 0); | ||
1395 | spin_unlock(&btp->bt_lru_lock); | ||
1396 | xfs_buf_rele(bp); | ||
1397 | spin_lock(&btp->bt_lru_lock); | ||
1461 | } | 1398 | } |
1399 | spin_unlock(&btp->bt_lru_lock); | ||
1462 | } | 1400 | } |
1463 | 1401 | ||
1464 | /* | 1402 | int |
1465 | * buftarg list for delwrite queue processing | 1403 | xfs_buftarg_shrink( |
1466 | */ | 1404 | struct shrinker *shrink, |
1467 | static LIST_HEAD(xfs_buftarg_list); | 1405 | int nr_to_scan, |
1468 | static DEFINE_SPINLOCK(xfs_buftarg_lock); | 1406 | gfp_t mask) |
1469 | |||
1470 | STATIC void | ||
1471 | xfs_register_buftarg( | ||
1472 | xfs_buftarg_t *btp) | ||
1473 | { | 1407 | { |
1474 | spin_lock(&xfs_buftarg_lock); | 1408 | struct xfs_buftarg *btp = container_of(shrink, |
1475 | list_add(&btp->bt_list, &xfs_buftarg_list); | 1409 | struct xfs_buftarg, bt_shrinker); |
1476 | spin_unlock(&xfs_buftarg_lock); | 1410 | struct xfs_buf *bp; |
1477 | } | 1411 | LIST_HEAD(dispose); |
1478 | 1412 | ||
1479 | STATIC void | 1413 | if (!nr_to_scan) |
1480 | xfs_unregister_buftarg( | 1414 | return btp->bt_lru_nr; |
1481 | xfs_buftarg_t *btp) | 1415 | |
1482 | { | 1416 | spin_lock(&btp->bt_lru_lock); |
1483 | spin_lock(&xfs_buftarg_lock); | 1417 | while (!list_empty(&btp->bt_lru)) { |
1484 | list_del(&btp->bt_list); | 1418 | if (nr_to_scan-- <= 0) |
1485 | spin_unlock(&xfs_buftarg_lock); | 1419 | break; |
1420 | |||
1421 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | ||
1422 | |||
1423 | /* | ||
1424 | * Decrement the b_lru_ref count unless the value is already | ||
1425 | * zero. If the value is already zero, we need to reclaim the | ||
1426 | * buffer, otherwise it gets another trip through the LRU. | ||
1427 | */ | ||
1428 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { | ||
1429 | list_move_tail(&bp->b_lru, &btp->bt_lru); | ||
1430 | continue; | ||
1431 | } | ||
1432 | |||
1433 | /* | ||
1434 | * remove the buffer from the LRU now to avoid needing another | ||
1435 | * lock round trip inside xfs_buf_rele(). | ||
1436 | */ | ||
1437 | list_move(&bp->b_lru, &dispose); | ||
1438 | btp->bt_lru_nr--; | ||
1439 | } | ||
1440 | spin_unlock(&btp->bt_lru_lock); | ||
1441 | |||
1442 | while (!list_empty(&dispose)) { | ||
1443 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | ||
1444 | list_del_init(&bp->b_lru); | ||
1445 | xfs_buf_rele(bp); | ||
1446 | } | ||
1447 | |||
1448 | return btp->bt_lru_nr; | ||
1486 | } | 1449 | } |
1487 | 1450 | ||
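The atomic_add_unless() test gives each buffer as many trips through the LRU as its b_lru_ref count: a buffer initialised with a reference of N is rotated to the tail N times before it lands on the dispose list. A hypothetical trace for one buffer:

	/* hypothetical aging of a single buffer across shrinker scans */
	atomic_set(&bp->b_lru_ref, 2);

	/* scan 1: atomic_add_unless() drops b_lru_ref 2 -> 1; rotated to tail */
	/* scan 2: b_lru_ref 1 -> 0; rotated to tail one last time */
	/* scan 3: b_lru_ref already 0; moved to the dispose list, then freed
	 * by the final xfs_buf_rele() */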
1488 | void | 1451 | void |
@@ -1490,17 +1453,13 @@ xfs_free_buftarg( | |||
1490 | struct xfs_mount *mp, | 1453 | struct xfs_mount *mp, |
1491 | struct xfs_buftarg *btp) | 1454 | struct xfs_buftarg *btp) |
1492 | { | 1455 | { |
1456 | unregister_shrinker(&btp->bt_shrinker); | ||
1457 | |||
1493 | xfs_flush_buftarg(btp, 1); | 1458 | xfs_flush_buftarg(btp, 1); |
1494 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1459 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1495 | xfs_blkdev_issue_flush(btp); | 1460 | xfs_blkdev_issue_flush(btp); |
1496 | iput(btp->bt_mapping->host); | ||
1497 | 1461 | ||
1498 | /* Unregister the buftarg first so that we don't get a | ||
1499 | * wakeup finding a non-existent task | ||
1500 | */ | ||
1501 | xfs_unregister_buftarg(btp); | ||
1502 | kthread_stop(btp->bt_task); | 1462 | kthread_stop(btp->bt_task); |
1503 | |||
1504 | kmem_free(btp); | 1463 | kmem_free(btp); |
1505 | } | 1464 | } |
1506 | 1465 | ||
@@ -1516,21 +1475,12 @@ xfs_setsize_buftarg_flags( | |||
1516 | btp->bt_smask = sectorsize - 1; | 1475 | btp->bt_smask = sectorsize - 1; |
1517 | 1476 | ||
1518 | if (set_blocksize(btp->bt_bdev, sectorsize)) { | 1477 | if (set_blocksize(btp->bt_bdev, sectorsize)) { |
1519 | printk(KERN_WARNING | 1478 | xfs_warn(btp->bt_mount, |
1520 | "XFS: Cannot set_blocksize to %u on device %s\n", | 1479 | "Cannot set_blocksize to %u on device %s\n", |
1521 | sectorsize, XFS_BUFTARG_NAME(btp)); | 1480 | sectorsize, XFS_BUFTARG_NAME(btp)); |
1522 | return EINVAL; | 1481 | return EINVAL; |
1523 | } | 1482 | } |
1524 | 1483 | ||
1525 | if (verbose && | ||
1526 | (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { | ||
1527 | printk(KERN_WARNING | ||
1528 | "XFS: %u byte sectors in use on device %s. " | ||
1529 | "This is suboptimal; %u or greater is ideal.\n", | ||
1530 | sectorsize, XFS_BUFTARG_NAME(btp), | ||
1531 | (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); | ||
1532 | } | ||
1533 | |||
1534 | return 0; | 1484 | return 0; |
1535 | } | 1485 | } |
1536 | 1486 | ||
@@ -1545,7 +1495,7 @@ xfs_setsize_buftarg_early( | |||
1545 | struct block_device *bdev) | 1495 | struct block_device *bdev) |
1546 | { | 1496 | { |
1547 | return xfs_setsize_buftarg_flags(btp, | 1497 | return xfs_setsize_buftarg_flags(btp, |
1548 | PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); | 1498 | PAGE_SIZE, bdev_logical_block_size(bdev), 0); |
1549 | } | 1499 | } |
1550 | 1500 | ||
1551 | int | 1501 | int |
@@ -1558,59 +1508,17 @@ xfs_setsize_buftarg( | |||
1558 | } | 1508 | } |
1559 | 1509 | ||
1560 | STATIC int | 1510 | STATIC int |
1561 | xfs_mapping_buftarg( | ||
1562 | xfs_buftarg_t *btp, | ||
1563 | struct block_device *bdev) | ||
1564 | { | ||
1565 | struct backing_dev_info *bdi; | ||
1566 | struct inode *inode; | ||
1567 | struct address_space *mapping; | ||
1568 | static const struct address_space_operations mapping_aops = { | ||
1569 | .sync_page = block_sync_page, | ||
1570 | .migratepage = fail_migrate_page, | ||
1571 | }; | ||
1572 | |||
1573 | inode = new_inode(bdev->bd_inode->i_sb); | ||
1574 | if (!inode) { | ||
1575 | printk(KERN_WARNING | ||
1576 | "XFS: Cannot allocate mapping inode for device %s\n", | ||
1577 | XFS_BUFTARG_NAME(btp)); | ||
1578 | return ENOMEM; | ||
1579 | } | ||
1580 | inode->i_ino = get_next_ino(); | ||
1581 | inode->i_mode = S_IFBLK; | ||
1582 | inode->i_bdev = bdev; | ||
1583 | inode->i_rdev = bdev->bd_dev; | ||
1584 | bdi = blk_get_backing_dev_info(bdev); | ||
1585 | if (!bdi) | ||
1586 | bdi = &default_backing_dev_info; | ||
1587 | mapping = &inode->i_data; | ||
1588 | mapping->a_ops = &mapping_aops; | ||
1589 | mapping->backing_dev_info = bdi; | ||
1590 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
1591 | btp->bt_mapping = mapping; | ||
1592 | return 0; | ||
1593 | } | ||
1594 | |||
1595 | STATIC int | ||
1596 | xfs_alloc_delwrite_queue( | 1511 | xfs_alloc_delwrite_queue( |
1597 | xfs_buftarg_t *btp, | 1512 | xfs_buftarg_t *btp, |
1598 | const char *fsname) | 1513 | const char *fsname) |
1599 | { | 1514 | { |
1600 | int error = 0; | ||
1601 | |||
1602 | INIT_LIST_HEAD(&btp->bt_list); | ||
1603 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); | 1515 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); |
1604 | spin_lock_init(&btp->bt_delwrite_lock); | 1516 | spin_lock_init(&btp->bt_delwrite_lock); |
1605 | btp->bt_flags = 0; | 1517 | btp->bt_flags = 0; |
1606 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); | 1518 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); |
1607 | if (IS_ERR(btp->bt_task)) { | 1519 | if (IS_ERR(btp->bt_task)) |
1608 | error = PTR_ERR(btp->bt_task); | 1520 | return PTR_ERR(btp->bt_task); |
1609 | goto out_error; | 1521 | return 0; |
1610 | } | ||
1611 | xfs_register_buftarg(btp); | ||
1612 | out_error: | ||
1613 | return error; | ||
1614 | } | 1522 | } |
1615 | 1523 | ||
1616 | xfs_buftarg_t * | 1524 | xfs_buftarg_t * |
@@ -1627,12 +1535,19 @@ xfs_alloc_buftarg( | |||
1627 | btp->bt_mount = mp; | 1535 | btp->bt_mount = mp; |
1628 | btp->bt_dev = bdev->bd_dev; | 1536 | btp->bt_dev = bdev->bd_dev; |
1629 | btp->bt_bdev = bdev; | 1537 | btp->bt_bdev = bdev; |
1630 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1538 | btp->bt_bdi = blk_get_backing_dev_info(bdev); |
1539 | if (!btp->bt_bdi) | ||
1631 | goto error; | 1540 | goto error; |
1632 | if (xfs_mapping_buftarg(btp, bdev)) | 1541 | |
1542 | INIT_LIST_HEAD(&btp->bt_lru); | ||
1543 | spin_lock_init(&btp->bt_lru_lock); | ||
1544 | if (xfs_setsize_buftarg_early(btp, bdev)) | ||
1633 | goto error; | 1545 | goto error; |
1634 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1546 | if (xfs_alloc_delwrite_queue(btp, fsname)) |
1635 | goto error; | 1547 | goto error; |
1548 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | ||
1549 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | ||
1550 | register_shrinker(&btp->bt_shrinker); | ||
1636 | return btp; | 1551 | return btp; |
1637 | 1552 | ||
1638 | error: | 1553 | error: |
@@ -1737,27 +1652,6 @@ xfs_buf_runall_queues( | |||
1737 | flush_workqueue(queue); | 1652 | flush_workqueue(queue); |
1738 | } | 1653 | } |
1739 | 1654 | ||
1740 | STATIC int | ||
1741 | xfsbufd_wakeup( | ||
1742 | struct shrinker *shrink, | ||
1743 | int priority, | ||
1744 | gfp_t mask) | ||
1745 | { | ||
1746 | xfs_buftarg_t *btp; | ||
1747 | |||
1748 | spin_lock(&xfs_buftarg_lock); | ||
1749 | list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { | ||
1750 | if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) | ||
1751 | continue; | ||
1752 | if (list_empty(&btp->bt_delwrite_queue)) | ||
1753 | continue; | ||
1754 | set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); | ||
1755 | wake_up_process(btp->bt_task); | ||
1756 | } | ||
1757 | spin_unlock(&xfs_buftarg_lock); | ||
1758 | return 0; | ||
1759 | } | ||
1760 | |||
1761 | /* | 1655 | /* |
1762 | * Move as many buffers as specified to the supplied list | 1656 | * Move as many buffers as specified to the supplied list |
1763 | * indicating if we skipped any buffers to prevent deadlocks. | 1657 | * indicating if we skipped any buffers to prevent deadlocks. |
@@ -1845,8 +1739,8 @@ xfsbufd( | |||
1845 | do { | 1739 | do { |
1846 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1740 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); |
1847 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); | 1741 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); |
1848 | int count = 0; | ||
1849 | struct list_head tmp; | 1742 | struct list_head tmp; |
1743 | struct blk_plug plug; | ||
1850 | 1744 | ||
1851 | if (unlikely(freezing(current))) { | 1745 | if (unlikely(freezing(current))) { |
1852 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); | 1746 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); |
@@ -1862,16 +1756,15 @@ xfsbufd( | |||
1862 | 1756 | ||
1863 | xfs_buf_delwri_split(target, &tmp, age); | 1757 | xfs_buf_delwri_split(target, &tmp, age); |
1864 | list_sort(NULL, &tmp, xfs_buf_cmp); | 1758 | list_sort(NULL, &tmp, xfs_buf_cmp); |
1759 | |||
1760 | blk_start_plug(&plug); | ||
1865 | while (!list_empty(&tmp)) { | 1761 | while (!list_empty(&tmp)) { |
1866 | struct xfs_buf *bp; | 1762 | struct xfs_buf *bp; |
1867 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); | 1763 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); |
1868 | list_del_init(&bp->b_list); | 1764 | list_del_init(&bp->b_list); |
1869 | xfs_bdstrat_cb(bp); | 1765 | xfs_bdstrat_cb(bp); |
1870 | count++; | ||
1871 | } | 1766 | } |
1872 | if (count) | 1767 | blk_finish_plug(&plug); |
1873 | blk_run_address_space(target->bt_mapping); | ||
1874 | |||
1875 | } while (!kthread_should_stop()); | 1768 | } while (!kthread_should_stop()); |
1876 | 1769 | ||
1877 | return 0; | 1770 | return 0; |
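blk_run_address_space() is gone from the kernel, so xfsbufd now relies on the 2.6.39 on-stack plugging API: requests queued between blk_start_plug() and blk_finish_plug() are held back and merged, then flushed at the finish call (or automatically if the task sleeps). The idiom in isolation, as a sketch:

	#include <linux/blkdev.h>

	/* hypothetical helper showing the plugging pattern used above */
	static void submit_sorted_batch(struct list_head *buffers)
	{
		struct blk_plug plug;

		blk_start_plug(&plug);
		/* ... issue xfs_bdstrat_cb()/submit_bio() per buffer ... */
		blk_finish_plug(&plug);	/* kicks the held-back requests */
	}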
@@ -1891,6 +1784,7 @@ xfs_flush_buftarg( | |||
1891 | int pincount = 0; | 1784 | int pincount = 0; |
1892 | LIST_HEAD(tmp_list); | 1785 | LIST_HEAD(tmp_list); |
1893 | LIST_HEAD(wait_list); | 1786 | LIST_HEAD(wait_list); |
1787 | struct blk_plug plug; | ||
1894 | 1788 | ||
1895 | xfs_buf_runall_queues(xfsconvertd_workqueue); | 1789 | xfs_buf_runall_queues(xfsconvertd_workqueue); |
1896 | xfs_buf_runall_queues(xfsdatad_workqueue); | 1790 | xfs_buf_runall_queues(xfsdatad_workqueue); |
@@ -1905,6 +1799,8 @@ xfs_flush_buftarg( | |||
1905 | * we do that after issuing all the IO. | 1799 | * we do that after issuing all the IO. |
1906 | */ | 1800 | */ |
1907 | list_sort(NULL, &tmp_list, xfs_buf_cmp); | 1801 | list_sort(NULL, &tmp_list, xfs_buf_cmp); |
1802 | |||
1803 | blk_start_plug(&plug); | ||
1908 | while (!list_empty(&tmp_list)) { | 1804 | while (!list_empty(&tmp_list)) { |
1909 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); | 1805 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); |
1910 | ASSERT(target == bp->b_target); | 1806 | ASSERT(target == bp->b_target); |
@@ -1915,10 +1811,10 @@ xfs_flush_buftarg( | |||
1915 | } | 1811 | } |
1916 | xfs_bdstrat_cb(bp); | 1812 | xfs_bdstrat_cb(bp); |
1917 | } | 1813 | } |
1814 | blk_finish_plug(&plug); | ||
1918 | 1815 | ||
1919 | if (wait) { | 1816 | if (wait) { |
1920 | /* Expedite and wait for IO to complete. */ | 1817 | /* Wait for IO to complete. */ |
1921 | blk_run_address_space(target->bt_mapping); | ||
1922 | while (!list_empty(&wait_list)) { | 1818 | while (!list_empty(&wait_list)) { |
1923 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | 1819 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); |
1924 | 1820 | ||
@@ -1944,15 +1840,15 @@ xfs_buf_init(void) | |||
1944 | if (!xfslogd_workqueue) | 1840 | if (!xfslogd_workqueue) |
1945 | goto out_free_buf_zone; | 1841 | goto out_free_buf_zone; |
1946 | 1842 | ||
1947 | xfsdatad_workqueue = create_workqueue("xfsdatad"); | 1843 | xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); |
1948 | if (!xfsdatad_workqueue) | 1844 | if (!xfsdatad_workqueue) |
1949 | goto out_destroy_xfslogd_workqueue; | 1845 | goto out_destroy_xfslogd_workqueue; |
1950 | 1846 | ||
1951 | xfsconvertd_workqueue = create_workqueue("xfsconvertd"); | 1847 | xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", |
1848 | WQ_MEM_RECLAIM, 1); | ||
1952 | if (!xfsconvertd_workqueue) | 1849 | if (!xfsconvertd_workqueue) |
1953 | goto out_destroy_xfsdatad_workqueue; | 1850 | goto out_destroy_xfsdatad_workqueue; |
1954 | 1851 | ||
1955 | register_shrinker(&xfs_buf_shake); | ||
1956 | return 0; | 1852 | return 0; |
1957 | 1853 | ||
1958 | out_destroy_xfsdatad_workqueue: | 1854 | out_destroy_xfsdatad_workqueue: |
@@ -1968,7 +1864,6 @@ xfs_buf_init(void) | |||
1968 | void | 1864 | void |
1969 | xfs_buf_terminate(void) | 1865 | xfs_buf_terminate(void) |
1970 | { | 1866 | { |
1971 | unregister_shrinker(&xfs_buf_shake); | ||
1972 | destroy_workqueue(xfsconvertd_workqueue); | 1867 | destroy_workqueue(xfsconvertd_workqueue); |
1973 | destroy_workqueue(xfsdatad_workqueue); | 1868 | destroy_workqueue(xfsdatad_workqueue); |
1974 | destroy_workqueue(xfslogd_workqueue); | 1869 | destroy_workqueue(xfslogd_workqueue); |
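create_workqueue() is replaced by alloc_workqueue() with WQ_MEM_RECLAIM, which guarantees a rescuer thread so these queues can make forward progress while the system is reclaiming memory; a max_active of 1 bounds the number of in-flight work items. The generic shape of such an init/teardown pair, as a sketch:

	#include <linux/init.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *example_wq;	/* hypothetical */

	static int __init example_init(void)
	{
		/* WQ_MEM_RECLAIM: safe to depend on from the reclaim path */
		example_wq = alloc_workqueue("examplewq", WQ_MEM_RECLAIM, 1);
		if (!example_wq)
			return -ENOMEM;
		return 0;
	}

	static void __exit example_exit(void)
	{
		destroy_workqueue(example_wq);
	}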
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 383a3f37cf98..a9a1c4512645 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -61,30 +61,11 @@ typedef enum { | |||
61 | #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ | 61 | #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ |
62 | 62 | ||
63 | /* flags used only internally */ | 63 | /* flags used only internally */ |
64 | #define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */ | ||
65 | #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ | 64 | #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ |
66 | #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ | 65 | #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ |
66 | #define _XBF_KMEM (1 << 20)/* backed by heap memory */ | ||
67 | #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ | 67 | #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ |
68 | 68 | ||
69 | /* | ||
70 | * Special flag for supporting metadata blocks smaller than a FSB. | ||
71 | * | ||
72 | * In this case we can have multiple xfs_buf_t on a single page and | ||
73 | * need to lock out concurrent xfs_buf_t readers as they only | ||
74 | * serialise access to the buffer. | ||
75 | * | ||
76 | * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation | ||
77 | * between reads of the page. Hence we can have one thread read the | ||
78 | * page and modify it, but then race with another thread that thinks | ||
79 | * the page is not up-to-date and hence reads it again. | ||
80 | * | ||
81 | * The result is that the first modification to the page is lost. | ||
82 | * This sort of AGF/AGI reading race can happen when unlinking inodes | ||
83 | * that require truncation and results in the AGI unlinked list | ||
84 | * modifications being lost. | ||
85 | */ | ||
86 | #define _XBF_PAGE_LOCKED (1 << 22) | ||
87 | |||
88 | typedef unsigned int xfs_buf_flags_t; | 69 | typedef unsigned int xfs_buf_flags_t; |
89 | 70 | ||
90 | #define XFS_BUF_FLAGS \ | 71 | #define XFS_BUF_FLAGS \ |
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t; | |||
100 | { XBF_LOCK, "LOCK" }, /* should never be set */\ | 81 | { XBF_LOCK, "LOCK" }, /* should never be set */\ |
101 | { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ | 82 | { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ |
102 | { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ | 83 | { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ |
103 | { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \ | ||
104 | { _XBF_PAGES, "PAGES" }, \ | 84 | { _XBF_PAGES, "PAGES" }, \ |
105 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ | 85 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ |
106 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 86 | { _XBF_KMEM, "KMEM" }, \ |
107 | { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } | 87 | { _XBF_DELWRI_Q, "DELWRI_Q" } |
108 | |||
109 | 88 | ||
110 | typedef enum { | 89 | typedef enum { |
111 | XBT_FORCE_SLEEP = 0, | 90 | XBT_FORCE_SLEEP = 0, |
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash { | |||
120 | typedef struct xfs_buftarg { | 99 | typedef struct xfs_buftarg { |
121 | dev_t bt_dev; | 100 | dev_t bt_dev; |
122 | struct block_device *bt_bdev; | 101 | struct block_device *bt_bdev; |
123 | struct address_space *bt_mapping; | 102 | struct backing_dev_info *bt_bdi; |
124 | struct xfs_mount *bt_mount; | 103 | struct xfs_mount *bt_mount; |
125 | unsigned int bt_bsize; | 104 | unsigned int bt_bsize; |
126 | unsigned int bt_sshift; | 105 | unsigned int bt_sshift; |
@@ -128,27 +107,19 @@ typedef struct xfs_buftarg { | |||
128 | 107 | ||
129 | /* per device delwri queue */ | 108 | /* per device delwri queue */ |
130 | struct task_struct *bt_task; | 109 | struct task_struct *bt_task; |
131 | struct list_head bt_list; | ||
132 | struct list_head bt_delwrite_queue; | 110 | struct list_head bt_delwrite_queue; |
133 | spinlock_t bt_delwrite_lock; | 111 | spinlock_t bt_delwrite_lock; |
134 | unsigned long bt_flags; | 112 | unsigned long bt_flags; |
135 | } xfs_buftarg_t; | ||
136 | 113 | ||
137 | /* | 114 | /* LRU control structures */ |
138 | * xfs_buf_t: Buffer structure for pagecache-based buffers | 115 | struct shrinker bt_shrinker; |
139 | * | 116 | struct list_head bt_lru; |
140 | * This buffer structure is used by the pagecache buffer management routines | 117 | spinlock_t bt_lru_lock; |
141 | * to refer to an assembly of pages forming a logical buffer. | 118 | unsigned int bt_lru_nr; |
142 | * | 119 | } xfs_buftarg_t; |
143 | * The buffer structure is used on a temporary basis only, and discarded when | ||
144 | * released. The real data storage is recorded in the pagecache. Buffers are | ||
145 | * hashed to the block device on which the file system resides. | ||
146 | */ | ||
147 | 120 | ||
148 | struct xfs_buf; | 121 | struct xfs_buf; |
149 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); | 122 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); |
150 | typedef void (*xfs_buf_relse_t)(struct xfs_buf *); | ||
151 | typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); | ||
152 | 123 | ||
153 | #define XB_PAGES 2 | 124 | #define XB_PAGES 2 |
154 | 125 | ||
@@ -164,9 +135,11 @@ typedef struct xfs_buf { | |||
164 | xfs_off_t b_file_offset; /* offset in file */ | 135 | xfs_off_t b_file_offset; /* offset in file */ |
165 | size_t b_buffer_length;/* size of buffer in bytes */ | 136 | size_t b_buffer_length;/* size of buffer in bytes */ |
166 | atomic_t b_hold; /* reference count */ | 137 | atomic_t b_hold; /* reference count */ |
138 | atomic_t b_lru_ref; /* lru reclaim ref count */ | ||
167 | xfs_buf_flags_t b_flags; /* status flags */ | 139 | xfs_buf_flags_t b_flags; /* status flags */ |
168 | struct semaphore b_sema; /* semaphore for lockables */ | 140 | struct semaphore b_sema; /* semaphore for lockables */ |
169 | 141 | ||
142 | struct list_head b_lru; /* lru list */ | ||
170 | wait_queue_head_t b_waiters; /* unpin waiters */ | 143 | wait_queue_head_t b_waiters; /* unpin waiters */ |
171 | struct list_head b_list; | 144 | struct list_head b_list; |
172 | struct xfs_perag *b_pag; /* contains rbtree root */ | 145 | struct xfs_perag *b_pag; /* contains rbtree root */ |
@@ -176,7 +149,6 @@ typedef struct xfs_buf { | |||
176 | void *b_addr; /* virtual address of buffer */ | 149 | void *b_addr; /* virtual address of buffer */ |
177 | struct work_struct b_iodone_work; | 150 | struct work_struct b_iodone_work; |
178 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 151 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
179 | xfs_buf_relse_t b_relse; /* releasing function */ | ||
180 | struct completion b_iowait; /* queue for I/O waiters */ | 152 | struct completion b_iowait; /* queue for I/O waiters */ |
181 | void *b_fspriv; | 153 | void *b_fspriv; |
182 | void *b_fspriv2; | 154 | void *b_fspriv2; |
@@ -264,7 +236,8 @@ extern void xfs_buf_terminate(void); | |||
264 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ | 236 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ |
265 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) | 237 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) |
266 | 238 | ||
267 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) | 239 | void xfs_buf_stale(struct xfs_buf *bp); |
240 | #define XFS_BUF_STALE(bp) xfs_buf_stale(bp); | ||
268 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 241 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
269 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) | 242 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) |
270 | #define XFS_BUF_SUPER_STALE(bp) do { \ | 243 | #define XFS_BUF_SUPER_STALE(bp) do { \ |
@@ -315,7 +288,6 @@ extern void xfs_buf_terminate(void); | |||
315 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) | 288 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) |
316 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) | 289 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) |
317 | #define XFS_BUF_SET_START(bp) do { } while (0) | 290 | #define XFS_BUF_SET_START(bp) do { } while (0) |
318 | #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) | ||
319 | 291 | ||
320 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) | 292 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) |
321 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) | 293 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) |
@@ -328,9 +300,15 @@ extern void xfs_buf_terminate(void); | |||
328 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | 300 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) |
329 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | 301 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) |
330 | 302 | ||
331 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) | 303 | static inline void |
304 | xfs_buf_set_ref( | ||
305 | struct xfs_buf *bp, | ||
306 | int lru_ref) | ||
307 | { | ||
308 | atomic_set(&bp->b_lru_ref, lru_ref); | ||
309 | } | ||
310 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) | ||
332 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) | 311 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) |
333 | #define XFS_BUF_SET_REF(bp, ref) do { } while (0) | ||
334 | 312 | ||
335 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) | 313 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) |
336 | 314 | ||
@@ -346,8 +324,7 @@ extern void xfs_buf_terminate(void); | |||
346 | 324 | ||
347 | static inline void xfs_buf_relse(xfs_buf_t *bp) | 325 | static inline void xfs_buf_relse(xfs_buf_t *bp) |
348 | { | 326 | { |
349 | if (!bp->b_relse) | 327 | xfs_buf_unlock(bp); |
350 | xfs_buf_unlock(bp); | ||
351 | xfs_buf_rele(bp); | 328 | xfs_buf_rele(bp); |
352 | } | 329 | } |
353 | 330 | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c new file mode 100644 index 000000000000..d61611c88012 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.c | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010 Red Hat, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_sb.h" | ||
20 | #include "xfs_inum.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_ag.h" | ||
23 | #include "xfs_mount.h" | ||
24 | #include "xfs_quota.h" | ||
25 | #include "xfs_trans.h" | ||
26 | #include "xfs_alloc_btree.h" | ||
27 | #include "xfs_bmap_btree.h" | ||
28 | #include "xfs_ialloc_btree.h" | ||
29 | #include "xfs_btree.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_alloc.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_discard.h" | ||
34 | #include "xfs_trace.h" | ||
35 | |||
36 | STATIC int | ||
37 | xfs_trim_extents( | ||
38 | struct xfs_mount *mp, | ||
39 | xfs_agnumber_t agno, | ||
40 | xfs_fsblock_t start, | ||
41 | xfs_fsblock_t len, | ||
42 | xfs_fsblock_t minlen, | ||
43 | __uint64_t *blocks_trimmed) | ||
44 | { | ||
45 | struct block_device *bdev = mp->m_ddev_targp->bt_bdev; | ||
46 | struct xfs_btree_cur *cur; | ||
47 | struct xfs_buf *agbp; | ||
48 | struct xfs_perag *pag; | ||
49 | int error; | ||
50 | int i; | ||
51 | |||
52 | pag = xfs_perag_get(mp, agno); | ||
53 | |||
54 | error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); | ||
55 | if (error || !agbp) | ||
56 | goto out_put_perag; | ||
57 | |||
58 | cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); | ||
59 | |||
60 | /* | ||
61 | * Force out the log. This means any transactions that might have freed | ||
62 | * space before we took the AGF buffer lock are now on disk, and the | ||
63 | * volatile disk cache is flushed. | ||
64 | */ | ||
65 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
66 | |||
67 | /* | ||
68 | * Look up the longest btree in the AGF and start with it. | ||
69 | */ | ||
70 | error = xfs_alloc_lookup_le(cur, 0, | ||
71 | XFS_BUF_TO_AGF(agbp)->agf_longest, &i); | ||
72 | if (error) | ||
73 | goto out_del_cursor; | ||
74 | |||
75 | /* | ||
76 | * Loop until we are done with all extents that are large | ||
77 | * enough to be worth discarding. | ||
78 | */ | ||
79 | while (i) { | ||
80 | xfs_agblock_t fbno; | ||
81 | xfs_extlen_t flen; | ||
82 | |||
83 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); | ||
84 | if (error) | ||
85 | goto out_del_cursor; | ||
86 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); | ||
87 | ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); | ||
88 | |||
89 | /* | ||
90 | * Too small? Give up. | ||
91 | */ | ||
92 | if (flen < minlen) { | ||
93 | trace_xfs_discard_toosmall(mp, agno, fbno, flen); | ||
94 | goto out_del_cursor; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * If the extent is entirely outside of the range we are | ||
99 | * supposed to discard, skip it. Do not bother to trim | ||
100 | * down partially overlapping ranges for now. | ||
101 | */ | ||
102 | if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || | ||
103 | XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { | ||
104 | trace_xfs_discard_exclude(mp, agno, fbno, flen); | ||
105 | goto next_extent; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * If any blocks in the range are still busy, skip the | ||
110 | * discard and try again the next time. | ||
111 | */ | ||
112 | if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { | ||
113 | trace_xfs_discard_busy(mp, agno, fbno, flen); | ||
114 | goto next_extent; | ||
115 | } | ||
116 | |||
117 | trace_xfs_discard_extent(mp, agno, fbno, flen); | ||
118 | error = -blkdev_issue_discard(bdev, | ||
119 | XFS_AGB_TO_DADDR(mp, agno, fbno), | ||
120 | XFS_FSB_TO_BB(mp, flen), | ||
121 | GFP_NOFS, 0); | ||
122 | if (error) | ||
123 | goto out_del_cursor; | ||
124 | *blocks_trimmed += flen; | ||
125 | |||
126 | next_extent: | ||
127 | error = xfs_btree_decrement(cur, 0, &i); | ||
128 | if (error) | ||
129 | goto out_del_cursor; | ||
130 | } | ||
131 | |||
132 | out_del_cursor: | ||
133 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
134 | xfs_buf_relse(agbp); | ||
135 | out_put_perag: | ||
136 | xfs_perag_put(pag); | ||
137 | return error; | ||
138 | } | ||
139 | |||
140 | int | ||
141 | xfs_ioc_trim( | ||
142 | struct xfs_mount *mp, | ||
143 | struct fstrim_range __user *urange) | ||
144 | { | ||
145 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; | ||
146 | unsigned int granularity = q->limits.discard_granularity; | ||
147 | struct fstrim_range range; | ||
148 | xfs_fsblock_t start, len, minlen; | ||
149 | xfs_agnumber_t start_agno, end_agno, agno; | ||
150 | __uint64_t blocks_trimmed = 0; | ||
151 | int error, last_error = 0; | ||
152 | |||
153 | if (!capable(CAP_SYS_ADMIN)) | ||
154 | return -XFS_ERROR(EPERM); | ||
155 | if (!blk_queue_discard(q)) | ||
156 | return -XFS_ERROR(EOPNOTSUPP); | ||
157 | if (copy_from_user(&range, urange, sizeof(range))) | ||
158 | return -XFS_ERROR(EFAULT); | ||
159 | |||
160 | /* | ||
161 | * Truncating down the len isn't actually quite correct, but using | ||
162 | * XFS_B_TO_FSB would mean we trivially get overflows for values | ||
163 | * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default | ||
164 | * used by the fstrim application. In the end it really doesn't | ||
165 | * matter as trimming blocks is an advisory interface. | ||
166 | */ | ||
167 | start = XFS_B_TO_FSBT(mp, range.start); | ||
168 | len = XFS_B_TO_FSBT(mp, range.len); | ||
169 | minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); | ||
170 | |||
171 | start_agno = XFS_FSB_TO_AGNO(mp, start); | ||
172 | if (start_agno >= mp->m_sb.sb_agcount) | ||
173 | return -XFS_ERROR(EINVAL); | ||
174 | |||
175 | end_agno = XFS_FSB_TO_AGNO(mp, start + len); | ||
176 | if (end_agno >= mp->m_sb.sb_agcount) | ||
177 | end_agno = mp->m_sb.sb_agcount - 1; | ||
178 | |||
179 | for (agno = start_agno; agno <= end_agno; agno++) { | ||
180 | error = -xfs_trim_extents(mp, agno, start, len, minlen, | ||
181 | &blocks_trimmed); | ||
182 | if (error) | ||
183 | last_error = error; | ||
184 | } | ||
185 | |||
186 | if (last_error) | ||
187 | return last_error; | ||
188 | |||
189 | range.len = XFS_FSB_TO_B(mp, blocks_trimmed); | ||
190 | if (copy_to_user(urange, &range, sizeof(range))) | ||
191 | return -XFS_ERROR(EFAULT); | ||
192 | return 0; | ||
193 | } | ||
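xfs_ioc_trim() is invoked from the XFS ioctl handler, presumably for the generic FITRIM command (the dispatch in xfs_ioctl.c is outside this section). A hypothetical userspace caller, roughly what the fstrim utility does:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

	int main(int argc, char **argv)
	{
		struct fstrim_range range = {
			.start	= 0,
			.len	= UINT64_MAX,	/* whole fs; see the truncation
						   note in xfs_ioc_trim() */
			.minlen	= 0,		/* raised to the device's
						   discard granularity */
		};
		int fd = open(argc > 1 ? argv[1] : "/", O_RDONLY);

		if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
			perror("FITRIM");
			return 1;
		}
		/* on success the kernel writes back the bytes trimmed */
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);
		return 0;
	}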
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h new file mode 100644 index 000000000000..e82b6dd3e127 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef XFS_DISCARD_H | ||
2 | #define XFS_DISCARD_H 1 | ||
3 | |||
4 | struct fstrim_range; | ||
5 | |||
6 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); | ||
7 | |||
8 | #endif /* XFS_DISCARD_H */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 3764d74790ec..f4f878fc0083 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -70,8 +70,16 @@ xfs_fs_encode_fh( | |||
70 | else | 70 | else |
71 | fileid_type = FILEID_INO32_GEN_PARENT; | 71 | fileid_type = FILEID_INO32_GEN_PARENT; |
72 | 72 | ||
73 | /* filesystem may contain 64bit inode numbers */ | 73 | /* |
74 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) | 74 | * If the filesystem may contain 64bit inode numbers, we need |
75 | * to use larger file handles that can represent them. | ||
76 | * | ||
77 | * While we only allocate inodes that do not fit into 32 bits, any | ||
78 | * large enough filesystem may contain them, thus the slightly | ||
79 | * confusing-looking conditional below. | ||
80 | */ | ||
81 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || | ||
82 | (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) | ||
75 | fileid_type |= XFS_FILEID_TYPE_64FLAG; | 83 | fileid_type |= XFS_FILEID_TYPE_64FLAG; |
76 | 84 | ||
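Two mount flags interact in the conditional above, which is what the comment calls confusing; one way to read it, laid out as a table:

	/*
	 * Reading the conditional as a table:
	 *
	 *   SMALL_INUMS  32BITINODES   handle size
	 *   -----------  -----------   ------------------------------------
	 *   unset        (any)         64-bit: inode64, large inums possible
	 *   set          set           64-bit: fs is big enough that inums
	 *                                      beyond 32 bits may already exist
	 *   set          unset         32-bit: every inum fits in 32 bits
	 */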
77 | /* | 85 | /* |
@@ -81,8 +89,10 @@ xfs_fs_encode_fh( | |||
81 | * seven combinations work. The real answer is "don't use v2". | 89 | * seven combinations work. The real answer is "don't use v2". |
82 | */ | 90 | */ |
83 | len = xfs_fileid_length(fileid_type); | 91 | len = xfs_fileid_length(fileid_type); |
84 | if (*max_len < len) | 92 | if (*max_len < len) { |
93 | *max_len = len; | ||
85 | return 255; | 94 | return 255; |
95 | } | ||
86 | *max_len = len; | 96 | *max_len = len; |
87 | 97 | ||
88 | switch (fileid_type) { | 98 | switch (fileid_type) { |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ba8ad422a165..f4213ba1ff85 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -37,10 +37,45 @@ | |||
37 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
38 | 38 | ||
39 | #include <linux/dcache.h> | 39 | #include <linux/dcache.h> |
40 | #include <linux/falloc.h> | ||
40 | 41 | ||
41 | static const struct vm_operations_struct xfs_file_vm_ops; | 42 | static const struct vm_operations_struct xfs_file_vm_ops; |
42 | 43 | ||
43 | /* | 44 | /* |
45 | * Locking primitives for read and write IO paths to ensure we consistently use | ||
46 | * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. | ||
47 | */ | ||
48 | static inline void | ||
49 | xfs_rw_ilock( | ||
50 | struct xfs_inode *ip, | ||
51 | int type) | ||
52 | { | ||
53 | if (type & XFS_IOLOCK_EXCL) | ||
54 | mutex_lock(&VFS_I(ip)->i_mutex); | ||
55 | xfs_ilock(ip, type); | ||
56 | } | ||
57 | |||
58 | static inline void | ||
59 | xfs_rw_iunlock( | ||
60 | struct xfs_inode *ip, | ||
61 | int type) | ||
62 | { | ||
63 | xfs_iunlock(ip, type); | ||
64 | if (type & XFS_IOLOCK_EXCL) | ||
65 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
66 | } | ||
67 | |||
68 | static inline void | ||
69 | xfs_rw_ilock_demote( | ||
70 | struct xfs_inode *ip, | ||
71 | int type) | ||
72 | { | ||
73 | xfs_ilock_demote(ip, type); | ||
74 | if (type & XFS_IOLOCK_EXCL) | ||
75 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
76 | } | ||
77 | |||
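These helpers pin the ordering i_mutex, then iolock (then ilock) in one place so every IO path takes the locks the same way. A hedged sketch of how the direct IO read path below composes them:

	/* illustrative composite, mirroring xfs_file_aio_read() below */
	xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);	/* i_mutex first, then iolock */

	/* ... flush and invalidate cached pages while fully excluded ... */

	xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);	/* keep iolock shared,
							   drop i_mutex */

	/* ... issue the IO under the shared iolock ... */
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);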
78 | /* | ||
44 | * xfs_iozero | 79 | * xfs_iozero |
45 | * | 80 | * |
46 | * xfs_iozero clears the specified range of buffer supplied, | 81 | * xfs_iozero clears the specified range of buffer supplied, |
@@ -262,22 +297,21 @@ xfs_file_aio_read( | |||
262 | if (XFS_FORCED_SHUTDOWN(mp)) | 297 | if (XFS_FORCED_SHUTDOWN(mp)) |
263 | return -EIO; | 298 | return -EIO; |
264 | 299 | ||
265 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
266 | mutex_lock(&inode->i_mutex); | ||
267 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
268 | |||
269 | if (unlikely(ioflags & IO_ISDIRECT)) { | 300 | if (unlikely(ioflags & IO_ISDIRECT)) { |
301 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | ||
302 | |||
270 | if (inode->i_mapping->nrpages) { | 303 | if (inode->i_mapping->nrpages) { |
271 | ret = -xfs_flushinval_pages(ip, | 304 | ret = -xfs_flushinval_pages(ip, |
272 | (iocb->ki_pos & PAGE_CACHE_MASK), | 305 | (iocb->ki_pos & PAGE_CACHE_MASK), |
273 | -1, FI_REMAPF_LOCKED); | 306 | -1, FI_REMAPF_LOCKED); |
307 | if (ret) { | ||
308 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | ||
309 | return ret; | ||
310 | } | ||
274 | } | 311 | } |
275 | mutex_unlock(&inode->i_mutex); | 312 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
276 | if (ret) { | 313 | } else |
277 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 314 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
278 | return ret; | ||
279 | } | ||
280 | } | ||
281 | 315 | ||
282 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 316 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); |
283 | 317 | ||
@@ -285,7 +319,7 @@ xfs_file_aio_read( | |||
285 | if (ret > 0) | 319 | if (ret > 0) |
286 | XFS_STATS_ADD(xs_read_bytes, ret); | 320 | XFS_STATS_ADD(xs_read_bytes, ret); |
287 | 321 | ||
288 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 322 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
289 | return ret; | 323 | return ret; |
290 | } | 324 | } |
291 | 325 | ||
@@ -309,7 +343,7 @@ xfs_file_splice_read( | |||
309 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 343 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
310 | return -EIO; | 344 | return -EIO; |
311 | 345 | ||
312 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 346 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
313 | 347 | ||
314 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); | 348 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); |
315 | 349 | ||
@@ -317,10 +351,61 @@ xfs_file_splice_read( | |||
317 | if (ret > 0) | 351 | if (ret > 0) |
318 | XFS_STATS_ADD(xs_read_bytes, ret); | 352 | XFS_STATS_ADD(xs_read_bytes, ret); |
319 | 353 | ||
320 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 354 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
321 | return ret; | 355 | return ret; |
322 | } | 356 | } |
323 | 357 | ||
358 | STATIC void | ||
359 | xfs_aio_write_isize_update( | ||
360 | struct inode *inode, | ||
361 | loff_t *ppos, | ||
362 | ssize_t bytes_written) | ||
363 | { | ||
364 | struct xfs_inode *ip = XFS_I(inode); | ||
365 | xfs_fsize_t isize = i_size_read(inode); | ||
366 | |||
367 | if (bytes_written > 0) | ||
368 | XFS_STATS_ADD(xs_write_bytes, bytes_written); | ||
369 | |||
370 | if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && | ||
371 | *ppos > isize)) | ||
372 | *ppos = isize; | ||
373 | |||
374 | if (*ppos > ip->i_size) { | ||
375 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
376 | if (*ppos > ip->i_size) | ||
377 | ip->i_size = *ppos; | ||
378 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
379 | } | ||
380 | } | ||
381 | |||
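The repeated size comparison in xfs_aio_write_isize_update() above is the usual unlocked-check/locked-recheck idiom; the same lines, annotated:

	if (*ppos > ip->i_size) {	/* cheap, racy fast-path check */
		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
		if (*ppos > ip->i_size)	/* re-check under ILOCK: a racing
					 * writer may have grown the file */
			ip->i_size = *ppos;
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	}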
382 | /* | ||
383 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | ||
384 | * part of the I/O may have been written to disk before the error occurred. In | ||
385 | * this case the on-disk file size may have been adjusted beyond the in-memory | ||
386 | * file size and now needs to be truncated back. | ||
387 | */ | ||
388 | STATIC void | ||
389 | xfs_aio_write_newsize_update( | ||
390 | struct xfs_inode *ip) | ||
391 | { | ||
392 | if (ip->i_new_size) { | ||
393 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
394 | ip->i_new_size = 0; | ||
395 | if (ip->i_d.di_size > ip->i_size) | ||
396 | ip->i_d.di_size = ip->i_size; | ||
397 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | ||
403 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | ||
404 | * could cause lock inversions between the aio_write path and the splice path | ||
405 | * if someone is doing concurrent splice(2) based writes and write(2) based | ||
406 | * writes to the same inode. The only real way to fix this is to re-implement | ||
407 | * the generic code here with correct locking orders. | ||
408 | */ | ||
324 | STATIC ssize_t | 409 | STATIC ssize_t |
325 | xfs_file_splice_write( | 410 | xfs_file_splice_write( |
326 | struct pipe_inode_info *pipe, | 411 | struct pipe_inode_info *pipe, |
@@ -331,7 +416,7 @@ xfs_file_splice_write( | |||
331 | { | 416 | { |
332 | struct inode *inode = outfilp->f_mapping->host; | 417 | struct inode *inode = outfilp->f_mapping->host; |
333 | struct xfs_inode *ip = XFS_I(inode); | 418 | struct xfs_inode *ip = XFS_I(inode); |
334 | xfs_fsize_t isize, new_size; | 419 | xfs_fsize_t new_size; |
335 | int ioflags = 0; | 420 | int ioflags = 0; |
336 | ssize_t ret; | 421 | ssize_t ret; |
337 | 422 | ||
@@ -355,27 +440,9 @@ xfs_file_splice_write( | |||
355 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | 440 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); |
356 | 441 | ||
357 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 442 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
358 | if (ret > 0) | ||
359 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
360 | |||
361 | isize = i_size_read(inode); | ||
362 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) | ||
363 | *ppos = isize; | ||
364 | |||
365 | if (*ppos > ip->i_size) { | ||
366 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
367 | if (*ppos > ip->i_size) | ||
368 | ip->i_size = *ppos; | ||
369 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
370 | } | ||
371 | 443 | ||
372 | if (ip->i_new_size) { | 444 | xfs_aio_write_isize_update(inode, ppos, ret); |
373 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 445 | xfs_aio_write_newsize_update(ip); |
374 | ip->i_new_size = 0; | ||
375 | if (ip->i_d.di_size > ip->i_size) | ||
376 | ip->i_d.di_size = ip->i_size; | ||
377 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
378 | } | ||
379 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 446 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
380 | return ret; | 447 | return ret; |
381 | } | 448 | } |
@@ -562,247 +629,318 @@ out_lock: | |||
562 | return error; | 629 | return error; |
563 | } | 630 | } |
564 | 631 | ||
632 | /* | ||
633 | * Common pre-write limit and setup checks. | ||
634 | * | ||
635 | * Returns with iolock held according to @iolock. | ||
636 | */ | ||
565 | STATIC ssize_t | 637 | STATIC ssize_t |
566 | xfs_file_aio_write( | 638 | xfs_file_aio_write_checks( |
567 | struct kiocb *iocb, | 639 | struct file *file, |
568 | const struct iovec *iovp, | 640 | loff_t *pos, |
569 | unsigned long nr_segs, | 641 | size_t *count, |
570 | loff_t pos) | 642 | int *iolock) |
571 | { | 643 | { |
572 | struct file *file = iocb->ki_filp; | 644 | struct inode *inode = file->f_mapping->host; |
573 | struct address_space *mapping = file->f_mapping; | ||
574 | struct inode *inode = mapping->host; | ||
575 | struct xfs_inode *ip = XFS_I(inode); | 645 | struct xfs_inode *ip = XFS_I(inode); |
576 | struct xfs_mount *mp = ip->i_mount; | 646 | xfs_fsize_t new_size; |
577 | ssize_t ret = 0, error = 0; | 647 | int error = 0; |
578 | int ioflags = 0; | ||
579 | xfs_fsize_t isize, new_size; | ||
580 | int iolock; | ||
581 | size_t ocount = 0, count; | ||
582 | int need_i_mutex; | ||
583 | 648 | ||
584 | XFS_STATS_INC(xs_write_calls); | 649 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
650 | if (error) { | ||
651 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | ||
652 | *iolock = 0; | ||
653 | return error; | ||
654 | } | ||
585 | 655 | ||
586 | BUG_ON(iocb->ki_pos != pos); | 656 | new_size = *pos + *count; |
657 | if (new_size > ip->i_size) | ||
658 | ip->i_new_size = new_size; | ||
587 | 659 | ||
588 | if (unlikely(file->f_flags & O_DIRECT)) | 660 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) |
589 | ioflags |= IO_ISDIRECT; | 661 | file_update_time(file); |
590 | if (file->f_mode & FMODE_NOCMTIME) | 662 | |
591 | ioflags |= IO_INVIS; | 663 | /* |
664 | * If the offset is beyond the size of the file, we need to zero any | ||
665 | * blocks that fall between the existing EOF and the start of this | ||
666 | * write. | ||
667 | */ | ||
668 | if (*pos > ip->i_size) | ||
669 | error = -xfs_zero_eof(ip, *pos, ip->i_size); | ||
592 | 670 | ||
593 | error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); | 671 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
594 | if (error) | 672 | if (error) |
595 | return error; | 673 | return error; |
596 | 674 | ||
597 | count = ocount; | 675 | /* |
598 | if (count == 0) | 676 | * If we're writing the file then make sure to clear the setuid and |
599 | return 0; | 677 | * setgid bits if the process is not being run by root. This keeps |
600 | 678 | * people from modifying setuid and setgid binaries. | |
601 | xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); | 679 | */ |
680 | return file_remove_suid(file); | ||
602 | 681 | ||
603 | if (XFS_FORCED_SHUTDOWN(mp)) | 682 | } |
604 | return -EIO; | ||
605 | 683 | ||
606 | relock: | 684 | /* |
607 | if (ioflags & IO_ISDIRECT) { | 685 | * xfs_file_dio_aio_write - handle direct IO writes |
608 | iolock = XFS_IOLOCK_SHARED; | 686 | * |
609 | need_i_mutex = 0; | 687 | * Lock the inode appropriately to prepare for and issue a direct IO write. |
610 | } else { | 688 | * By separating it from the buffered write path we remove all the tricky to |
611 | iolock = XFS_IOLOCK_EXCL; | 689 | * follow locking changes and looping. |
612 | need_i_mutex = 1; | 690 | * |
613 | mutex_lock(&inode->i_mutex); | 691 | * If there are cached pages or we're extending the file, we need IOLOCK_EXCL |
692 | * until we're sure the bytes at the new EOF have been zeroed and/or the cached | ||
693 | * pages are flushed out. | ||
694 | * | ||
695 | * In most cases the direct IO writes will be done holding IOLOCK_SHARED | ||
696 | * allowing them to be done in parallel with reads and other direct IO writes. | ||
697 | * However, if the IO is not aligned to filesystem blocks, the direct IO layer | ||
698 | * needs to do sub-block zeroing and that requires serialisation against other | ||
699 | * direct IOs to the same block. In this case we need to serialise the | ||
700 | * submission of the unaligned IOs so that we don't get racing block zeroing in | ||
701 | * the dio layer. To avoid the problem with aio, we also need to wait for | ||
702 | * outstanding IOs to complete so that unwritten extent conversion is completed | ||
703 | * before we try to map the overlapping block. This is currently implemented by | ||
704 | * hitting it with a big hammer (i.e. xfs_ioend_wait()). | ||
705 | * | ||
706 | * Returns with locks held indicated by @iolock and errors indicated by | ||
707 | * negative return values. | ||
708 | */ | ||
709 | STATIC ssize_t | ||
710 | xfs_file_dio_aio_write( | ||
711 | struct kiocb *iocb, | ||
712 | const struct iovec *iovp, | ||
713 | unsigned long nr_segs, | ||
714 | loff_t pos, | ||
715 | size_t ocount, | ||
716 | int *iolock) | ||
717 | { | ||
718 | struct file *file = iocb->ki_filp; | ||
719 | struct address_space *mapping = file->f_mapping; | ||
720 | struct inode *inode = mapping->host; | ||
721 | struct xfs_inode *ip = XFS_I(inode); | ||
722 | struct xfs_mount *mp = ip->i_mount; | ||
723 | ssize_t ret = 0; | ||
724 | size_t count = ocount; | ||
725 | int unaligned_io = 0; | ||
726 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | ||
727 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
728 | |||
729 | *iolock = 0; | ||
730 | if ((pos & target->bt_smask) || (count & target->bt_smask)) | ||
731 | return -XFS_ERROR(EINVAL); | ||
732 | |||
733 | if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) | ||
734 | unaligned_io = 1; | ||
735 | |||
736 | if (unaligned_io || mapping->nrpages || pos > ip->i_size) | ||
737 | *iolock = XFS_IOLOCK_EXCL; | ||
738 | else | ||
739 | *iolock = XFS_IOLOCK_SHARED; | ||
740 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | ||
741 | |||
742 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); | ||
743 | if (ret) | ||
744 | return ret; | ||
745 | |||
746 | if (mapping->nrpages) { | ||
747 | WARN_ON(*iolock != XFS_IOLOCK_EXCL); | ||
748 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | ||
749 | FI_REMAPF_LOCKED); | ||
750 | if (ret) | ||
751 | return ret; | ||
614 | } | 752 | } |
615 | 753 | ||
616 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 754 | /* |
617 | 755 | * If we are doing unaligned IO, wait for all other IO to drain, | |
618 | start: | 756 | * otherwise demote the lock if we had to flush cached pages |
619 | error = -generic_write_checks(file, &pos, &count, | 757 | */ |
620 | S_ISBLK(inode->i_mode)); | 758 | if (unaligned_io) |
621 | if (error) { | 759 | xfs_ioend_wait(ip); |
622 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 760 | else if (*iolock == XFS_IOLOCK_EXCL) { |
623 | goto out_unlock_mutex; | 761 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
762 | *iolock = XFS_IOLOCK_SHARED; | ||
624 | } | 763 | } |
625 | 764 | ||
626 | if (ioflags & IO_ISDIRECT) { | 765 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
627 | xfs_buftarg_t *target = | 766 | ret = generic_file_direct_write(iocb, iovp, |
628 | XFS_IS_REALTIME_INODE(ip) ? | 767 | &nr_segs, pos, &iocb->ki_pos, count, ocount); |
629 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
630 | 768 | ||
631 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { | 769 | /* No fallback to buffered IO on errors for XFS. */ |
632 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 770 | ASSERT(ret < 0 || ret == count); |
633 | return XFS_ERROR(-EINVAL); | 771 | return ret; |
634 | } | 772 | } |
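A worked example of the two mask tests above, assuming 512-byte sectors (bt_smask = 0x1ff) and 4096-byte filesystem blocks (m_blockmask = 0xfff); the geometry is illustrative only:

    #include <assert.h>

    int main(void)
    {
            /* assumed geometry: 512-byte sectors, 4096-byte fs blocks */
            const long long smask = 512 - 1;        /* target->bt_smask */
            const long long bmask = 4096 - 1;       /* mp->m_blockmask */
            long long pos = 4096, count = 512;

            /* sector-aligned: the EINVAL rejection does not fire */
            assert(((pos & smask) || (count & smask)) == 0);
            /* but not block-aligned: unaligned_io = 1, so the write takes
             * IOLOCK_EXCL and waits for outstanding ioends */
            assert(((pos & bmask) || ((pos + count) & bmask)) == 1);
            return 0;
    }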
635 | 773 | ||
636 | if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { | 774 | STATIC ssize_t |
637 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 775 | xfs_file_buffered_aio_write( |
638 | iolock = XFS_IOLOCK_EXCL; | 776 | struct kiocb *iocb, |
639 | need_i_mutex = 1; | 777 | const struct iovec *iovp, |
640 | mutex_lock(&inode->i_mutex); | 778 | unsigned long nr_segs, |
641 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 779 | loff_t pos, |
642 | goto start; | 780 | size_t ocount, |
643 | } | 781 | int *iolock) |
644 | } | 782 | { |
783 | struct file *file = iocb->ki_filp; | ||
784 | struct address_space *mapping = file->f_mapping; | ||
785 | struct inode *inode = mapping->host; | ||
786 | struct xfs_inode *ip = XFS_I(inode); | ||
787 | ssize_t ret; | ||
788 | int enospc = 0; | ||
789 | size_t count = ocount; | ||
645 | 790 | ||
646 | new_size = pos + count; | 791 | *iolock = XFS_IOLOCK_EXCL; |
647 | if (new_size > ip->i_size) | 792 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); |
648 | ip->i_new_size = new_size; | ||
649 | 793 | ||
650 | if (likely(!(ioflags & IO_INVIS))) | 794 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); |
651 | file_update_time(file); | 795 | if (ret) |
796 | return ret; | ||
652 | 797 | ||
798 | /* We can write back this queue in page reclaim */ | ||
799 | current->backing_dev_info = mapping->backing_dev_info; | ||
800 | |||
801 | write_retry: | ||
802 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); | ||
803 | ret = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
804 | pos, &iocb->ki_pos, count, ret); | ||
653 | /* | 805 | /* |
654 | * If the offset is beyond the size of the file, we have a couple | 806 | * if we just got an ENOSPC, flush the inode now that we aren't holding any
655 | * of things to do. First, if there is already space allocated | 807 | * page locks and retry *once* |
656 | * we need to either create holes or zero the disk or ... | ||
657 | * | ||
658 | * If there is a page where the previous size lands, we need | ||
659 | * to zero it out up to the new size. | ||
660 | */ | 808 | */ |
661 | 809 | if (ret == -ENOSPC && !enospc) { | |
662 | if (pos > ip->i_size) { | 810 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); |
663 | error = xfs_zero_eof(ip, pos, ip->i_size); | 811 | if (ret) |
664 | if (error) { | 812 | return ret; |
665 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 813 | enospc = 1; |
666 | goto out_unlock_internal; | 814 | goto write_retry; |
667 | } | ||
668 | } | 815 | } |
669 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 816 | current->backing_dev_info = NULL; |
817 | return ret; | ||
818 | } | ||
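Note the leading minus on -xfs_flush_pages() and -xfs_flushinval_pages() throughout: core XFS routines return positive errnos, while the VFS expects negative ones, so boundary code negates on the way out. A minimal userspace illustration of the convention:

    #include <errno.h>

    /* stand-in for an XFS-internal helper: positive errno on failure,
     * as xfs_flush_pages() and friends return */
    static int xfs_internal_op(void)
    {
            return ENOSPC;                  /* positive 28 */
    }

    /* the boundary negates, exactly like 'ret = -xfs_flush_pages(...)' */
    static long vfs_facing_op(void)
    {
            long ret = -xfs_internal_op();  /* -ENOSPC, the VFS convention */

            if (ret)
                    return ret;
            return 0;
    }

    int main(void)
    {
            return vfs_facing_op() == -ENOSPC ? 0 : 1;
    }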
670 | 819 | ||
671 | /* | 820 | STATIC ssize_t |
672 | * If we're writing the file then make sure to clear the | 821 | xfs_file_aio_write( |
673 | * setuid and setgid bits if the process is not being run | 822 | struct kiocb *iocb, |
674 | * by root. This keeps people from modifying setuid and | 823 | const struct iovec *iovp, |
675 | * setgid binaries. | 824 | unsigned long nr_segs, |
676 | */ | 825 | loff_t pos) |
677 | error = -file_remove_suid(file); | 826 | { |
678 | if (unlikely(error)) | 827 | struct file *file = iocb->ki_filp; |
679 | goto out_unlock_internal; | 828 | struct address_space *mapping = file->f_mapping; |
829 | struct inode *inode = mapping->host; | ||
830 | struct xfs_inode *ip = XFS_I(inode); | ||
831 | ssize_t ret; | ||
832 | int iolock; | ||
833 | size_t ocount = 0; | ||
680 | 834 | ||
681 | /* We can write back this queue in page reclaim */ | 835 | XFS_STATS_INC(xs_write_calls); |
682 | current->backing_dev_info = mapping->backing_dev_info; | ||
683 | 836 | ||
684 | if ((ioflags & IO_ISDIRECT)) { | 837 | BUG_ON(iocb->ki_pos != pos); |
685 | if (mapping->nrpages) { | ||
686 | WARN_ON(need_i_mutex == 0); | ||
687 | error = xfs_flushinval_pages(ip, | ||
688 | (pos & PAGE_CACHE_MASK), | ||
689 | -1, FI_REMAPF_LOCKED); | ||
690 | if (error) | ||
691 | goto out_unlock_internal; | ||
692 | } | ||
693 | 838 | ||
694 | if (need_i_mutex) { | 839 | ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); |
695 | /* demote the lock now the cached pages are gone */ | 840 | if (ret) |
696 | xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); | 841 | return ret; |
697 | mutex_unlock(&inode->i_mutex); | ||
698 | 842 | ||
699 | iolock = XFS_IOLOCK_SHARED; | 843 | if (ocount == 0) |
700 | need_i_mutex = 0; | 844 | return 0; |
701 | } | ||
702 | 845 | ||
703 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); | 846 | xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); |
704 | ret = generic_file_direct_write(iocb, iovp, | ||
705 | &nr_segs, pos, &iocb->ki_pos, count, ocount); | ||
706 | 847 | ||
707 | /* | 848 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
708 | * direct-io write to a hole: fall through to buffered I/O | 849 | return -EIO; |
709 | * for completing the rest of the request. | ||
710 | */ | ||
711 | if (ret >= 0 && ret != count) { | ||
712 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
713 | 850 | ||
714 | pos += ret; | 851 | if (unlikely(file->f_flags & O_DIRECT)) |
715 | count -= ret; | 852 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, |
853 | ocount, &iolock); | ||
854 | else | ||
855 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | ||
856 | ocount, &iolock); | ||
716 | 857 | ||
717 | ioflags &= ~IO_ISDIRECT; | 858 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); |
718 | xfs_iunlock(ip, iolock); | ||
719 | goto relock; | ||
720 | } | ||
721 | } else { | ||
722 | int enospc = 0; | ||
723 | ssize_t ret2 = 0; | ||
724 | 859 | ||
725 | write_retry: | 860 | if (ret <= 0) |
726 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); | 861 | goto out_unlock; |
727 | ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
728 | pos, &iocb->ki_pos, count, ret); | ||
729 | /* | ||
730 | * if we just got an ENOSPC, flush the inode now we | ||
731 | * aren't holding any page locks and retry *once* | ||
732 | */ | ||
733 | if (ret2 == -ENOSPC && !enospc) { | ||
734 | error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | ||
735 | if (error) | ||
736 | goto out_unlock_internal; | ||
737 | enospc = 1; | ||
738 | goto write_retry; | ||
739 | } | ||
740 | ret = ret2; | ||
741 | } | ||
742 | 862 | ||
743 | current->backing_dev_info = NULL; | 863 | /* Handle various SYNC-type writes */ |
864 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
865 | loff_t end = pos + ret - 1; | ||
866 | int error, error2; | ||
744 | 867 | ||
745 | isize = i_size_read(inode); | 868 | xfs_rw_iunlock(ip, iolock); |
746 | if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) | 869 | error = filemap_write_and_wait_range(mapping, pos, end); |
747 | iocb->ki_pos = isize; | 870 | xfs_rw_ilock(ip, iolock); |
748 | 871 | ||
749 | if (iocb->ki_pos > ip->i_size) { | 872 | error2 = -xfs_file_fsync(file, |
750 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 873 | (file->f_flags & __O_SYNC) ? 0 : 1); |
751 | if (iocb->ki_pos > ip->i_size) | 874 | if (error) |
752 | ip->i_size = iocb->ki_pos; | 875 | ret = error; |
753 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 876 | else if (error2) |
877 | ret = error2; | ||
754 | } | 878 | } |
755 | 879 | ||
756 | error = -ret; | 880 | out_unlock: |
757 | if (ret <= 0) | 881 | xfs_aio_write_newsize_update(ip); |
758 | goto out_unlock_internal; | 882 | xfs_rw_iunlock(ip, iolock); |
883 | return ret; | ||
884 | } | ||
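From userspace, the "Handle various SYNC-type writes" branch above is reached by opening with O_DSYNC (or writing an inode flagged S_SYNC); a minimal sketch with a hypothetical path:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            /* hypothetical file on an XFS mount */
            int fd = open("/mnt/xfs/file", O_WRONLY | O_CREAT | O_DSYNC, 0644);

            if (fd < 0)
                    return 1;
            /* returns only after xfs_file_aio_write has flushed the
             * written range and called xfs_file_fsync for it */
            if (write(fd, "data", 4) != 4)
                    return 1;
            return close(fd);
    }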
759 | 885 | ||
760 | XFS_STATS_ADD(xs_write_bytes, ret); | 886 | STATIC long |
887 | xfs_file_fallocate( | ||
888 | struct file *file, | ||
889 | int mode, | ||
890 | loff_t offset, | ||
891 | loff_t len) | ||
892 | { | ||
893 | struct inode *inode = file->f_path.dentry->d_inode; | ||
894 | long error; | ||
895 | loff_t new_size = 0; | ||
896 | xfs_flock64_t bf; | ||
897 | xfs_inode_t *ip = XFS_I(inode); | ||
898 | int cmd = XFS_IOC_RESVSP; | ||
899 | int attr_flags = XFS_ATTR_NOLOCK; | ||
761 | 900 | ||
762 | /* Handle various SYNC-type writes */ | 901 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
763 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 902 | return -EOPNOTSUPP; |
764 | loff_t end = pos + ret - 1; | ||
765 | int error2; | ||
766 | 903 | ||
767 | xfs_iunlock(ip, iolock); | 904 | bf.l_whence = 0; |
768 | if (need_i_mutex) | 905 | bf.l_start = offset; |
769 | mutex_unlock(&inode->i_mutex); | 906 | bf.l_len = len; |
770 | 907 | ||
771 | error2 = filemap_write_and_wait_range(mapping, pos, end); | 908 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
772 | if (!error) | ||
773 | error = error2; | ||
774 | if (need_i_mutex) | ||
775 | mutex_lock(&inode->i_mutex); | ||
776 | xfs_ilock(ip, iolock); | ||
777 | 909 | ||
778 | error2 = -xfs_file_fsync(file, | 910 | if (mode & FALLOC_FL_PUNCH_HOLE) |
779 | (file->f_flags & __O_SYNC) ? 0 : 1); | 911 | cmd = XFS_IOC_UNRESVSP; |
780 | if (!error) | 912 | |
781 | error = error2; | 913 | /* check the new inode size is valid before allocating */ |
914 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
915 | offset + len > i_size_read(inode)) { | ||
916 | new_size = offset + len; | ||
917 | error = inode_newsize_ok(inode, new_size); | ||
918 | if (error) | ||
919 | goto out_unlock; | ||
782 | } | 920 | } |
783 | 921 | ||
784 | out_unlock_internal: | 922 | if (file->f_flags & O_DSYNC) |
785 | if (ip->i_new_size) { | 923 | attr_flags |= XFS_ATTR_SYNC; |
786 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 924 | |
787 | ip->i_new_size = 0; | 925 | error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); |
788 | /* | 926 | if (error) |
789 | * If this was a direct or synchronous I/O that failed (such | 927 | goto out_unlock; |
790 | * as ENOSPC) then part of the I/O may have been written to | 928 | |
791 | * disk before the error occurred. In this case the on-disk | 929 | /* Change file size if needed */ |
792 | * file size may have been adjusted beyond the in-memory file | 930 | if (new_size) { |
793 | * size and now needs to be truncated back. | 931 | struct iattr iattr; |
794 | */ | 932 | |
795 | if (ip->i_d.di_size > ip->i_size) | 933 | iattr.ia_valid = ATTR_SIZE; |
796 | ip->i_d.di_size = ip->i_size; | 934 | iattr.ia_size = new_size; |
797 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 935 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); |
798 | } | 936 | } |
799 | xfs_iunlock(ip, iolock); | 937 | |
800 | out_unlock_mutex: | 938 | out_unlock: |
801 | if (need_i_mutex) | 939 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
802 | mutex_unlock(&inode->i_mutex); | 940 | return error; |
803 | return -error; | ||
804 | } | 941 | } |
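With .fallocate now wired into xfs_file_operations (below), userspace reaches this code through fallocate(2); a sketch with a hypothetical path, preallocating and then punching a hole:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>

    int main(void)
    {
            int fd = open("/mnt/xfs/file", O_RDWR); /* hypothetical path */

            if (fd < 0)
                    return 1;
            /* preallocate 1MiB without growing i_size: XFS_IOC_RESVSP path */
            if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20))
                    return 1;
            /* punch a 64kB hole; the mode check above maps this to
             * XFS_IOC_UNRESVSP */
            if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          0, 64 << 10))
                    return 1;
            return 0;
    }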
805 | 942 | ||
943 | |||
806 | STATIC int | 944 | STATIC int |
807 | xfs_file_open( | 945 | xfs_file_open( |
808 | struct inode *inode, | 946 | struct inode *inode, |
@@ -921,6 +1059,7 @@ const struct file_operations xfs_file_operations = { | |||
921 | .open = xfs_file_open, | 1059 | .open = xfs_file_open, |
922 | .release = xfs_file_release, | 1060 | .release = xfs_file_release, |
923 | .fsync = xfs_file_fsync, | 1061 | .fsync = xfs_file_fsync, |
1062 | .fallocate = xfs_file_fallocate, | ||
924 | }; | 1063 | }; |
925 | 1064 | ||
926 | const struct file_operations xfs_dir_file_operations = { | 1065 | const struct file_operations xfs_dir_file_operations = { |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index ad442d9e392e..acca2c5ca3fa 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_dfrag.h" | 39 | #include "xfs_dfrag.h" |
40 | #include "xfs_fsops.h" | 40 | #include "xfs_fsops.h" |
41 | #include "xfs_vnodeops.h" | 41 | #include "xfs_vnodeops.h" |
42 | #include "xfs_discard.h" | ||
42 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
43 | #include "xfs_inode_item.h" | 44 | #include "xfs_inode_item.h" |
44 | #include "xfs_export.h" | 45 | #include "xfs_export.h" |
@@ -623,6 +624,10 @@ xfs_ioc_space( | |||
623 | 624 | ||
624 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | 625 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) |
625 | attr_flags |= XFS_ATTR_NONBLOCK; | 626 | attr_flags |= XFS_ATTR_NONBLOCK; |
627 | |||
628 | if (filp->f_flags & O_DSYNC) | ||
629 | attr_flags |= XFS_ATTR_SYNC; | ||
630 | |||
626 | if (ioflags & IO_INVIS) | 631 | if (ioflags & IO_INVIS) |
627 | attr_flags |= XFS_ATTR_DMI; | 632 | attr_flags |= XFS_ATTR_DMI; |
628 | 633 | ||
@@ -694,14 +699,19 @@ xfs_ioc_fsgeometry_v1( | |||
694 | xfs_mount_t *mp, | 699 | xfs_mount_t *mp, |
695 | void __user *arg) | 700 | void __user *arg) |
696 | { | 701 | { |
697 | xfs_fsop_geom_v1_t fsgeo; | 702 | xfs_fsop_geom_t fsgeo; |
698 | int error; | 703 | int error; |
699 | 704 | ||
700 | error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); | 705 | error = xfs_fs_geometry(mp, &fsgeo, 3); |
701 | if (error) | 706 | if (error) |
702 | return -error; | 707 | return -error; |
703 | 708 | ||
704 | if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) | 709 | /* |
710 | * Caller should have passed an argument of type | ||
711 | * xfs_fsop_geom_v1_t. This is a proper subset of the | ||
712 | * xfs_fsop_geom_t that xfs_fs_geometry() fills in. | ||
713 | */ | ||
714 | if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) | ||
705 | return -XFS_ERROR(EFAULT); | 715 | return -XFS_ERROR(EFAULT); |
706 | return 0; | 716 | return 0; |
707 | } | 717 | } |
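The truncated copy works only because xfs_fsop_geom_v1_t is a leading subset of xfs_fsop_geom_t, as the new comment notes. A userspace sketch of the same pattern, with made-up stand-in structs:

    #include <assert.h>
    #include <string.h>

    /* made-up stand-ins for xfs_fsop_geom_v1_t / xfs_fsop_geom_t */
    struct geom_v1   { int blocksize; int agcount; };
    struct geom_full { int blocksize; int agcount; int logsectsize; };

    int main(void)
    {
            struct geom_full full = { 4096, 16, 512 };
            struct geom_v1 out;

            /* copying sizeof(v1) bytes from the full struct, as the
             * copy_to_user() above does, yields a well-formed v1 */
            memcpy(&out, &full, sizeof(out));
            assert(out.blocksize == 4096 && out.agcount == 16);
            return 0;
    }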
@@ -984,10 +994,22 @@ xfs_ioctl_setattr( | |||
984 | 994 | ||
985 | /* | 995 | /* |
986 | * Extent size must be a multiple of the appropriate block | 996 | * Extent size must be a multiple of the appropriate block |
987 | * size, if set at all. | 997 | * size, if set at all. It must also be smaller than the |
998 | * maximum extent size supported by the filesystem. | ||
999 | * | ||
1000 | * Also, for non-realtime files, limit the extent size hint to | ||
1001 | * half the size of the AGs in the filesystem so alignment | ||
1002 | * doesn't result in extents larger than an AG. | ||
988 | */ | 1003 | */ |
989 | if (fa->fsx_extsize != 0) { | 1004 | if (fa->fsx_extsize != 0) { |
990 | xfs_extlen_t size; | 1005 | xfs_extlen_t size; |
1006 | xfs_fsblock_t extsize_fsb; | ||
1007 | |||
1008 | extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); | ||
1009 | if (extsize_fsb > MAXEXTLEN) { | ||
1010 | code = XFS_ERROR(EINVAL); | ||
1011 | goto error_return; | ||
1012 | } | ||
991 | 1013 | ||
992 | if (XFS_IS_REALTIME_INODE(ip) || | 1014 | if (XFS_IS_REALTIME_INODE(ip) || |
993 | ((mask & FSX_XFLAGS) && | 1015 | ((mask & FSX_XFLAGS) && |
@@ -996,6 +1018,10 @@ xfs_ioctl_setattr( | |||
996 | mp->m_sb.sb_blocklog; | 1018 | mp->m_sb.sb_blocklog; |
997 | } else { | 1019 | } else { |
998 | size = mp->m_sb.sb_blocksize; | 1020 | size = mp->m_sb.sb_blocksize; |
1021 | if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { | ||
1022 | code = XFS_ERROR(EINVAL); | ||
1023 | goto error_return; | ||
1024 | } | ||
999 | } | 1025 | } |
1000 | 1026 | ||
1001 | if (fa->fsx_extsize % size) { | 1027 | if (fa->fsx_extsize % size) { |
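The fsx_extsize being validated here arrives via the XFS_IOC_FSSETXATTR ioctl. A userspace sketch of setting a 1MiB hint, assuming the xfsprogs headers for struct fsxattr and the XFS_IOC_* / XFS_XFLAG_EXTSIZE names, and a hypothetical path:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <xfs/xfs.h>    /* xfsprogs: struct fsxattr, XFS_IOC_FS[GS]ETXATTR */

    int main(void)
    {
            struct fsxattr fsx;
            int fd = open("/mnt/xfs/file", O_RDWR); /* hypothetical path */

            if (fd < 0 || ioctl(fd, XFS_IOC_FSGETXATTR, &fsx))
                    return 1;
            /* a 1MiB hint is 256 blocks at 4kB: well under MAXEXTLEN and
             * under half an AG, so the new checks accept it */
            fsx.fsx_xflags |= XFS_XFLAG_EXTSIZE;
            fsx.fsx_extsize = 1 << 20;
            return ioctl(fd, XFS_IOC_FSSETXATTR, &fsx) ? 1 : 0;
    }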
@@ -1294,6 +1320,8 @@ xfs_file_ioctl( | |||
1294 | trace_xfs_file_ioctl(ip); | 1320 | trace_xfs_file_ioctl(ip); |
1295 | 1321 | ||
1296 | switch (cmd) { | 1322 | switch (cmd) { |
1323 | case FITRIM: | ||
1324 | return xfs_ioc_trim(mp, arg); | ||
1297 | case XFS_IOC_ALLOCSP: | 1325 | case XFS_IOC_ALLOCSP: |
1298 | case XFS_IOC_FREESP: | 1326 | case XFS_IOC_FREESP: |
1299 | case XFS_IOC_RESVSP: | 1327 | case XFS_IOC_RESVSP: |
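The new FITRIM case makes XFS honour the generic discard ioctl; a userspace sketch trimming an entire (hypothetical) mount point:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>   /* FITRIM, struct fstrim_range */

    int main(void)
    {
            struct fstrim_range r;
            int fd = open("/mnt/xfs", O_RDONLY);    /* hypothetical mount */

            if (fd < 0)
                    return 1;
            memset(&r, 0, sizeof(r));
            r.len = (__u64)-1;      /* whole filesystem */
            if (ioctl(fd, FITRIM, &r))      /* dispatched to xfs_ioc_trim() */
                    return 1;
            printf("trimmed %llu bytes\n", (unsigned long long)r.len);
            return 0;
    }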
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 94d5fd6a2973..dd21784525a8 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -46,7 +46,6 @@ | |||
46 | #include <linux/namei.h> | 46 | #include <linux/namei.h> |
47 | #include <linux/posix_acl.h> | 47 | #include <linux/posix_acl.h> |
48 | #include <linux/security.h> | 48 | #include <linux/security.h> |
49 | #include <linux/falloc.h> | ||
50 | #include <linux/fiemap.h> | 49 | #include <linux/fiemap.h> |
51 | #include <linux/slab.h> | 50 | #include <linux/slab.h> |
52 | 51 | ||
@@ -71,7 +70,7 @@ xfs_synchronize_times( | |||
71 | 70 | ||
72 | /* | 71 | /* |
73 | * If the linux inode is valid, mark it dirty. | 72 | * If the linux inode is valid, mark it dirty. |
74 | * Used when commiting a dirty inode into a transaction so that | 73 | * Used when committing a dirty inode into a transaction so that |
75 | * the inode will get written back by the linux code | 74 | * the inode will get written back by the linux code |
76 | */ | 75 | */ |
77 | void | 76 | void |
@@ -103,7 +102,8 @@ xfs_mark_inode_dirty( | |||
103 | STATIC int | 102 | STATIC int |
104 | xfs_init_security( | 103 | xfs_init_security( |
105 | struct inode *inode, | 104 | struct inode *inode, |
106 | struct inode *dir) | 105 | struct inode *dir, |
106 | const struct qstr *qstr) | ||
107 | { | 107 | { |
108 | struct xfs_inode *ip = XFS_I(inode); | 108 | struct xfs_inode *ip = XFS_I(inode); |
109 | size_t length; | 109 | size_t length; |
@@ -111,7 +111,7 @@ xfs_init_security( | |||
111 | unsigned char *name; | 111 | unsigned char *name; |
112 | int error; | 112 | int error; |
113 | 113 | ||
114 | error = security_inode_init_security(inode, dir, (char **)&name, | 114 | error = security_inode_init_security(inode, dir, qstr, (char **)&name, |
115 | &value, &length); | 115 | &value, &length); |
116 | if (error) { | 116 | if (error) { |
117 | if (error == -EOPNOTSUPP) | 117 | if (error == -EOPNOTSUPP) |
@@ -195,7 +195,7 @@ xfs_vn_mknod( | |||
195 | 195 | ||
196 | inode = VFS_I(ip); | 196 | inode = VFS_I(ip); |
197 | 197 | ||
198 | error = xfs_init_security(inode, dir); | 198 | error = xfs_init_security(inode, dir, &dentry->d_name); |
199 | if (unlikely(error)) | 199 | if (unlikely(error)) |
200 | goto out_cleanup_inode; | 200 | goto out_cleanup_inode; |
201 | 201 | ||
@@ -368,7 +368,7 @@ xfs_vn_symlink( | |||
368 | 368 | ||
369 | inode = VFS_I(cip); | 369 | inode = VFS_I(cip); |
370 | 370 | ||
371 | error = xfs_init_security(inode, dir); | 371 | error = xfs_init_security(inode, dir, &dentry->d_name); |
372 | if (unlikely(error)) | 372 | if (unlikely(error)) |
373 | goto out_cleanup_inode; | 373 | goto out_cleanup_inode; |
374 | 374 | ||
@@ -505,58 +505,6 @@ xfs_vn_setattr( | |||
505 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); | 505 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); |
506 | } | 506 | } |
507 | 507 | ||
508 | STATIC long | ||
509 | xfs_vn_fallocate( | ||
510 | struct inode *inode, | ||
511 | int mode, | ||
512 | loff_t offset, | ||
513 | loff_t len) | ||
514 | { | ||
515 | long error; | ||
516 | loff_t new_size = 0; | ||
517 | xfs_flock64_t bf; | ||
518 | xfs_inode_t *ip = XFS_I(inode); | ||
519 | |||
520 | /* preallocation on directories not yet supported */ | ||
521 | error = -ENODEV; | ||
522 | if (S_ISDIR(inode->i_mode)) | ||
523 | goto out_error; | ||
524 | |||
525 | bf.l_whence = 0; | ||
526 | bf.l_start = offset; | ||
527 | bf.l_len = len; | ||
528 | |||
529 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
530 | |||
531 | /* check the new inode size is valid before allocating */ | ||
532 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
533 | offset + len > i_size_read(inode)) { | ||
534 | new_size = offset + len; | ||
535 | error = inode_newsize_ok(inode, new_size); | ||
536 | if (error) | ||
537 | goto out_unlock; | ||
538 | } | ||
539 | |||
540 | error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, | ||
541 | 0, XFS_ATTR_NOLOCK); | ||
542 | if (error) | ||
543 | goto out_unlock; | ||
544 | |||
545 | /* Change file size if needed */ | ||
546 | if (new_size) { | ||
547 | struct iattr iattr; | ||
548 | |||
549 | iattr.ia_valid = ATTR_SIZE; | ||
550 | iattr.ia_size = new_size; | ||
551 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); | ||
552 | } | ||
553 | |||
554 | out_unlock: | ||
555 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
556 | out_error: | ||
557 | return error; | ||
558 | } | ||
559 | |||
560 | #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | 508 | #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) |
561 | 509 | ||
562 | /* | 510 | /* |
@@ -650,7 +598,6 @@ static const struct inode_operations xfs_inode_operations = { | |||
650 | .getxattr = generic_getxattr, | 598 | .getxattr = generic_getxattr, |
651 | .removexattr = generic_removexattr, | 599 | .removexattr = generic_removexattr, |
652 | .listxattr = xfs_vn_listxattr, | 600 | .listxattr = xfs_vn_listxattr, |
653 | .fallocate = xfs_vn_fallocate, | ||
654 | .fiemap = xfs_vn_fiemap, | 601 | .fiemap = xfs_vn_fiemap, |
655 | }; | 602 | }; |
656 | 603 | ||
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 214ddd71ff79..244be9cbfe78 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -37,10 +37,8 @@ | |||
37 | 37 | ||
38 | #include <kmem.h> | 38 | #include <kmem.h> |
39 | #include <mrlock.h> | 39 | #include <mrlock.h> |
40 | #include <sv.h> | ||
41 | #include <time.h> | 40 | #include <time.h> |
42 | 41 | ||
43 | #include <support/debug.h> | ||
44 | #include <support/uuid.h> | 42 | #include <support/uuid.h> |
45 | 43 | ||
46 | #include <linux/semaphore.h> | 44 | #include <linux/semaphore.h> |
@@ -87,6 +85,7 @@ | |||
87 | #include <xfs_aops.h> | 85 | #include <xfs_aops.h> |
88 | #include <xfs_super.h> | 86 | #include <xfs_super.h> |
89 | #include <xfs_buf.h> | 87 | #include <xfs_buf.h> |
88 | #include <xfs_message.h> | ||
90 | 89 | ||
91 | /* | 90 | /* |
92 | * Feature macros (disable/enable) | 91 | * Feature macros (disable/enable) |
@@ -281,4 +280,25 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) | |||
281 | #define __arch_pack | 280 | #define __arch_pack |
282 | #endif | 281 | #endif |
283 | 282 | ||
283 | #define ASSERT_ALWAYS(expr) \ | ||
284 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
285 | |||
286 | #ifndef DEBUG | ||
287 | #define ASSERT(expr) ((void)0) | ||
288 | |||
289 | #ifndef STATIC | ||
290 | # define STATIC static noinline | ||
291 | #endif | ||
292 | |||
293 | #else /* DEBUG */ | ||
294 | |||
295 | #define ASSERT(expr) \ | ||
296 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
297 | |||
298 | #ifndef STATIC | ||
299 | # define STATIC noinline | ||
300 | #endif | ||
301 | |||
302 | #endif /* DEBUG */ | ||
303 | |||
284 | #endif /* __XFS_LINUX__ */ | 304 | #endif /* __XFS_LINUX__ */ |
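The macros moved here from the removed support/debug.h mean that ASSERT() vanishes entirely in non-DEBUG builds, so its argument must be free of side effects; ASSERT_ALWAYS() keeps the check (and the BUG() inside assfail) in production builds. A userspace mock showing the split:

    #include <stdio.h>
    #include <stdlib.h>

    /* userspace mock of assfail(); the kernel version calls BUG() */
    static void assfail(const char *expr, const char *file, int line)
    {
            fprintf(stderr, "Assertion failed: %s, file: %s, line: %d\n",
                    expr, file, line);
            abort();
    }

    #ifdef DEBUG
    #define ASSERT(expr) \
            ((expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
    #else
    #define ASSERT(expr) ((void)0)  /* compiled out: never evaluated */
    #endif

    int main(void)
    {
            int checked = 1;

            ASSERT(checked == 1);   /* checked only when built with -DDEBUG */
            return checked - 1;
    }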
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c new file mode 100644 index 000000000000..9f76cceb678d --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_message.c | |||
@@ -0,0 +1,126 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write the Free Software Foundation, | ||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
16 | */ | ||
17 | |||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | |||
28 | /* | ||
29 | * XFS logging functions | ||
30 | */ | ||
31 | static void | ||
32 | __xfs_printk( | ||
33 | const char *level, | ||
34 | const struct xfs_mount *mp, | ||
35 | struct va_format *vaf) | ||
36 | { | ||
37 | if (mp && mp->m_fsname) { | ||
38 | printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); | ||
39 | return; | ||
40 | } | ||
41 | printk("%sXFS: %pV\n", level, vaf); | ||
42 | } | ||
43 | |||
44 | void xfs_printk( | ||
45 | const char *level, | ||
46 | const struct xfs_mount *mp, | ||
47 | const char *fmt, ...) | ||
48 | { | ||
49 | struct va_format vaf; | ||
50 | va_list args; | ||
51 | |||
52 | va_start(args, fmt); | ||
53 | |||
54 | vaf.fmt = fmt; | ||
55 | vaf.va = &args; | ||
56 | |||
57 | __xfs_printk(level, mp, &vaf); | ||
58 | va_end(args); | ||
59 | } | ||
60 | |||
61 | #define define_xfs_printk_level(func, kern_level) \ | ||
62 | void func(const struct xfs_mount *mp, const char *fmt, ...) \ | ||
63 | { \ | ||
64 | struct va_format vaf; \ | ||
65 | va_list args; \ | ||
66 | \ | ||
67 | va_start(args, fmt); \ | ||
68 | \ | ||
69 | vaf.fmt = fmt; \ | ||
70 | vaf.va = &args; \ | ||
71 | \ | ||
72 | __xfs_printk(kern_level, mp, &vaf); \ | ||
73 | va_end(args); \ | ||
74 | } \ | ||
75 | |||
76 | define_xfs_printk_level(xfs_emerg, KERN_EMERG); | ||
77 | define_xfs_printk_level(xfs_alert, KERN_ALERT); | ||
78 | define_xfs_printk_level(xfs_crit, KERN_CRIT); | ||
79 | define_xfs_printk_level(xfs_err, KERN_ERR); | ||
80 | define_xfs_printk_level(xfs_warn, KERN_WARNING); | ||
81 | define_xfs_printk_level(xfs_notice, KERN_NOTICE); | ||
82 | define_xfs_printk_level(xfs_info, KERN_INFO); | ||
83 | #ifdef DEBUG | ||
84 | define_xfs_printk_level(xfs_debug, KERN_DEBUG); | ||
85 | #endif | ||
86 | |||
87 | void | ||
88 | xfs_alert_tag( | ||
89 | const struct xfs_mount *mp, | ||
90 | int panic_tag, | ||
91 | const char *fmt, ...) | ||
92 | { | ||
93 | struct va_format vaf; | ||
94 | va_list args; | ||
95 | int do_panic = 0; | ||
96 | |||
97 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { | ||
98 | xfs_printk(KERN_ALERT, mp, | ||
99 | "XFS: Transforming an alert into a BUG."); | ||
100 | do_panic = 1; | ||
101 | } | ||
102 | |||
103 | va_start(args, fmt); | ||
104 | |||
105 | vaf.fmt = fmt; | ||
106 | vaf.va = &args; | ||
107 | |||
108 | __xfs_printk(KERN_ALERT, mp, &vaf); | ||
109 | va_end(args); | ||
110 | |||
111 | BUG_ON(do_panic); | ||
112 | } | ||
113 | |||
114 | void | ||
115 | assfail(char *expr, char *file, int line) | ||
116 | { | ||
117 | xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", | ||
118 | expr, file, line); | ||
119 | BUG(); | ||
120 | } | ||
121 | |||
122 | void | ||
123 | xfs_hex_dump(void *p, int length) | ||
124 | { | ||
125 | print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); | ||
126 | } | ||
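For clarity, here is what one instantiation of the generator macro above expands to, written out by hand (xfs_warn, for example):

    void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
    {
            struct va_format vaf;
            va_list args;

            va_start(args, fmt);
            vaf.fmt = fmt;
            vaf.va = &args;
            __xfs_printk(KERN_WARNING, mp, &vaf);
            va_end(args);
    }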
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h new file mode 100644 index 000000000000..f1b3fc1b6c4e --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_message.h | |||
@@ -0,0 +1,40 @@ | |||
1 | #ifndef __XFS_MESSAGE_H | ||
2 | #define __XFS_MESSAGE_H 1 | ||
3 | |||
4 | struct xfs_mount; | ||
5 | |||
6 | extern void xfs_printk(const char *level, const struct xfs_mount *mp, | ||
7 | const char *fmt, ...) | ||
8 | __attribute__ ((format (printf, 3, 4))); | ||
9 | extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) | ||
10 | __attribute__ ((format (printf, 2, 3))); | ||
11 | extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) | ||
12 | __attribute__ ((format (printf, 2, 3))); | ||
13 | extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, | ||
14 | const char *fmt, ...) | ||
15 | __attribute__ ((format (printf, 3, 4))); | ||
16 | extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) | ||
17 | __attribute__ ((format (printf, 2, 3))); | ||
18 | extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) | ||
19 | __attribute__ ((format (printf, 2, 3))); | ||
20 | extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) | ||
21 | __attribute__ ((format (printf, 2, 3))); | ||
22 | extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) | ||
23 | __attribute__ ((format (printf, 2, 3))); | ||
24 | extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) | ||
25 | __attribute__ ((format (printf, 2, 3))); | ||
26 | |||
27 | #ifdef DEBUG | ||
28 | extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | ||
29 | __attribute__ ((format (printf, 2, 3))); | ||
30 | #else | ||
31 | static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | ||
32 | { | ||
33 | } | ||
34 | #endif | ||
35 | |||
36 | extern void assfail(char *expr, char *f, int l); | ||
37 | |||
38 | extern void xfs_hex_dump(void *p, int length); | ||
39 | |||
40 | #endif /* __XFS_MESSAGE_H */ | ||
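Typical call sites, as they appear elsewhere in this patch; passing a NULL mount pointer falls back to the bare "XFS:" prefix in __xfs_printk(), while a real mount gets the per-device "XFS (%s):" prefix. XFS_PTAG_IFLUSH is given as an assumed example tag from xfs_error.h:

    /* per-mount prefix: "XFS (sda1): <msg>" */
    xfs_warn(mp, "%s option requires an argument", this_char);

    /* NULL mount: bare "XFS: <msg>" prefix */
    xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
              expr, file, line);

    /* escalates to BUG() when the tag is set in xfs_panic_mask;
     * the tag name here is an assumed example */
    xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "xfs_iflush failed");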
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 064f964d4f3c..b38e58d02299 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -173,6 +173,15 @@ xfs_parseargs( | |||
173 | __uint8_t iosizelog = 0; | 173 | __uint8_t iosizelog = 0; |
174 | 174 | ||
175 | /* | 175 | /* |
176 | * set up the mount name first so all the errors will refer to the | ||
177 | * correct device. | ||
178 | */ | ||
179 | mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); | ||
180 | if (!mp->m_fsname) | ||
181 | return ENOMEM; | ||
182 | mp->m_fsname_len = strlen(mp->m_fsname) + 1; | ||
183 | |||
184 | /* | ||
176 | * Copy binary VFS mount flags we are interested in. | 185 | * Copy binary VFS mount flags we are interested in. |
177 | */ | 186 | */ |
178 | if (sb->s_flags & MS_RDONLY) | 187 | if (sb->s_flags & MS_RDONLY) |
@@ -189,6 +198,7 @@ xfs_parseargs( | |||
189 | mp->m_flags |= XFS_MOUNT_BARRIER; | 198 | mp->m_flags |= XFS_MOUNT_BARRIER; |
190 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; | 199 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; |
191 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; | 200 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; |
201 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | ||
192 | 202 | ||
193 | /* | 203 | /* |
194 | * These can be overridden by the mount option parsing. | 204 | * These can be overridden by the mount option parsing. |
@@ -207,24 +217,21 @@ xfs_parseargs( | |||
207 | 217 | ||
208 | if (!strcmp(this_char, MNTOPT_LOGBUFS)) { | 218 | if (!strcmp(this_char, MNTOPT_LOGBUFS)) { |
209 | if (!value || !*value) { | 219 | if (!value || !*value) { |
210 | cmn_err(CE_WARN, | 220 | xfs_warn(mp, "%s option requires an argument", |
211 | "XFS: %s option requires an argument", | ||
212 | this_char); | 221 | this_char); |
213 | return EINVAL; | 222 | return EINVAL; |
214 | } | 223 | } |
215 | mp->m_logbufs = simple_strtoul(value, &eov, 10); | 224 | mp->m_logbufs = simple_strtoul(value, &eov, 10); |
216 | } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { | 225 | } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { |
217 | if (!value || !*value) { | 226 | if (!value || !*value) { |
218 | cmn_err(CE_WARN, | 227 | xfs_warn(mp, "%s option requires an argument", |
219 | "XFS: %s option requires an argument", | ||
220 | this_char); | 228 | this_char); |
221 | return EINVAL; | 229 | return EINVAL; |
222 | } | 230 | } |
223 | mp->m_logbsize = suffix_strtoul(value, &eov, 10); | 231 | mp->m_logbsize = suffix_strtoul(value, &eov, 10); |
224 | } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { | 232 | } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { |
225 | if (!value || !*value) { | 233 | if (!value || !*value) { |
226 | cmn_err(CE_WARN, | 234 | xfs_warn(mp, "%s option requires an argument", |
227 | "XFS: %s option requires an argument", | ||
228 | this_char); | 235 | this_char); |
229 | return EINVAL; | 236 | return EINVAL; |
230 | } | 237 | } |
@@ -232,14 +239,12 @@ xfs_parseargs( | |||
232 | if (!mp->m_logname) | 239 | if (!mp->m_logname) |
233 | return ENOMEM; | 240 | return ENOMEM; |
234 | } else if (!strcmp(this_char, MNTOPT_MTPT)) { | 241 | } else if (!strcmp(this_char, MNTOPT_MTPT)) { |
235 | cmn_err(CE_WARN, | 242 | xfs_warn(mp, "%s option not allowed on this system", |
236 | "XFS: %s option not allowed on this system", | ||
237 | this_char); | 243 | this_char); |
238 | return EINVAL; | 244 | return EINVAL; |
239 | } else if (!strcmp(this_char, MNTOPT_RTDEV)) { | 245 | } else if (!strcmp(this_char, MNTOPT_RTDEV)) { |
240 | if (!value || !*value) { | 246 | if (!value || !*value) { |
241 | cmn_err(CE_WARN, | 247 | xfs_warn(mp, "%s option requires an argument", |
242 | "XFS: %s option requires an argument", | ||
243 | this_char); | 248 | this_char); |
244 | return EINVAL; | 249 | return EINVAL; |
245 | } | 250 | } |
@@ -248,8 +253,7 @@ xfs_parseargs( | |||
248 | return ENOMEM; | 253 | return ENOMEM; |
249 | } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { | 254 | } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { |
250 | if (!value || !*value) { | 255 | if (!value || !*value) { |
251 | cmn_err(CE_WARN, | 256 | xfs_warn(mp, "%s option requires an argument", |
252 | "XFS: %s option requires an argument", | ||
253 | this_char); | 257 | this_char); |
254 | return EINVAL; | 258 | return EINVAL; |
255 | } | 259 | } |
@@ -257,8 +261,7 @@ xfs_parseargs( | |||
257 | iosizelog = ffs(iosize) - 1; | 261 | iosizelog = ffs(iosize) - 1; |
258 | } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { | 262 | } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { |
259 | if (!value || !*value) { | 263 | if (!value || !*value) { |
260 | cmn_err(CE_WARN, | 264 | xfs_warn(mp, "%s option requires an argument", |
261 | "XFS: %s option requires an argument", | ||
262 | this_char); | 265 | this_char); |
263 | return EINVAL; | 266 | return EINVAL; |
264 | } | 267 | } |
@@ -280,16 +283,14 @@ xfs_parseargs( | |||
280 | mp->m_flags |= XFS_MOUNT_SWALLOC; | 283 | mp->m_flags |= XFS_MOUNT_SWALLOC; |
281 | } else if (!strcmp(this_char, MNTOPT_SUNIT)) { | 284 | } else if (!strcmp(this_char, MNTOPT_SUNIT)) { |
282 | if (!value || !*value) { | 285 | if (!value || !*value) { |
283 | cmn_err(CE_WARN, | 286 | xfs_warn(mp, "%s option requires an argument", |
284 | "XFS: %s option requires an argument", | ||
285 | this_char); | 287 | this_char); |
286 | return EINVAL; | 288 | return EINVAL; |
287 | } | 289 | } |
288 | dsunit = simple_strtoul(value, &eov, 10); | 290 | dsunit = simple_strtoul(value, &eov, 10); |
289 | } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { | 291 | } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { |
290 | if (!value || !*value) { | 292 | if (!value || !*value) { |
291 | cmn_err(CE_WARN, | 293 | xfs_warn(mp, "%s option requires an argument", |
292 | "XFS: %s option requires an argument", | ||
293 | this_char); | 294 | this_char); |
294 | return EINVAL; | 295 | return EINVAL; |
295 | } | 296 | } |
@@ -297,8 +298,7 @@ xfs_parseargs( | |||
297 | } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { | 298 | } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { |
298 | mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; | 299 | mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; |
299 | #if !XFS_BIG_INUMS | 300 | #if !XFS_BIG_INUMS |
300 | cmn_err(CE_WARN, | 301 | xfs_warn(mp, "%s option not allowed on this system", |
301 | "XFS: %s option not allowed on this system", | ||
302 | this_char); | 302 | this_char); |
303 | return EINVAL; | 303 | return EINVAL; |
304 | #endif | 304 | #endif |
@@ -356,20 +356,19 @@ xfs_parseargs( | |||
356 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | 356 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { |
357 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; | 357 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; |
358 | } else if (!strcmp(this_char, "ihashsize")) { | 358 | } else if (!strcmp(this_char, "ihashsize")) { |
359 | cmn_err(CE_WARN, | 359 | xfs_warn(mp, |
360 | "XFS: ihashsize no longer used, option is deprecated."); | 360 | "ihashsize no longer used, option is deprecated."); |
361 | } else if (!strcmp(this_char, "osyncisdsync")) { | 361 | } else if (!strcmp(this_char, "osyncisdsync")) { |
362 | cmn_err(CE_WARN, | 362 | xfs_warn(mp, |
363 | "XFS: osyncisdsync has no effect, option is deprecated."); | 363 | "osyncisdsync has no effect, option is deprecated."); |
364 | } else if (!strcmp(this_char, "osyncisosync")) { | 364 | } else if (!strcmp(this_char, "osyncisosync")) { |
365 | cmn_err(CE_WARN, | 365 | xfs_warn(mp, |
366 | "XFS: osyncisosync has no effect, option is deprecated."); | 366 | "osyncisosync has no effect, option is deprecated."); |
367 | } else if (!strcmp(this_char, "irixsgid")) { | 367 | } else if (!strcmp(this_char, "irixsgid")) { |
368 | cmn_err(CE_WARN, | 368 | xfs_warn(mp, |
369 | "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); | 369 | "irixsgid is now a sysctl(2) variable, option is deprecated."); |
370 | } else { | 370 | } else { |
371 | cmn_err(CE_WARN, | 371 | xfs_warn(mp, "unknown mount option [%s].", this_char); |
372 | "XFS: unknown mount option [%s].", this_char); | ||
373 | return EINVAL; | 372 | return EINVAL; |
374 | } | 373 | } |
375 | } | 374 | } |
@@ -379,40 +378,37 @@ xfs_parseargs( | |||
379 | */ | 378 | */ |
380 | if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && | 379 | if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && |
381 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { | 380 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
382 | cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); | 381 | xfs_warn(mp, "no-recovery mounts must be read-only."); |
383 | return EINVAL; | 382 | return EINVAL; |
384 | } | 383 | } |
385 | 384 | ||
386 | if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { | 385 | if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { |
387 | cmn_err(CE_WARN, | 386 | xfs_warn(mp, |
388 | "XFS: sunit and swidth options incompatible with the noalign option"); | 387 | "sunit and swidth options incompatible with the noalign option"); |
389 | return EINVAL; | 388 | return EINVAL; |
390 | } | 389 | } |
391 | 390 | ||
392 | #ifndef CONFIG_XFS_QUOTA | 391 | #ifndef CONFIG_XFS_QUOTA |
393 | if (XFS_IS_QUOTA_RUNNING(mp)) { | 392 | if (XFS_IS_QUOTA_RUNNING(mp)) { |
394 | cmn_err(CE_WARN, | 393 | xfs_warn(mp, "quota support not available in this kernel."); |
395 | "XFS: quota support not available in this kernel."); | ||
396 | return EINVAL; | 394 | return EINVAL; |
397 | } | 395 | } |
398 | #endif | 396 | #endif |
399 | 397 | ||
400 | if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && | 398 | if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && |
401 | (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { | 399 | (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { |
402 | cmn_err(CE_WARN, | 400 | xfs_warn(mp, "cannot mount with both project and group quota"); |
403 | "XFS: cannot mount with both project and group quota"); | ||
404 | return EINVAL; | 401 | return EINVAL; |
405 | } | 402 | } |
406 | 403 | ||
407 | if ((dsunit && !dswidth) || (!dsunit && dswidth)) { | 404 | if ((dsunit && !dswidth) || (!dsunit && dswidth)) { |
408 | cmn_err(CE_WARN, | 405 | xfs_warn(mp, "sunit and swidth must be specified together"); |
409 | "XFS: sunit and swidth must be specified together"); | ||
410 | return EINVAL; | 406 | return EINVAL; |
411 | } | 407 | } |
412 | 408 | ||
413 | if (dsunit && (dswidth % dsunit != 0)) { | 409 | if (dsunit && (dswidth % dsunit != 0)) { |
414 | cmn_err(CE_WARN, | 410 | xfs_warn(mp, |
415 | "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", | 411 | "stripe width (%d) must be a multiple of the stripe unit (%d)", |
416 | dswidth, dsunit); | 412 | dswidth, dsunit); |
417 | return EINVAL; | 413 | return EINVAL; |
418 | } | 414 | } |
@@ -438,8 +434,7 @@ done: | |||
438 | mp->m_logbufs != 0 && | 434 | mp->m_logbufs != 0 && |
439 | (mp->m_logbufs < XLOG_MIN_ICLOGS || | 435 | (mp->m_logbufs < XLOG_MIN_ICLOGS || |
440 | mp->m_logbufs > XLOG_MAX_ICLOGS)) { | 436 | mp->m_logbufs > XLOG_MAX_ICLOGS)) { |
441 | cmn_err(CE_WARN, | 437 | xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", |
442 | "XFS: invalid logbufs value: %d [not %d-%d]", | ||
443 | mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); | 438 | mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); |
444 | return XFS_ERROR(EINVAL); | 439 | return XFS_ERROR(EINVAL); |
445 | } | 440 | } |
@@ -448,22 +443,16 @@ done: | |||
448 | (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || | 443 | (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || |
449 | mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || | 444 | mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || |
450 | !is_power_of_2(mp->m_logbsize))) { | 445 | !is_power_of_2(mp->m_logbsize))) { |
451 | cmn_err(CE_WARN, | 446 | xfs_warn(mp, |
452 | "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", | 447 | "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", |
453 | mp->m_logbsize); | 448 | mp->m_logbsize); |
454 | return XFS_ERROR(EINVAL); | 449 | return XFS_ERROR(EINVAL); |
455 | } | 450 | } |
456 | 451 | ||
457 | mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); | ||
458 | if (!mp->m_fsname) | ||
459 | return ENOMEM; | ||
460 | mp->m_fsname_len = strlen(mp->m_fsname) + 1; | ||
461 | |||
462 | if (iosizelog) { | 452 | if (iosizelog) { |
463 | if (iosizelog > XFS_MAX_IO_LOG || | 453 | if (iosizelog > XFS_MAX_IO_LOG || |
464 | iosizelog < XFS_MIN_IO_LOG) { | 454 | iosizelog < XFS_MIN_IO_LOG) { |
465 | cmn_err(CE_WARN, | 455 | xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", |
466 | "XFS: invalid log iosize: %d [not %d-%d]", | ||
467 | iosizelog, XFS_MIN_IO_LOG, | 456 | iosizelog, XFS_MIN_IO_LOG, |
468 | XFS_MAX_IO_LOG); | 457 | XFS_MAX_IO_LOG); |
469 | return XFS_ERROR(EINVAL); | 458 | return XFS_ERROR(EINVAL); |
@@ -606,10 +595,11 @@ xfs_blkdev_get( | |||
606 | { | 595 | { |
607 | int error = 0; | 596 | int error = 0; |
608 | 597 | ||
609 | *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); | 598 | *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, |
599 | mp); | ||
610 | if (IS_ERR(*bdevp)) { | 600 | if (IS_ERR(*bdevp)) { |
611 | error = PTR_ERR(*bdevp); | 601 | error = PTR_ERR(*bdevp); |
612 | printk("XFS: Invalid device [%s], error=%d\n", name, error); | 602 | xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); |
613 | } | 603 | } |
614 | 604 | ||
615 | return -error; | 605 | return -error; |
@@ -620,7 +610,7 @@ xfs_blkdev_put( | |||
620 | struct block_device *bdev) | 610 | struct block_device *bdev) |
621 | { | 611 | { |
622 | if (bdev) | 612 | if (bdev) |
623 | close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); | 613 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
624 | } | 614 | } |
625 | 615 | ||
626 | /* | 616 | /* |
@@ -663,23 +653,23 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp) | |||
663 | int error; | 653 | int error; |
664 | 654 | ||
665 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | 655 | if (mp->m_logdev_targp != mp->m_ddev_targp) { |
666 | xfs_fs_cmn_err(CE_NOTE, mp, | 656 | xfs_notice(mp, |
667 | "Disabling barriers, not supported with external log device"); | 657 | "Disabling barriers, not supported with external log device"); |
668 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 658 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
669 | return; | 659 | return; |
670 | } | 660 | } |
671 | 661 | ||
672 | if (xfs_readonly_buftarg(mp->m_ddev_targp)) { | 662 | if (xfs_readonly_buftarg(mp->m_ddev_targp)) { |
673 | xfs_fs_cmn_err(CE_NOTE, mp, | 663 | xfs_notice(mp, |
674 | "Disabling barriers, underlying device is readonly"); | 664 | "Disabling barriers, underlying device is readonly"); |
675 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 665 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
676 | return; | 666 | return; |
677 | } | 667 | } |
678 | 668 | ||
679 | error = xfs_barrier_test(mp); | 669 | error = xfs_barrier_test(mp); |
680 | if (error) { | 670 | if (error) { |
681 | xfs_fs_cmn_err(CE_NOTE, mp, | 671 | xfs_notice(mp, |
682 | "Disabling barriers, trial barrier write failed"); | 672 | "Disabling barriers, trial barrier write failed"); |
683 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 673 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
684 | return; | 674 | return; |
685 | } | 675 | } |
@@ -742,8 +732,8 @@ xfs_open_devices( | |||
742 | goto out_close_logdev; | 732 | goto out_close_logdev; |
743 | 733 | ||
744 | if (rtdev == ddev || rtdev == logdev) { | 734 | if (rtdev == ddev || rtdev == logdev) { |
745 | cmn_err(CE_WARN, | 735 | xfs_warn(mp, |
746 | "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); | 736 | "Cannot mount filesystem with identical rtdev and ddev/logdev."); |
747 | error = EINVAL; | 737 | error = EINVAL; |
748 | goto out_close_rtdev; | 738 | goto out_close_rtdev; |
749 | } | 739 | } |
@@ -826,63 +816,6 @@ xfs_setup_devices( | |||
826 | return 0; | 816 | return 0; |
827 | } | 817 | } |
828 | 818 | ||
829 | /* | ||
830 | * XFS AIL push thread support | ||
831 | */ | ||
832 | void | ||
833 | xfsaild_wakeup( | ||
834 | struct xfs_ail *ailp, | ||
835 | xfs_lsn_t threshold_lsn) | ||
836 | { | ||
837 | ailp->xa_target = threshold_lsn; | ||
838 | wake_up_process(ailp->xa_task); | ||
839 | } | ||
840 | |||
841 | STATIC int | ||
842 | xfsaild( | ||
843 | void *data) | ||
844 | { | ||
845 | struct xfs_ail *ailp = data; | ||
846 | xfs_lsn_t last_pushed_lsn = 0; | ||
847 | long tout = 0; /* milliseconds */ | ||
848 | |||
849 | while (!kthread_should_stop()) { | ||
850 | schedule_timeout_interruptible(tout ? | ||
851 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | ||
852 | |||
853 | /* swsusp */ | ||
854 | try_to_freeze(); | ||
855 | |||
856 | ASSERT(ailp->xa_mount->m_log); | ||
857 | if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) | ||
858 | continue; | ||
859 | |||
860 | tout = xfsaild_push(ailp, &last_pushed_lsn); | ||
861 | } | ||
862 | |||
863 | return 0; | ||
864 | } /* xfsaild */ | ||
865 | |||
866 | int | ||
867 | xfsaild_start( | ||
868 | struct xfs_ail *ailp) | ||
869 | { | ||
870 | ailp->xa_target = 0; | ||
871 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | ||
872 | ailp->xa_mount->m_fsname); | ||
873 | if (IS_ERR(ailp->xa_task)) | ||
874 | return -PTR_ERR(ailp->xa_task); | ||
875 | return 0; | ||
876 | } | ||
877 | |||
878 | void | ||
879 | xfsaild_stop( | ||
880 | struct xfs_ail *ailp) | ||
881 | { | ||
882 | kthread_stop(ailp->xa_task); | ||
883 | } | ||
884 | |||
885 | |||
886 | /* Catch misguided souls that try to use this interface on XFS */ | 819 | /* Catch misguided souls that try to use this interface on XFS */ |
887 | STATIC struct inode * | 820 | STATIC struct inode * |
888 | xfs_fs_alloc_inode( | 821 | xfs_fs_alloc_inode( |
@@ -935,7 +868,7 @@ out_reclaim: | |||
935 | * Slab object creation initialisation for the XFS inode. | 868 | * Slab object creation initialisation for the XFS inode. |
936 | * This covers only the idempotent fields in the XFS inode; | 869 | * This covers only the idempotent fields in the XFS inode; |
937 | * all other fields need to be initialised on allocation | 870 | * all other fields need to be initialised on allocation |
938 | * from the slab. This avoids the need to repeatedly intialise | 871 | * from the slab. This avoids the need to repeatedly initialise |
939 | * fields in the xfs inode that are left in the initialise state | 872 | * fields in the xfs inode that are left in the initialise state |
940 | * when freeing the inode. | 873 | * when freeing the inode. |
941 | */ | 874 | */ |
@@ -1076,7 +1009,7 @@ xfs_fs_write_inode( | |||
1076 | error = 0; | 1009 | error = 0; |
1077 | goto out_unlock; | 1010 | goto out_unlock; |
1078 | } | 1011 | } |
1079 | error = xfs_iflush(ip, 0); | 1012 | error = xfs_iflush(ip, SYNC_TRYLOCK); |
1080 | } | 1013 | } |
1081 | 1014 | ||
1082 | out_unlock: | 1015 | out_unlock: |
@@ -1118,6 +1051,8 @@ xfs_fs_evict_inode( | |||
1118 | */ | 1051 | */ |
1119 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | 1052 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); |
1120 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 1053 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
1054 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
1055 | &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); | ||
1121 | 1056 | ||
1122 | xfs_inactive(ip); | 1057 | xfs_inactive(ip); |
1123 | } | 1058 | } |
@@ -1187,22 +1122,12 @@ xfs_fs_sync_fs( | |||
1187 | return -error; | 1122 | return -error; |
1188 | 1123 | ||
1189 | if (laptop_mode) { | 1124 | if (laptop_mode) { |
1190 | int prev_sync_seq = mp->m_sync_seq; | ||
1191 | |||
1192 | /* | 1125 | /* |
1193 | * The disk must be active because we're syncing. | 1126 | * The disk must be active because we're syncing. |
1194 | * We schedule xfssyncd now (now that the disk is | 1127 | * We schedule xfssyncd now (now that the disk is |
1195 | * active) instead of later (when it might not be). | 1128 | * active) instead of later (when it might not be). |
1196 | */ | 1129 | */ |
1197 | wake_up_process(mp->m_sync_task); | 1130 | flush_delayed_work_sync(&mp->m_sync_work); |
1198 | /* | ||
1199 | * We have to wait for the sync iteration to complete. | ||
1200 | * If we don't, the disk activity caused by the sync | ||
1201 | * will come after the sync is completed, and that | ||
1202 | * triggers another sync from laptop mode. | ||
1203 | */ | ||
1204 | wait_event(mp->m_wait_single_sync_task, | ||
1205 | mp->m_sync_seq != prev_sync_seq); | ||
1206 | } | 1131 | } |
1207 | 1132 | ||
1208 | return 0; | 1133 | return 0; |
@@ -1330,8 +1255,8 @@ xfs_fs_remount( | |||
1330 | * options that we can't actually change. | 1255 | * options that we can't actually change. |
1331 | */ | 1256 | */ |
1332 | #if 0 | 1257 | #if 0 |
1333 | printk(KERN_INFO | 1258 | xfs_info(mp, |
1334 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1259 | "mount option \"%s\" not supported for remount\n", p); |
1335 | return -EINVAL; | 1260 | return -EINVAL; |
1336 | #else | 1261 | #else |
1337 | break; | 1262 | break; |
@@ -1352,8 +1277,7 @@ xfs_fs_remount( | |||
1352 | if (mp->m_update_flags) { | 1277 | if (mp->m_update_flags) { |
1353 | error = xfs_mount_log_sb(mp, mp->m_update_flags); | 1278 | error = xfs_mount_log_sb(mp, mp->m_update_flags); |
1354 | if (error) { | 1279 | if (error) { |
1355 | cmn_err(CE_WARN, | 1280 | xfs_warn(mp, "failed to write sb changes"); |
1356 | "XFS: failed to write sb changes"); | ||
1357 | return error; | 1281 | return error; |
1358 | } | 1282 | } |
1359 | mp->m_update_flags = 0; | 1283 | mp->m_update_flags = 0; |
@@ -1399,7 +1323,7 @@ xfs_fs_freeze( | |||
1399 | 1323 | ||
1400 | xfs_save_resvblks(mp); | 1324 | xfs_save_resvblks(mp); |
1401 | xfs_quiesce_attr(mp); | 1325 | xfs_quiesce_attr(mp); |
1402 | return -xfs_fs_log_dummy(mp, SYNC_WAIT); | 1326 | return -xfs_fs_log_dummy(mp); |
1403 | } | 1327 | } |
1404 | 1328 | ||
1405 | STATIC int | 1329 | STATIC int |
@@ -1437,15 +1361,15 @@ xfs_finish_flags( | |||
1437 | mp->m_logbsize = mp->m_sb.sb_logsunit; | 1361 | mp->m_logbsize = mp->m_sb.sb_logsunit; |
1438 | } else if (mp->m_logbsize > 0 && | 1362 | } else if (mp->m_logbsize > 0 && |
1439 | mp->m_logbsize < mp->m_sb.sb_logsunit) { | 1363 | mp->m_logbsize < mp->m_sb.sb_logsunit) { |
1440 | cmn_err(CE_WARN, | 1364 | xfs_warn(mp, |
1441 | "XFS: logbuf size must be greater than or equal to log stripe size"); | 1365 | "logbuf size must be greater than or equal to log stripe size"); |
1442 | return XFS_ERROR(EINVAL); | 1366 | return XFS_ERROR(EINVAL); |
1443 | } | 1367 | } |
1444 | } else { | 1368 | } else { |
1445 | /* Fail a mount if the logbuf is larger than 32K */ | 1369 | /* Fail a mount if the logbuf is larger than 32K */ |
1446 | if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { | 1370 | if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { |
1447 | cmn_err(CE_WARN, | 1371 | xfs_warn(mp, |
1448 | "XFS: logbuf size for version 1 logs must be 16K or 32K"); | 1372 | "logbuf size for version 1 logs must be 16K or 32K"); |
1449 | return XFS_ERROR(EINVAL); | 1373 | return XFS_ERROR(EINVAL); |
1450 | } | 1374 | } |
1451 | } | 1375 | } |
@@ -1462,8 +1386,8 @@ xfs_finish_flags( | |||
1462 | * prohibit r/w mounts of read-only filesystems | 1386 | * prohibit r/w mounts of read-only filesystems |
1463 | */ | 1387 | */ |
1464 | if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { | 1388 | if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { |
1465 | cmn_err(CE_WARN, | 1389 | xfs_warn(mp, |
1466 | "XFS: cannot mount a read-only filesystem as read-write"); | 1390 | "cannot mount a read-only filesystem as read-write"); |
1467 | return XFS_ERROR(EROFS); | 1391 | return XFS_ERROR(EROFS); |
1468 | } | 1392 | } |
1469 | 1393 | ||
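These conversions are part of the new xfs_message.c logging layer added in this merge (note xfs_message.o in the Makefile hunk): one helper per log level - xfs_err(), xfs_warn(), xfs_notice(), xfs_info(), xfs_debug() - each taking the xfs_mount so the filesystem name is prefixed automatically, with NULL allowed when no mount is in scope. The helper internals are not shown in this diff; a simplified sketch of the likely shape, using the kernel's %pV/va_format idiom (helper name hypothetical):

    #include <linux/kernel.h>

    static void
    __my_xfs_printk(const char *level, const struct xfs_mount *mp,
                    struct va_format *vaf)
    {
            if (mp && mp->m_fsname)
                    printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
            else
                    printk("%sXFS: %pV\n", level, vaf);
    }

    void
    xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
    {
            struct va_format vaf;
            va_list args;

            va_start(args, fmt);
            vaf.fmt = fmt;
            vaf.va = &args;
            __my_xfs_printk(KERN_WARNING, mp, &vaf);
            va_end(args);
    }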
@@ -1487,9 +1411,6 @@ xfs_fs_fill_super( | |||
1487 | spin_lock_init(&mp->m_sb_lock); | 1411 | spin_lock_init(&mp->m_sb_lock); |
1488 | mutex_init(&mp->m_growlock); | 1412 | mutex_init(&mp->m_growlock); |
1489 | atomic_set(&mp->m_active_trans, 0); | 1413 | atomic_set(&mp->m_active_trans, 0); |
1490 | INIT_LIST_HEAD(&mp->m_sync_list); | ||
1491 | spin_lock_init(&mp->m_sync_lock); | ||
1492 | init_waitqueue_head(&mp->m_wait_single_sync_task); | ||
1493 | 1414 | ||
1494 | mp->m_super = sb; | 1415 | mp->m_super = sb; |
1495 | sb->s_fs_info = mp; | 1416 | sb->s_fs_info = mp; |
@@ -1536,10 +1457,14 @@ xfs_fs_fill_super( | |||
1536 | if (error) | 1457 | if (error) |
1537 | goto out_free_sb; | 1458 | goto out_free_sb; |
1538 | 1459 | ||
1539 | error = xfs_mountfs(mp); | 1460 | /* |
1540 | if (error) | 1461 | * we must configure the block size in the superblock before we run the |
1541 | goto out_filestream_unmount; | 1462 | * full mount process as the mount process can lookup and cache inodes. |
1542 | 1463 | * For the same reason we must also initialise the syncd and register | |
1464 | * the inode cache shrinker so that inodes can be reclaimed during | ||
1465 | * operations like a quotacheck that iterate all inodes in the | ||
1466 | * filesystem. | ||
1467 | */ | ||
1543 | sb->s_magic = XFS_SB_MAGIC; | 1468 | sb->s_magic = XFS_SB_MAGIC; |
1544 | sb->s_blocksize = mp->m_sb.sb_blocksize; | 1469 | sb->s_blocksize = mp->m_sb.sb_blocksize; |
1545 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; | 1470 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; |
@@ -1547,6 +1472,16 @@ xfs_fs_fill_super( | |||
1547 | sb->s_time_gran = 1; | 1472 | sb->s_time_gran = 1; |
1548 | set_posix_acl_flag(sb); | 1473 | set_posix_acl_flag(sb); |
1549 | 1474 | ||
1475 | error = xfs_syncd_init(mp); | ||
1476 | if (error) | ||
1477 | goto out_filestream_unmount; | ||
1478 | |||
1479 | xfs_inode_shrinker_register(mp); | ||
1480 | |||
1481 | error = xfs_mountfs(mp); | ||
1482 | if (error) | ||
1483 | goto out_syncd_stop; | ||
1484 | |||
1550 | root = igrab(VFS_I(mp->m_rootip)); | 1485 | root = igrab(VFS_I(mp->m_rootip)); |
1551 | if (!root) { | 1486 | if (!root) { |
1552 | error = ENOENT; | 1487 | error = ENOENT; |
@@ -1562,14 +1497,11 @@ xfs_fs_fill_super( | |||
1562 | goto fail_vnrele; | 1497 | goto fail_vnrele; |
1563 | } | 1498 | } |
1564 | 1499 | ||
1565 | error = xfs_syncd_init(mp); | ||
1566 | if (error) | ||
1567 | goto fail_vnrele; | ||
1568 | |||
1569 | xfs_inode_shrinker_register(mp); | ||
1570 | |||
1571 | return 0; | 1500 | return 0; |
1572 | 1501 | ||
1502 | out_syncd_stop: | ||
1503 | xfs_inode_shrinker_unregister(mp); | ||
1504 | xfs_syncd_stop(mp); | ||
1573 | out_filestream_unmount: | 1505 | out_filestream_unmount: |
1574 | xfs_filestream_unmount(mp); | 1506 | xfs_filestream_unmount(mp); |
1575 | out_free_sb: | 1507 | out_free_sb: |
@@ -1593,6 +1525,9 @@ xfs_fs_fill_super( | |||
1593 | } | 1525 | } |
1594 | 1526 | ||
1595 | fail_unmount: | 1527 | fail_unmount: |
1528 | xfs_inode_shrinker_unregister(mp); | ||
1529 | xfs_syncd_stop(mp); | ||
1530 | |||
1596 | /* | 1531 | /* |
1597 | * Blow away any referenced inode in the filestreams cache. | 1532 | * Blow away any referenced inode in the filestreams cache. |
1598 | * This can and will cause log traffic as inodes go inactive | 1533 | * This can and will cause log traffic as inodes go inactive |
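The fill_super reshuffle in the last three hunks has a single purpose, spelled out in the added comment: xfs_mountfs() can look up and cache inodes (quotacheck iterates every inode in the filesystem), so the syncd work items and the inode-cache shrinker must already be running before the mount proper, and must be torn down both in the new out_syncd_stop error path and in fail_unmount. The resulting order, in outline:

    sb->s_blocksize = mp->m_sb.sb_blocksize;    /* VFS state first */

    error = xfs_syncd_init(mp);                 /* sync/reclaim workers */
    if (error)
            goto out_filestream_unmount;
    xfs_inode_shrinker_register(mp);            /* reclaim under memory pressure */

    error = xfs_mountfs(mp);                    /* may quotacheck all inodes */
    if (error)
            goto out_syncd_stop;                /* unwind in reverse order */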
@@ -1782,6 +1717,38 @@ xfs_destroy_zones(void) | |||
1782 | } | 1717 | } |
1783 | 1718 | ||
1784 | STATIC int __init | 1719 | STATIC int __init |
1720 | xfs_init_workqueues(void) | ||
1721 | { | ||
1722 | /* | ||
1723 | * max_active is set to 8 to give enough concurrency to allow ||
1724 | * multiple work operations on each CPU to run. This allows multiple | ||
1725 | * filesystems to be running sync work concurrently, and scales with | ||
1726 | * the number of CPUs in the system. | ||
1727 | */ | ||
1728 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); | ||
1729 | if (!xfs_syncd_wq) | ||
1730 | goto out; | ||
1731 | |||
1732 | xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); | ||
1733 | if (!xfs_ail_wq) | ||
1734 | goto out_destroy_syncd; | ||
1735 | |||
1736 | return 0; | ||
1737 | |||
1738 | out_destroy_syncd: | ||
1739 | destroy_workqueue(xfs_syncd_wq); | ||
1740 | out: | ||
1741 | return -ENOMEM; | ||
1742 | } | ||
1743 | |||
1744 | STATIC void | ||
1745 | xfs_destroy_workqueues(void) | ||
1746 | { | ||
1747 | destroy_workqueue(xfs_ail_wq); | ||
1748 | destroy_workqueue(xfs_syncd_wq); | ||
1749 | } | ||
1750 | |||
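xfs_init_workqueues() replaces per-mount kthreads with two shared workqueues. alloc_workqueue() is the 2.6.36+ concurrency-managed workqueue API: WQ_CPU_INTENSIVE keeps long-running items from stalling the per-CPU worker pool, and max_active = 8 caps in-flight items per CPU, which is what lets every mounted filesystem share the one "xfssyncd" queue. The same pattern in isolation:

    #include <linux/workqueue.h>

    static struct workqueue_struct *my_wq;

    static int __init my_module_init(void)
    {
            /*
             * Up to 8 concurrent items per CPU; WQ_CPU_INTENSIVE items
             * are not counted against the concurrency-managed pool.
             */
            my_wq = alloc_workqueue("my_wq", WQ_CPU_INTENSIVE, 8);
            if (!my_wq)
                    return -ENOMEM;
            return 0;
    }

    static void __exit my_module_exit(void)
    {
            destroy_workqueue(my_wq);       /* drains pending work first */
    }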
1751 | STATIC int __init | ||
1785 | init_xfs_fs(void) | 1752 | init_xfs_fs(void) |
1786 | { | 1753 | { |
1787 | int error; | 1754 | int error; |
@@ -1796,10 +1763,14 @@ init_xfs_fs(void) | |||
1796 | if (error) | 1763 | if (error) |
1797 | goto out; | 1764 | goto out; |
1798 | 1765 | ||
1799 | error = xfs_mru_cache_init(); | 1766 | error = xfs_init_workqueues(); |
1800 | if (error) | 1767 | if (error) |
1801 | goto out_destroy_zones; | 1768 | goto out_destroy_zones; |
1802 | 1769 | ||
1770 | error = xfs_mru_cache_init(); | ||
1771 | if (error) | ||
1772 | goto out_destroy_wq; | ||
1773 | |||
1803 | error = xfs_filestream_init(); | 1774 | error = xfs_filestream_init(); |
1804 | if (error) | 1775 | if (error) |
1805 | goto out_mru_cache_uninit; | 1776 | goto out_mru_cache_uninit; |
@@ -1816,6 +1787,10 @@ init_xfs_fs(void) | |||
1816 | if (error) | 1787 | if (error) |
1817 | goto out_cleanup_procfs; | 1788 | goto out_cleanup_procfs; |
1818 | 1789 | ||
1790 | error = xfs_init_workqueues(); | ||
1791 | if (error) | ||
1792 | goto out_sysctl_unregister; | ||
1793 | |||
1819 | vfs_initquota(); | 1794 | vfs_initquota(); |
1820 | 1795 | ||
1821 | error = register_filesystem(&xfs_fs_type); | 1796 | error = register_filesystem(&xfs_fs_type); |
@@ -1833,6 +1808,8 @@ init_xfs_fs(void) | |||
1833 | xfs_filestream_uninit(); | 1808 | xfs_filestream_uninit(); |
1834 | out_mru_cache_uninit: | 1809 | out_mru_cache_uninit: |
1835 | xfs_mru_cache_uninit(); | 1810 | xfs_mru_cache_uninit(); |
1811 | out_destroy_wq: | ||
1812 | xfs_destroy_workqueues(); | ||
1836 | out_destroy_zones: | 1813 | out_destroy_zones: |
1837 | xfs_destroy_zones(); | 1814 | xfs_destroy_zones(); |
1838 | out: | 1815 | out: |
@@ -1849,6 +1826,7 @@ exit_xfs_fs(void) | |||
1849 | xfs_buf_terminate(); | 1826 | xfs_buf_terminate(); |
1850 | xfs_filestream_uninit(); | 1827 | xfs_filestream_uninit(); |
1851 | xfs_mru_cache_uninit(); | 1828 | xfs_mru_cache_uninit(); |
1829 | xfs_destroy_workqueues(); | ||
1852 | xfs_destroy_zones(); | 1830 | xfs_destroy_zones(); |
1853 | } | 1831 | } |
1854 | 1832 | ||
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index afb0d7cfad1c..e4f9c1b0836c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_trans_priv.h" | ||
25 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
@@ -39,6 +40,8 @@ | |||
39 | #include <linux/kthread.h> | 40 | #include <linux/kthread.h> |
40 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
41 | 42 | ||
43 | struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
44 | |||
42 | /* | 45 | /* |
43 | * The inode lookup is done in batches to keep the amount of lock traffic and | 46 | * The inode lookup is done in batches to keep the amount of lock traffic and |
44 | * radix tree lookups to a minimum. The batch size is a trade off between | 47 | * radix tree lookups to a minimum. The batch size is a trade off between |
@@ -53,14 +56,30 @@ xfs_inode_ag_walk_grab( | |||
53 | { | 56 | { |
54 | struct inode *inode = VFS_I(ip); | 57 | struct inode *inode = VFS_I(ip); |
55 | 58 | ||
59 | ASSERT(rcu_read_lock_held()); | ||
60 | |||
61 | /* | ||
62 | * check for stale RCU freed inode | ||
63 | * | ||
64 | * If the inode has been reallocated, it doesn't matter if it's not in | ||
65 | * the AG we are walking - we are walking for writeback, so if it | ||
66 | * passes all the "valid inode" checks and is dirty, then we'll write | ||
67 | * it back anyway. If it has been reallocated and still being | ||
68 | * initialised, the XFS_INEW check below will catch it. | ||
69 | */ | ||
70 | spin_lock(&ip->i_flags_lock); | ||
71 | if (!ip->i_ino) | ||
72 | goto out_unlock_noent; | ||
73 | |||
74 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
75 | if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
76 | goto out_unlock_noent; | ||
77 | spin_unlock(&ip->i_flags_lock); | ||
78 | |||
56 | /* nothing to sync during shutdown */ | 79 | /* nothing to sync during shutdown */ |
57 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 80 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
58 | return EFSCORRUPTED; | 81 | return EFSCORRUPTED; |
59 | 82 | ||
60 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
61 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
62 | return ENOENT; | ||
63 | |||
64 | /* If we can't grab the inode, it must be on its way to reclaim. */ | 83 |
65 | if (!igrab(inode)) | 84 | if (!igrab(inode)) |
66 | return ENOENT; | 85 | return ENOENT; |
@@ -72,6 +91,10 @@ xfs_inode_ag_walk_grab( | |||
72 | 91 | ||
73 | /* inode is valid */ | 92 | /* inode is valid */ |
74 | return 0; | 93 | return 0; |
94 | |||
95 | out_unlock_noent: | ||
96 | spin_unlock(&ip->i_flags_lock); | ||
97 | return ENOENT; | ||
75 | } | 98 | } |
76 | 99 | ||
77 | STATIC int | 100 | STATIC int |
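The rewritten grab above is the standard recipe for referencing an object found through an RCU-protected radix tree: rcu_read_lock() only guarantees the memory is not freed, not that the slot still holds the inode that was looked up, so identity (ip->i_ino != 0) and lifecycle flags are rechecked under the object's own spinlock before igrab(). A generic sketch of the pattern (names hypothetical):

    struct obj *obj;

    rcu_read_lock();
    obj = radix_tree_lookup(&tree, index);
    if (obj) {
            spin_lock(&obj->lock);
            if (!obj->id || (obj->flags & (OBJ_NEW | OBJ_DYING))) {
                    /* stale or reused slot: act as if nothing was found */
                    spin_unlock(&obj->lock);
                    obj = NULL;
            } else {
                    spin_unlock(&obj->lock);
                    if (!get_ref(obj))              /* like igrab() */
                            obj = NULL;
            }
    }
    rcu_read_unlock();
    /* obj, if non-NULL, now holds a reference valid outside RCU */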
@@ -98,12 +121,12 @@ restart: | |||
98 | int error = 0; | 121 | int error = 0; |
99 | int i; | 122 | int i; |
100 | 123 | ||
101 | read_lock(&pag->pag_ici_lock); | 124 | rcu_read_lock(); |
102 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 125 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
103 | (void **)batch, first_index, | 126 | (void **)batch, first_index, |
104 | XFS_LOOKUP_BATCH); | 127 | XFS_LOOKUP_BATCH); |
105 | if (!nr_found) { | 128 | if (!nr_found) { |
106 | read_unlock(&pag->pag_ici_lock); | 129 | rcu_read_unlock(); |
107 | break; | 130 | break; |
108 | } | 131 | } |
109 | 132 | ||
@@ -118,18 +141,26 @@ restart: | |||
118 | batch[i] = NULL; | 141 | batch[i] = NULL; |
119 | 142 | ||
120 | /* | 143 | /* |
121 | * Update the index for the next lookup. Catch overflows | 144 | * Update the index for the next lookup. Catch |
122 | * into the next AG range which can occur if we have inodes | 145 | * overflows into the next AG range which can occur if |
123 | * in the last block of the AG and we are currently | 146 | * we have inodes in the last block of the AG and we |
124 | * pointing to the last inode. | 147 | * are currently pointing to the last inode. |
148 | * | ||
149 | * Because we may see inodes that are from the wrong AG | ||
150 | * due to RCU freeing and reallocation, only update the | ||
151 | * index if it lies in this AG. It was a race that led ||
152 | * us to see this inode, so another lookup from the | ||
153 | * same index will not find it again. | ||
125 | */ | 154 | */ |
155 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) | ||
156 | continue; | ||
126 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 157 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
127 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 158 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
128 | done = 1; | 159 | done = 1; |
129 | } | 160 | } |
130 | 161 | ||
131 | /* unlock now we've grabbed the inodes. */ | 162 | /* unlock now we've grabbed the inodes. */ |
132 | read_unlock(&pag->pag_ici_lock); | 163 | rcu_read_unlock(); |
133 | 164 | ||
134 | for (i = 0; i < nr_found; i++) { | 165 | for (i = 0; i < nr_found; i++) { |
135 | if (!batch[i]) | 166 | if (!batch[i]) |
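The other RCU consequence in this walker is the cursor rule: first_index only advances from inodes that actually belong to the AG being walked. Under RCU a freed slot can be reused by an inode from a different AG between the gang lookup and this check, and taking that inode's number as the next cursor would jump the walk out of the AG. The loop body, in outline:

    for (i = 0; i < nr_found; i++) {
            struct xfs_inode *ip = batch[i];

            if (done || xfs_inode_ag_walk_grab(ip))
                    batch[i] = NULL;        /* skipped when processing */

            /*
             * Only an inode in this AG may move the cursor; a
             * reallocated slot from another AG is just skipped.
             */
            if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
                    continue;
            first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
            if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
                    done = 1;               /* wrapped past the AG end */
    }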
@@ -334,7 +365,7 @@ xfs_quiesce_data( | |||
334 | 365 | ||
335 | /* mark the log as covered if needed */ | 366 | /* mark the log as covered if needed */ |
336 | if (xfs_log_need_covered(mp)) | 367 | if (xfs_log_need_covered(mp)) |
337 | error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); | 368 | error2 = xfs_fs_log_dummy(mp); |
338 | 369 | ||
339 | /* flush data-only devices */ | 370 | /* flush data-only devices */ |
340 | if (mp->m_rtdev_targp) | 371 | if (mp->m_rtdev_targp) |
@@ -373,7 +404,7 @@ xfs_quiesce_fs( | |||
373 | /* | 404 | /* |
374 | * Second stage of a quiesce. The data is already synced, now we have to take | 405 | * Second stage of a quiesce. The data is already synced, now we have to take |
375 | * care of the metadata. New transactions are already blocked, so we need to | 406 | * care of the metadata. New transactions are already blocked, so we need to |
376 | * wait for any remaining transactions to drain out before proceding. | 407 | * wait for any remaining transactions to drain out before proceeding. |
377 | */ | 408 | */ |
378 | void | 409 | void |
379 | xfs_quiesce_attr( | 410 | xfs_quiesce_attr( |
@@ -397,69 +428,18 @@ xfs_quiesce_attr( | |||
397 | /* Push the superblock and write an unmount record */ | 428 | /* Push the superblock and write an unmount record */ |
398 | error = xfs_log_sbcount(mp, 1); | 429 | error = xfs_log_sbcount(mp, 1); |
399 | if (error) | 430 | if (error) |
400 | xfs_fs_cmn_err(CE_WARN, mp, | 431 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " |
401 | "xfs_attr_quiesce: failed to log sb changes. " | ||
402 | "Frozen image may not be consistent."); | 432 | "Frozen image may not be consistent."); |
403 | xfs_log_unmount_write(mp); | 433 | xfs_log_unmount_write(mp); |
404 | xfs_unmountfs_writesb(mp); | 434 | xfs_unmountfs_writesb(mp); |
405 | } | 435 | } |
406 | 436 | ||
407 | /* | 437 | static void |
408 | * Enqueue a work item to be picked up by the vfs xfssyncd thread. | 438 | xfs_syncd_queue_sync( |
409 | * Doing this has two advantages: | 439 | struct xfs_mount *mp) |
410 | * - It saves on stack space, which is tight in certain situations | ||
411 | * - It can be used (with care) as a mechanism to avoid deadlocks. | ||
412 | * Flushing while allocating in a full filesystem requires both. | ||
413 | */ | ||
414 | STATIC void | ||
415 | xfs_syncd_queue_work( | ||
416 | struct xfs_mount *mp, | ||
417 | void *data, | ||
418 | void (*syncer)(struct xfs_mount *, void *), | ||
419 | struct completion *completion) | ||
420 | { | 440 | { |
421 | struct xfs_sync_work *work; | 441 | queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, |
422 | 442 | msecs_to_jiffies(xfs_syncd_centisecs * 10)); | |
423 | work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); | ||
424 | INIT_LIST_HEAD(&work->w_list); | ||
425 | work->w_syncer = syncer; | ||
426 | work->w_data = data; | ||
427 | work->w_mount = mp; | ||
428 | work->w_completion = completion; | ||
429 | spin_lock(&mp->m_sync_lock); | ||
430 | list_add_tail(&work->w_list, &mp->m_sync_list); | ||
431 | spin_unlock(&mp->m_sync_lock); | ||
432 | wake_up_process(mp->m_sync_task); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * Flush delayed allocate data, attempting to free up reserved space | ||
437 | * from existing allocations. At this point a new allocation attempt | ||
438 | * has failed with ENOSPC and we are in the process of scratching our | ||
439 | * heads, looking about for more room... | ||
440 | */ | ||
441 | STATIC void | ||
442 | xfs_flush_inodes_work( | ||
443 | struct xfs_mount *mp, | ||
444 | void *arg) | ||
445 | { | ||
446 | struct inode *inode = arg; | ||
447 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
448 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
449 | iput(inode); | ||
450 | } | ||
451 | |||
452 | void | ||
453 | xfs_flush_inodes( | ||
454 | xfs_inode_t *ip) | ||
455 | { | ||
456 | struct inode *inode = VFS_I(ip); | ||
457 | DECLARE_COMPLETION_ONSTACK(completion); | ||
458 | |||
459 | igrab(inode); | ||
460 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); | ||
461 | wait_for_completion(&completion); | ||
462 | xfs_log_force(ip->i_mount, XFS_LOG_SYNC); | ||
463 | } | 443 | } |
464 | 444 | ||
465 | /* | 445 | /* |
@@ -469,84 +449,119 @@ xfs_flush_inodes( | |||
469 | */ | 449 | */ |
470 | STATIC void | 450 | STATIC void |
471 | xfs_sync_worker( | 451 | xfs_sync_worker( |
472 | struct xfs_mount *mp, | 452 | struct work_struct *work) |
473 | void *unused) | ||
474 | { | 453 | { |
454 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
455 | struct xfs_mount, m_sync_work); | ||
475 | int error; | 456 | int error; |
476 | 457 | ||
477 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | 458 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { |
478 | xfs_log_force(mp, 0); | ||
479 | xfs_reclaim_inodes(mp, 0); | ||
480 | /* dgc: errors ignored here */ | 459 | /* dgc: errors ignored here */ |
481 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
482 | if (mp->m_super->s_frozen == SB_UNFROZEN && | 460 | if (mp->m_super->s_frozen == SB_UNFROZEN && |
483 | xfs_log_need_covered(mp)) | 461 | xfs_log_need_covered(mp)) |
484 | error = xfs_fs_log_dummy(mp, 0); | 462 | error = xfs_fs_log_dummy(mp); |
463 | else | ||
464 | xfs_log_force(mp, 0); | ||
465 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
466 | |||
467 | /* start pushing all the metadata that is currently dirty */ | ||
468 | xfs_ail_push_all(mp->m_ail); | ||
485 | } | 469 | } |
486 | mp->m_sync_seq++; | 470 | |
487 | wake_up(&mp->m_wait_single_sync_task); | 471 | /* queue us up again */ |
472 | xfs_syncd_queue_sync(mp); | ||
488 | } | 473 | } |
489 | 474 | ||
490 | STATIC int | 475 | /* |
491 | xfssyncd( | 476 | * Queue a new inode reclaim pass if there are reclaimable inodes and there |
492 | void *arg) | 477 | * isn't a reclaim pass already in progress. By default it runs every 5s based |
478 | * on the xfs syncd work default of 30s. Perhaps this should have its own ||
479 | * tunable, but that can be done if this method proves to be ineffective or too | ||
480 | * aggressive. | ||
481 | */ | ||
482 | static void | ||
483 | xfs_syncd_queue_reclaim( | ||
484 | struct xfs_mount *mp) | ||
493 | { | 485 | { |
494 | struct xfs_mount *mp = arg; | ||
495 | long timeleft; | ||
496 | xfs_sync_work_t *work, *n; | ||
497 | LIST_HEAD (tmp); | ||
498 | |||
499 | set_freezable(); | ||
500 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); | ||
501 | for (;;) { | ||
502 | if (list_empty(&mp->m_sync_list)) | ||
503 | timeleft = schedule_timeout_interruptible(timeleft); | ||
504 | /* swsusp */ | ||
505 | try_to_freeze(); | ||
506 | if (kthread_should_stop() && list_empty(&mp->m_sync_list)) | ||
507 | break; | ||
508 | 486 | ||
509 | spin_lock(&mp->m_sync_lock); | 487 | /* |
510 | /* | 488 | * We can have inodes enter reclaim after we've shut down the syncd |
511 | * We can get woken by laptop mode, to do a sync - | 489 | * workqueue during unmount, so don't allow reclaim work to be queued |
512 | * that's the (only!) case where the list would be | 490 | * during unmount. |
513 | * empty with time remaining. | 491 | */ |
514 | */ | 492 | if (!(mp->m_super->s_flags & MS_ACTIVE)) |
515 | if (!timeleft || list_empty(&mp->m_sync_list)) { | 493 | return; |
516 | if (!timeleft) | ||
517 | timeleft = xfs_syncd_centisecs * | ||
518 | msecs_to_jiffies(10); | ||
519 | INIT_LIST_HEAD(&mp->m_sync_work.w_list); | ||
520 | list_add_tail(&mp->m_sync_work.w_list, | ||
521 | &mp->m_sync_list); | ||
522 | } | ||
523 | list_splice_init(&mp->m_sync_list, &tmp); | ||
524 | spin_unlock(&mp->m_sync_lock); | ||
525 | 494 | ||
526 | list_for_each_entry_safe(work, n, &tmp, w_list) { | 495 | rcu_read_lock(); |
527 | (*work->w_syncer)(mp, work->w_data); | 496 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { |
528 | list_del(&work->w_list); | 497 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, |
529 | if (work == &mp->m_sync_work) | 498 | msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); |
530 | continue; | ||
531 | if (work->w_completion) | ||
532 | complete(work->w_completion); | ||
533 | kmem_free(work); | ||
534 | } | ||
535 | } | 499 | } |
500 | rcu_read_unlock(); | ||
501 | } | ||
536 | 502 | ||
537 | return 0; | 503 | /* |
504 | * This is a fast pass over the inode cache to try to get reclaim moving on as | ||
505 | * many inodes as possible in a short period of time. It kicks itself every few | ||
506 | * seconds, as well as being kicked by the inode cache shrinker when memory | ||
507 | * goes low. It scans as quickly as possible avoiding locked inodes or those | ||
508 | * already being flushed, and once done schedules a future pass. | ||
509 | */ | ||
510 | STATIC void | ||
511 | xfs_reclaim_worker( | ||
512 | struct work_struct *work) | ||
513 | { | ||
514 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
515 | struct xfs_mount, m_reclaim_work); | ||
516 | |||
517 | xfs_reclaim_inodes(mp, SYNC_TRYLOCK); | ||
518 | xfs_syncd_queue_reclaim(mp); | ||
519 | } | ||
520 | |||
521 | /* | ||
522 | * Flush delayed allocate data, attempting to free up reserved space | ||
523 | * from existing allocations. At this point a new allocation attempt | ||
524 | * has failed with ENOSPC and we are in the process of scratching our | ||
525 | * heads, looking about for more room. | ||
526 | * | ||
527 | * Queue a new data flush if there isn't one already in progress and | ||
528 | * wait for completion of the flush. This means that we only ever have one | ||
529 | * inode flush in progress no matter how many ENOSPC events are occurring and | ||
530 | * so will prevent the system from bogging down due to every concurrent | ||
531 | * ENOSPC event scanning all the active inodes in the system for writeback. | ||
532 | */ | ||
533 | void | ||
534 | xfs_flush_inodes( | ||
535 | struct xfs_inode *ip) | ||
536 | { | ||
537 | struct xfs_mount *mp = ip->i_mount; | ||
538 | |||
539 | queue_work(xfs_syncd_wq, &mp->m_flush_work); | ||
540 | flush_work_sync(&mp->m_flush_work); | ||
541 | } | ||
542 | |||
543 | STATIC void | ||
544 | xfs_flush_worker( | ||
545 | struct work_struct *work) | ||
546 | { | ||
547 | struct xfs_mount *mp = container_of(work, | ||
548 | struct xfs_mount, m_flush_work); | ||
549 | |||
550 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
551 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
538 | } | 552 | } |
539 | 553 | ||
540 | int | 554 | int |
541 | xfs_syncd_init( | 555 | xfs_syncd_init( |
542 | struct xfs_mount *mp) | 556 | struct xfs_mount *mp) |
543 | { | 557 | { |
544 | mp->m_sync_work.w_syncer = xfs_sync_worker; | 558 | INIT_WORK(&mp->m_flush_work, xfs_flush_worker); |
545 | mp->m_sync_work.w_mount = mp; | 559 | INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); |
546 | mp->m_sync_work.w_completion = NULL; | 560 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); |
547 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); | 561 | |
548 | if (IS_ERR(mp->m_sync_task)) | 562 | xfs_syncd_queue_sync(mp); |
549 | return -PTR_ERR(mp->m_sync_task); | 563 | xfs_syncd_queue_reclaim(mp); |
564 | |||
550 | return 0; | 565 | return 0; |
551 | } | 566 | } |
552 | 567 | ||
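xfs_syncd_init() now stands up three work items per mount instead of one kthread: a self-rearming delayed sync (xfs_sync_worker requeues itself), a self-rearming delayed reclaim pass (also kicked on demand when the reclaim tag is set and by the shrinker), and a plain work item for ENOSPC data flushes. The flush case gets single-flight semantics for free: queue_work() is a no-op while the item is already queued, and flush_work_sync() makes every caller wait for that one execution. That idiom, sketched with hypothetical names:

    static void my_flush_worker(struct work_struct *work)
    {
            struct my_fs *fs = container_of(work, struct my_fs, flush_work);

            my_writeback_everything(fs);    /* runs once per queueing */
    }

    void my_flush(struct my_fs *fs)
    {
            /* no-op if already queued: concurrent ENOSPC hitters share it */
            queue_work(system_wq, &fs->flush_work);
            /* every caller blocks until the shared flush has completed */
            flush_work_sync(&fs->flush_work);
    }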
@@ -554,7 +569,9 @@ void | |||
554 | xfs_syncd_stop( | 569 | xfs_syncd_stop( |
555 | struct xfs_mount *mp) | 570 | struct xfs_mount *mp) |
556 | { | 571 | { |
557 | kthread_stop(mp->m_sync_task); | 572 | cancel_delayed_work_sync(&mp->m_sync_work); |
573 | cancel_delayed_work_sync(&mp->m_reclaim_work); | ||
574 | cancel_work_sync(&mp->m_flush_work); | ||
558 | } | 575 | } |
559 | 576 | ||
560 | void | 577 | void |
@@ -573,6 +590,10 @@ __xfs_inode_set_reclaim_tag( | |||
573 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), | 590 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), |
574 | XFS_ICI_RECLAIM_TAG); | 591 | XFS_ICI_RECLAIM_TAG); |
575 | spin_unlock(&ip->i_mount->m_perag_lock); | 592 | spin_unlock(&ip->i_mount->m_perag_lock); |
593 | |||
594 | /* schedule periodic background inode reclaim */ | ||
595 | xfs_syncd_queue_reclaim(ip->i_mount); | ||
596 | |||
576 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, | 597 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, |
577 | -1, _RET_IP_); | 598 | -1, _RET_IP_); |
578 | } | 599 | } |
@@ -592,12 +613,12 @@ xfs_inode_set_reclaim_tag( | |||
592 | struct xfs_perag *pag; | 613 | struct xfs_perag *pag; |
593 | 614 | ||
594 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | 615 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
595 | write_lock(&pag->pag_ici_lock); | 616 | spin_lock(&pag->pag_ici_lock); |
596 | spin_lock(&ip->i_flags_lock); | 617 | spin_lock(&ip->i_flags_lock); |
597 | __xfs_inode_set_reclaim_tag(pag, ip); | 618 | __xfs_inode_set_reclaim_tag(pag, ip); |
598 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 619 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
599 | spin_unlock(&ip->i_flags_lock); | 620 | spin_unlock(&ip->i_flags_lock); |
600 | write_unlock(&pag->pag_ici_lock); | 621 | spin_unlock(&pag->pag_ici_lock); |
601 | xfs_perag_put(pag); | 622 | xfs_perag_put(pag); |
602 | } | 623 | } |
603 | 624 | ||
@@ -639,9 +660,14 @@ xfs_reclaim_inode_grab( | |||
639 | struct xfs_inode *ip, | 660 | struct xfs_inode *ip, |
640 | int flags) | 661 | int flags) |
641 | { | 662 | { |
663 | ASSERT(rcu_read_lock_held()); | ||
664 | |||
665 | /* quick check for stale RCU freed inode */ | ||
666 | if (!ip->i_ino) | ||
667 | return 1; | ||
642 | 668 | ||
643 | /* | 669 | /* |
644 | * do some unlocked checks first to avoid unnecceary lock traffic. | 670 | * do some unlocked checks first to avoid unnecessary lock traffic. |
645 | * The first is a flush lock check, the second is an already in reclaim | 671 | * The first is a flush lock check, the second is an already in reclaim
646 | * check. Only do these checks if we are not going to block on locks. | 672 | * check. Only do these checks if we are not going to block on locks. |
647 | */ | 673 | */ |
@@ -654,11 +680,16 @@ xfs_reclaim_inode_grab( | |||
654 | * The radix tree lock here protects a thread in xfs_iget from racing | 680 | * The radix tree lock here protects a thread in xfs_iget from racing |
655 | * with us starting reclaim on the inode. Once we have the | 681 | * with us starting reclaim on the inode. Once we have the |
656 | * XFS_IRECLAIM flag set it will not touch us. | 682 | * XFS_IRECLAIM flag set it will not touch us. |
683 | * | ||
684 | * Due to RCU lookup, we may find inodes that have been freed and only | ||
685 | * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that | ||
686 | * aren't candidates for reclaim at all, so we must check that ||
687 | * XFS_IRECLAIMABLE is set first before proceeding to reclaim. | ||
657 | */ | 688 | */ |
658 | spin_lock(&ip->i_flags_lock); | 689 | spin_lock(&ip->i_flags_lock); |
659 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | 690 | if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || |
660 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | 691 | __xfs_iflags_test(ip, XFS_IRECLAIM)) { |
661 | /* ignore as it is already under reclaim */ | 692 | /* not a reclaim candidate. */ |
662 | spin_unlock(&ip->i_flags_lock); | 693 | spin_unlock(&ip->i_flags_lock); |
663 | return 1; | 694 | return 1; |
664 | } | 695 | } |
@@ -723,8 +754,10 @@ xfs_reclaim_inode( | |||
723 | struct xfs_perag *pag, | 754 | struct xfs_perag *pag, |
724 | int sync_mode) | 755 | int sync_mode) |
725 | { | 756 | { |
726 | int error = 0; | 757 | int error; |
727 | 758 | ||
759 | restart: | ||
760 | error = 0; | ||
728 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 761 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
729 | if (!xfs_iflock_nowait(ip)) { | 762 | if (!xfs_iflock_nowait(ip)) { |
730 | if (!(sync_mode & SYNC_WAIT)) | 763 | if (!(sync_mode & SYNC_WAIT)) |
@@ -750,9 +783,31 @@ xfs_reclaim_inode( | |||
750 | if (xfs_inode_clean(ip)) | 783 | if (xfs_inode_clean(ip)) |
751 | goto reclaim; | 784 | goto reclaim; |
752 | 785 | ||
753 | /* Now we have an inode that needs flushing */ | 786 | /* |
754 | error = xfs_iflush(ip, sync_mode); | 787 | * Now we have an inode that needs flushing. |
788 | * | ||
789 | * We do a nonblocking flush here even if we are doing a SYNC_WAIT | ||
790 | * reclaim as we can deadlock with inode cluster removal. | ||
791 | * xfs_ifree_cluster() can lock the inode buffer before it locks the | ||
792 | * ip->i_lock, and we are doing the exact opposite here. As a result, | ||
793 | * doing a blocking xfs_itobp() to get the cluster buffer will result | ||
794 | * in an ABBA deadlock with xfs_ifree_cluster(). | ||
795 | * | ||
796 | * As xfs_ifree_cluster() must gather all inodes that are active in the ||
797 | * cache to mark them stale, if we hit this case we don't actually want | ||
798 | * to do IO here - we want the inode marked stale so we can simply | ||
799 | * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, | ||
800 | * just unlock the inode, back off and try again. Hopefully the next | ||
801 | * pass through will see the stale flag set on the inode. | ||
802 | */ | ||
803 | error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); | ||
755 | if (sync_mode & SYNC_WAIT) { | 804 | if (sync_mode & SYNC_WAIT) { |
805 | if (error == EAGAIN) { | ||
806 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
807 | /* backoff longer than in xfs_ifree_cluster */ | ||
808 | delay(2); | ||
809 | goto restart; | ||
810 | } | ||
756 | xfs_iflock(ip); | 811 | xfs_iflock(ip); |
757 | goto reclaim; | 812 | goto reclaim; |
758 | } | 813 | } |
@@ -767,7 +822,7 @@ xfs_reclaim_inode( | |||
767 | * pass on the error. | 822 | * pass on the error. |
768 | */ | 823 | */ |
769 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 824 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
770 | xfs_fs_cmn_err(CE_WARN, ip->i_mount, | 825 | xfs_warn(ip->i_mount, |
771 | "inode 0x%llx background reclaim flush failed with %d", | 826 | "inode 0x%llx background reclaim flush failed with %d", |
772 | (long long)ip->i_ino, error); | 827 | (long long)ip->i_ino, error); |
773 | } | 828 | } |
@@ -795,12 +850,12 @@ reclaim: | |||
795 | * added to the tree assert that it's been there before to catch | 850 | * added to the tree assert that it's been there before to catch |
796 | * problems with the inode life time early on. | 851 | * problems with the inode life time early on. |
797 | */ | 852 | */ |
798 | write_lock(&pag->pag_ici_lock); | 853 | spin_lock(&pag->pag_ici_lock); |
799 | if (!radix_tree_delete(&pag->pag_ici_root, | 854 | if (!radix_tree_delete(&pag->pag_ici_root, |
800 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 855 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
801 | ASSERT(0); | 856 | ASSERT(0); |
802 | __xfs_inode_clear_reclaim(pag, ip); | 857 | __xfs_inode_clear_reclaim(pag, ip); |
803 | write_unlock(&pag->pag_ici_lock); | 858 | spin_unlock(&pag->pag_ici_lock); |
804 | 859 | ||
805 | /* | 860 | /* |
806 | * Here we do an (almost) spurious inode lock in order to coordinate | 861 | * Here we do an (almost) spurious inode lock in order to coordinate |
@@ -864,14 +919,14 @@ restart: | |||
864 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | 919 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; |
865 | int i; | 920 | int i; |
866 | 921 | ||
867 | write_lock(&pag->pag_ici_lock); | 922 | rcu_read_lock(); |
868 | nr_found = radix_tree_gang_lookup_tag( | 923 | nr_found = radix_tree_gang_lookup_tag( |
869 | &pag->pag_ici_root, | 924 | &pag->pag_ici_root, |
870 | (void **)batch, first_index, | 925 | (void **)batch, first_index, |
871 | XFS_LOOKUP_BATCH, | 926 | XFS_LOOKUP_BATCH, |
872 | XFS_ICI_RECLAIM_TAG); | 927 | XFS_ICI_RECLAIM_TAG); |
873 | if (!nr_found) { | 928 | if (!nr_found) { |
874 | write_unlock(&pag->pag_ici_lock); | 929 | rcu_read_unlock(); |
875 | break; | 930 | break; |
876 | } | 931 | } |
877 | 932 | ||
@@ -891,14 +946,24 @@ restart: | |||
891 | * occur if we have inodes in the last block of | 946 | * occur if we have inodes in the last block of |
892 | * the AG and we are currently pointing to the | 947 | * the AG and we are currently pointing to the |
893 | * last inode. | 948 | * last inode. |
949 | * | ||
950 | * Because we may see inodes that are from the | ||
951 | * wrong AG due to RCU freeing and | ||
952 | * reallocation, only update the index if it | ||
953 | * lies in this AG. It was a race that led us ||
954 | * to see this inode, so another lookup from | ||
955 | * the same index will not find it again. | ||
894 | */ | 956 | */ |
957 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != | ||
958 | pag->pag_agno) | ||
959 | continue; | ||
895 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 960 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
896 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 961 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
897 | done = 1; | 962 | done = 1; |
898 | } | 963 | } |
899 | 964 | ||
900 | /* unlock now we've grabbed the inodes. */ | 965 | /* unlock now we've grabbed the inodes. */ |
901 | write_unlock(&pag->pag_ici_lock); | 966 | rcu_read_unlock(); |
902 | 967 | ||
903 | for (i = 0; i < nr_found; i++) { | 968 | for (i = 0; i < nr_found; i++) { |
904 | if (!batch[i]) | 969 | if (!batch[i]) |
@@ -945,7 +1010,13 @@ xfs_reclaim_inodes( | |||
945 | } | 1010 | } |
946 | 1011 | ||
947 | /* | 1012 | /* |
948 | * Shrinker infrastructure. | 1013 | * Inode cache shrinker. |
1014 | * | ||
1015 | * When called we make sure that there is a background (fast) inode reclaim in | ||
1016 | * progress, while we will throttle the speed of reclaim via doing synchronous ||
1017 | * reclaim of inodes. That means if we come across dirty inodes, we wait for | ||
1018 | * them to be cleaned, which we hope will not be very long due to the | ||
1019 | * background walker having already kicked the IO off on those dirty inodes. | ||
949 | */ | 1020 | */ |
950 | static int | 1021 | static int |
951 | xfs_reclaim_inode_shrink( | 1022 | xfs_reclaim_inode_shrink( |
@@ -960,10 +1031,15 @@ xfs_reclaim_inode_shrink( | |||
960 | 1031 | ||
961 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); | 1032 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); |
962 | if (nr_to_scan) { | 1033 | if (nr_to_scan) { |
1034 | /* kick background reclaimer and push the AIL */ | ||
1035 | xfs_syncd_queue_reclaim(mp); | ||
1036 | xfs_ail_push_all(mp->m_ail); | ||
1037 | |||
963 | if (!(gfp_mask & __GFP_FS)) | 1038 | if (!(gfp_mask & __GFP_FS)) |
964 | return -1; | 1039 | return -1; |
965 | 1040 | ||
966 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); | 1041 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, |
1042 | &nr_to_scan); | ||
967 | /* terminate if we don't exhaust the scan */ | 1043 | /* terminate if we don't exhaust the scan */ |
968 | if (nr_to_scan > 0) | 1044 | if (nr_to_scan > 0) |
969 | return -1; | 1045 | return -1; |
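The shrinker keeps the same division of labour: a non-zero nr_to_scan means the VM wants memory back now, so it first kicks the asynchronous helpers (the background reclaim worker plus an AIL push to get dirty metadata moving) and then, if the allocation context permits, does throttled synchronous reclaim itself. Returning -1 when __GFP_FS is clear tells the VM this shrinker cannot safely recurse into the filesystem from here. The 2.6.39-era callback contract, sketched (my_* names hypothetical):

    struct my_cache {
            struct shrinker shrinker;
            /* ... cached objects ... */
    };

    static void my_reclaim(struct my_cache *c, int *nr_to_scan);
    static int my_count_reclaimable(struct my_cache *c);

    static int my_shrink(struct shrinker *shrink, int nr_to_scan,
                         gfp_t gfp_mask)
    {
            struct my_cache *c = container_of(shrink, struct my_cache,
                                              shrinker);

            if (nr_to_scan) {
                    if (!(gfp_mask & __GFP_FS))
                            return -1;      /* can't recurse into the FS */
                    my_reclaim(c, &nr_to_scan);     /* decrements as it goes */
                    if (nr_to_scan > 0)
                            return -1;      /* nothing left to scan */
            }
            return my_count_reclaimable(c); /* size estimate for the VM */
    }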
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 32ba6628290c..e3a6ad27415f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work { | |||
32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | 32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ | 33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ |
34 | 34 | ||
35 | extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
36 | |||
35 | int xfs_syncd_init(struct xfs_mount *mp); | 37 | int xfs_syncd_init(struct xfs_mount *mp); |
36 | void xfs_syncd_stop(struct xfs_mount *mp); | 38 | void xfs_syncd_stop(struct xfs_mount *mp); |
37 | 39 | ||
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7bb5092d6ae4..ee2d2adaa438 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include <linux/sysctl.h> | 19 | #include <linux/sysctl.h> |
20 | #include <linux/proc_fs.h> | 20 | #include <linux/proc_fs.h> |
21 | #include "xfs_error.h" | ||
21 | 22 | ||
22 | static struct ctl_table_header *xfs_table_header; | 23 | static struct ctl_table_header *xfs_table_header; |
23 | 24 | ||
@@ -36,7 +37,7 @@ xfs_stats_clear_proc_handler( | |||
36 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); | 37 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); |
37 | 38 | ||
38 | if (!ret && write && *valp) { | 39 | if (!ret && write && *valp) { |
39 | printk("XFS Clearing xfsstats\n"); | 40 | xfs_notice(NULL, "Clearing xfsstats"); |
40 | for_each_possible_cpu(c) { | 41 | for_each_possible_cpu(c) { |
41 | preempt_disable(); | 42 | preempt_disable(); |
42 | /* save vn_active, it's a universal truth! */ | 43 | /* save vn_active, it's a universal truth! */ |
@@ -51,6 +52,26 @@ xfs_stats_clear_proc_handler( | |||
51 | 52 | ||
52 | return ret; | 53 | return ret; |
53 | } | 54 | } |
55 | |||
56 | STATIC int | ||
57 | xfs_panic_mask_proc_handler( | ||
58 | ctl_table *ctl, | ||
59 | int write, | ||
60 | void __user *buffer, | ||
61 | size_t *lenp, | ||
62 | loff_t *ppos) | ||
63 | { | ||
64 | int ret, *valp = ctl->data; | ||
65 | |||
66 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); | ||
67 | if (!ret && write) { | ||
68 | xfs_panic_mask = *valp; | ||
69 | #ifdef DEBUG | ||
70 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); | ||
71 | #endif | ||
72 | } | ||
73 | return ret; | ||
74 | } | ||
54 | #endif /* CONFIG_PROC_FS */ | 75 | #endif /* CONFIG_PROC_FS */ |
55 | 76 | ||
56 | static ctl_table xfs_table[] = { | 77 | static ctl_table xfs_table[] = { |
@@ -77,7 +98,7 @@ static ctl_table xfs_table[] = { | |||
77 | .data = &xfs_params.panic_mask.val, | 98 | .data = &xfs_params.panic_mask.val, |
78 | .maxlen = sizeof(int), | 99 | .maxlen = sizeof(int), |
79 | .mode = 0644, | 100 | .mode = 0644, |
80 | .proc_handler = proc_dointvec_minmax, | 101 | .proc_handler = xfs_panic_mask_proc_handler, |
81 | .extra1 = &xfs_params.panic_mask.min, | 102 | .extra1 = &xfs_params.panic_mask.min, |
82 | .extra2 = &xfs_params.panic_mask.max | 103 | .extra2 = &xfs_params.panic_mask.max |
83 | }, | 104 | }, |
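The handler swap is the standard way to run code after a sysctl write: delegate parsing and the .extra1/.extra2 range clamping to proc_dointvec_minmax(), and only on a successful write mirror the value into the live global (here xfs_panic_mask, with DEBUG builds forcing the shutdown-corruption and log-reservation panic tags on). The reusable shape (my_* names hypothetical):

    static int
    my_proc_handler(ctl_table *ctl, int write, void __user *buffer,
                    size_t *lenp, loff_t *ppos)
    {
            int ret, *valp = ctl->data;

            /* copies to/from userspace and enforces the min/max bounds */
            ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
            if (!ret && write)
                    my_apply_setting(*valp);        /* react to new value */
            return ret;
    }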
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index acef2e98c594..2d0bcb479075 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
766 | __field(int, curr_res) | 766 | __field(int, curr_res) |
767 | __field(int, unit_res) | 767 | __field(int, unit_res) |
768 | __field(unsigned int, flags) | 768 | __field(unsigned int, flags) |
769 | __field(void *, reserve_headq) | 769 | __field(int, reserveq) |
770 | __field(void *, write_headq) | 770 | __field(int, writeq) |
771 | __field(int, grant_reserve_cycle) | 771 | __field(int, grant_reserve_cycle) |
772 | __field(int, grant_reserve_bytes) | 772 | __field(int, grant_reserve_bytes) |
773 | __field(int, grant_write_cycle) | 773 | __field(int, grant_write_cycle) |
@@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
784 | __entry->curr_res = tic->t_curr_res; | 784 | __entry->curr_res = tic->t_curr_res; |
785 | __entry->unit_res = tic->t_unit_res; | 785 | __entry->unit_res = tic->t_unit_res; |
786 | __entry->flags = tic->t_flags; | 786 | __entry->flags = tic->t_flags; |
787 | __entry->reserve_headq = log->l_reserve_headq; | 787 | __entry->reserveq = list_empty(&log->l_reserveq); |
788 | __entry->write_headq = log->l_write_headq; | 788 | __entry->writeq = list_empty(&log->l_writeq); |
789 | __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; | 789 | xlog_crack_grant_head(&log->l_grant_reserve_head, |
790 | __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; | 790 | &__entry->grant_reserve_cycle, |
791 | __entry->grant_write_cycle = log->l_grant_write_cycle; | 791 | &__entry->grant_reserve_bytes); |
792 | __entry->grant_write_bytes = log->l_grant_write_bytes; | 792 | xlog_crack_grant_head(&log->l_grant_write_head, |
793 | &__entry->grant_write_cycle, | ||
794 | &__entry->grant_write_bytes); | ||
793 | __entry->curr_cycle = log->l_curr_cycle; | 795 | __entry->curr_cycle = log->l_curr_cycle; |
794 | __entry->curr_block = log->l_curr_block; | 796 | __entry->curr_block = log->l_curr_block; |
795 | __entry->tail_lsn = log->l_tail_lsn; | 797 | __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); |
796 | ), | 798 | ), |
797 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " | 799 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " |
798 | "t_unit_res %u t_flags %s reserve_headq 0x%p " | 800 | "t_unit_res %u t_flags %s reserveq %s " |
799 | "write_headq 0x%p grant_reserve_cycle %d " | 801 | "writeq %s grant_reserve_cycle %d " |
800 | "grant_reserve_bytes %d grant_write_cycle %d " | 802 | "grant_reserve_bytes %d grant_write_cycle %d " |
801 | "grant_write_bytes %d curr_cycle %d curr_block %d " | 803 | "grant_write_bytes %d curr_cycle %d curr_block %d " |
802 | "tail_cycle %d tail_block %d", | 804 | "tail_cycle %d tail_block %d", |
@@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
807 | __entry->curr_res, | 809 | __entry->curr_res, |
808 | __entry->unit_res, | 810 | __entry->unit_res, |
809 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), | 811 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), |
810 | __entry->reserve_headq, | 812 | __entry->reserveq ? "empty" : "active", |
811 | __entry->write_headq, | 813 | __entry->writeq ? "empty" : "active", |
812 | __entry->grant_reserve_cycle, | 814 | __entry->grant_reserve_cycle, |
813 | __entry->grant_reserve_bytes, | 815 | __entry->grant_reserve_bytes, |
814 | __entry->grant_write_cycle, | 816 | __entry->grant_write_cycle, |
@@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); | |||
835 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); | 837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); |
836 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); | 838 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); |
837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); | 839 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); | ||
838 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); | 841 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); |
839 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); | 842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); | 843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); |
@@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); | |||
842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); | 845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); |
843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); | 846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); |
844 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); | 847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); |
848 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); | ||
845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); | 849 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); |
846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); | 850 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); |
847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); | 851 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); |
@@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage); | |||
935 | DEFINE_PAGE_EVENT(xfs_releasepage); | 939 | DEFINE_PAGE_EVENT(xfs_releasepage); |
936 | DEFINE_PAGE_EVENT(xfs_invalidatepage); | 940 | DEFINE_PAGE_EVENT(xfs_invalidatepage); |
937 | 941 | ||
938 | DECLARE_EVENT_CLASS(xfs_iomap_class, | 942 | DECLARE_EVENT_CLASS(xfs_imap_class, |
939 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, | 943 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, |
940 | int flags, struct xfs_bmbt_irec *irec), | 944 | int type, struct xfs_bmbt_irec *irec), |
941 | TP_ARGS(ip, offset, count, flags, irec), | 945 | TP_ARGS(ip, offset, count, type, irec), |
942 | TP_STRUCT__entry( | 946 | TP_STRUCT__entry( |
943 | __field(dev_t, dev) | 947 | __field(dev_t, dev) |
944 | __field(xfs_ino_t, ino) | 948 | __field(xfs_ino_t, ino) |
@@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
946 | __field(loff_t, new_size) | 950 | __field(loff_t, new_size) |
947 | __field(loff_t, offset) | 951 | __field(loff_t, offset) |
948 | __field(size_t, count) | 952 | __field(size_t, count) |
949 | __field(int, flags) | 953 | __field(int, type) |
950 | __field(xfs_fileoff_t, startoff) | 954 | __field(xfs_fileoff_t, startoff) |
951 | __field(xfs_fsblock_t, startblock) | 955 | __field(xfs_fsblock_t, startblock) |
952 | __field(xfs_filblks_t, blockcount) | 956 | __field(xfs_filblks_t, blockcount) |
@@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
958 | __entry->new_size = ip->i_new_size; | 962 | __entry->new_size = ip->i_new_size; |
959 | __entry->offset = offset; | 963 | __entry->offset = offset; |
960 | __entry->count = count; | 964 | __entry->count = count; |
961 | __entry->flags = flags; | 965 | __entry->type = type; |
962 | __entry->startoff = irec ? irec->br_startoff : 0; | 966 | __entry->startoff = irec ? irec->br_startoff : 0; |
963 | __entry->startblock = irec ? irec->br_startblock : 0; | 967 | __entry->startblock = irec ? irec->br_startblock : 0; |
964 | __entry->blockcount = irec ? irec->br_blockcount : 0; | 968 | __entry->blockcount = irec ? irec->br_blockcount : 0; |
965 | ), | 969 | ), |
966 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " | 970 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " |
967 | "offset 0x%llx count %zd flags %s " | 971 | "offset 0x%llx count %zd type %s " |
968 | "startoff 0x%llx startblock %lld blockcount 0x%llx", | 972 | "startoff 0x%llx startblock %lld blockcount 0x%llx", |
969 | MAJOR(__entry->dev), MINOR(__entry->dev), | 973 | MAJOR(__entry->dev), MINOR(__entry->dev), |
970 | __entry->ino, | 974 | __entry->ino, |
@@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
972 | __entry->new_size, | 976 | __entry->new_size, |
973 | __entry->offset, | 977 | __entry->offset, |
974 | __entry->count, | 978 | __entry->count, |
975 | __print_flags(__entry->flags, "|", BMAPI_FLAGS), | 979 | __print_symbolic(__entry->type, XFS_IO_TYPES), |
976 | __entry->startoff, | 980 | __entry->startoff, |
977 | (__int64_t)__entry->startblock, | 981 | (__int64_t)__entry->startblock, |
978 | __entry->blockcount) | 982 | __entry->blockcount) |
979 | ) | 983 | ) |
980 | 984 | ||
981 | #define DEFINE_IOMAP_EVENT(name) \ | 985 | #define DEFINE_IOMAP_EVENT(name) \ |
982 | DEFINE_EVENT(xfs_iomap_class, name, \ | 986 | DEFINE_EVENT(xfs_imap_class, name, \ |
983 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ | 987 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ |
984 | int flags, struct xfs_bmbt_irec *irec), \ | 988 | int type, struct xfs_bmbt_irec *irec), \ |
985 | TP_ARGS(ip, offset, count, flags, irec)) | 989 | TP_ARGS(ip, offset, count, type, irec)) |
986 | DEFINE_IOMAP_EVENT(xfs_iomap_enter); | 990 | DEFINE_IOMAP_EVENT(xfs_map_blocks_found); |
987 | DEFINE_IOMAP_EVENT(xfs_iomap_found); | 991 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); |
988 | DEFINE_IOMAP_EVENT(xfs_iomap_alloc); | 992 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); |
993 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); | ||
989 | 994 | ||
990 | DECLARE_EVENT_CLASS(xfs_simple_io_class, | 995 | DECLARE_EVENT_CLASS(xfs_simple_io_class, |
991 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), | 996 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), |
@@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ | |||
1022 | TP_ARGS(ip, offset, count)) | 1027 | TP_ARGS(ip, offset, count)) |
1023 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); | 1028 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); |
1024 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); | 1029 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); |
1030 | DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); | ||
1025 | 1031 | ||
1026 | 1032 | ||
1027 | TRACE_EVENT(xfs_itruncate_start, | 1033 | TRACE_EVENT(xfs_itruncate_start, |
@@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \ | |||
1420 | TP_PROTO(struct xfs_alloc_arg *args), \ | 1426 | TP_PROTO(struct xfs_alloc_arg *args), \ |
1421 | TP_ARGS(args)) | 1427 | TP_ARGS(args)) |
1422 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); | 1428 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); |
1429 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); | ||
1423 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); | 1430 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); |
1424 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); | 1431 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); |
1425 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); | 1432 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); |
@@ -1752,6 +1759,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); | |||
1752 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); | 1759 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); |
1753 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); | 1760 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); |
1754 | 1761 | ||
1762 | DECLARE_EVENT_CLASS(xfs_discard_class, | ||
1763 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
1764 | xfs_agblock_t agbno, xfs_extlen_t len), | ||
1765 | TP_ARGS(mp, agno, agbno, len), | ||
1766 | TP_STRUCT__entry( | ||
1767 | __field(dev_t, dev) | ||
1768 | __field(xfs_agnumber_t, agno) | ||
1769 | __field(xfs_agblock_t, agbno) | ||
1770 | __field(xfs_extlen_t, len) | ||
1771 | ), | ||
1772 | TP_fast_assign( | ||
1773 | __entry->dev = mp->m_super->s_dev; | ||
1774 | __entry->agno = agno; | ||
1775 | __entry->agbno = agbno; | ||
1776 | __entry->len = len; | ||
1777 | ), | ||
1778 | TP_printk("dev %d:%d agno %u agbno %u len %u\n", | ||
1779 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1780 | __entry->agno, | ||
1781 | __entry->agbno, | ||
1782 | __entry->len) | ||
1783 | ) | ||
1784 | |||
1785 | #define DEFINE_DISCARD_EVENT(name) \ | ||
1786 | DEFINE_EVENT(xfs_discard_class, name, \ | ||
1787 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | ||
1788 | xfs_agblock_t agbno, xfs_extlen_t len), \ | ||
1789 | TP_ARGS(mp, agno, agbno, len)) | ||
1790 | DEFINE_DISCARD_EVENT(xfs_discard_extent); | ||
1791 | DEFINE_DISCARD_EVENT(xfs_discard_toosmall); | ||
1792 | DEFINE_DISCARD_EVENT(xfs_discard_exclude); | ||
1793 | DEFINE_DISCARD_EVENT(xfs_discard_busy); | ||
1794 | |||
1755 | #endif /* _TRACE_XFS_H */ | 1795 | #endif /* _TRACE_XFS_H */ |
1756 | 1796 | ||
1757 | #undef TRACE_INCLUDE_PATH | 1797 | #undef TRACE_INCLUDE_PATH |
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index faf8e1a83a12..6fa214603819 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -149,7 +149,6 @@ xfs_qm_dqdestroy( | |||
149 | ASSERT(list_empty(&dqp->q_freelist)); | 149 | ASSERT(list_empty(&dqp->q_freelist)); |
150 | 150 | ||
151 | mutex_destroy(&dqp->q_qlock); | 151 | mutex_destroy(&dqp->q_qlock); |
152 | sv_destroy(&dqp->q_pinwait); | ||
153 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); | 152 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); |
154 | 153 | ||
155 | atomic_dec(&xfs_Gqm->qm_totaldquots); | 154 | atomic_dec(&xfs_Gqm->qm_totaldquots); |
@@ -545,9 +544,10 @@ xfs_qm_dqtobp( | |||
545 | /* | 544 | /* |
546 | * A simple sanity check in case we got a corrupted dquot... | 545 | * A simple sanity check in case we got a corrupted dquot... |
547 | */ | 546 | */ |
548 | if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, | 547 | error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, |
549 | flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), | 548 | flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), |
550 | "dqtobp")) { | 549 | "dqtobp"); |
550 | if (error) { | ||
551 | if (!(flags & XFS_QMOPT_DQREPAIR)) { | 551 | if (!(flags & XFS_QMOPT_DQREPAIR)) { |
552 | xfs_trans_brelse(tp, bp); | 552 | xfs_trans_brelse(tp, bp); |
553 | return XFS_ERROR(EIO); | 553 | return XFS_ERROR(EIO); |
@@ -600,7 +600,7 @@ xfs_qm_dqread( | |||
600 | 600 | ||
601 | /* | 601 | /* |
602 | * Reservation counters are defined as reservation plus current usage | 602 | * Reservation counters are defined as reservation plus current usage |
603 | * to avoid having to add everytime. | 603 | * to avoid having to add every time. |
604 | */ | 604 | */ |
605 | dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); | 605 | dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); |
606 | dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); | 606 | dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); |
@@ -828,7 +828,7 @@ xfs_qm_dqget( | |||
828 | if (xfs_do_dqerror) { | 828 | if (xfs_do_dqerror) { |
829 | if ((xfs_dqerror_target == mp->m_ddev_targp) && | 829 | if ((xfs_dqerror_target == mp->m_ddev_targp) && |
830 | (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { | 830 | (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { |
831 | cmn_err(CE_DEBUG, "Returning error in dqget"); | 831 | xfs_debug(mp, "Returning error in dqget"); |
832 | return (EIO); | 832 | return (EIO); |
833 | } | 833 | } |
834 | } | 834 | } |
@@ -1208,8 +1208,9 @@ xfs_qm_dqflush( | |||
1208 | /* | 1208 | /* |
1209 | * A simple sanity check in case we got a corrupted dquot.. | 1209 | * A simple sanity check in case we got a corrupted dquot.. |
1210 | */ | 1210 | */ |
1211 | if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, | 1211 | error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, |
1212 | XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { | 1212 | XFS_QMOPT_DOWARN, "dqflush (incore copy)"); |
1213 | if (error) { | ||
1213 | xfs_buf_relse(bp); | 1214 | xfs_buf_relse(bp); |
1214 | xfs_dqfunlock(dqp); | 1215 | xfs_dqfunlock(dqp); |
1215 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 1216 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
@@ -1392,8 +1393,8 @@ xfs_qm_dqpurge( | |||
1392 | */ | 1393 | */ |
1393 | error = xfs_qm_dqflush(dqp, SYNC_WAIT); | 1394 | error = xfs_qm_dqflush(dqp, SYNC_WAIT); |
1394 | if (error) | 1395 | if (error) |
1395 | xfs_fs_cmn_err(CE_WARN, mp, | 1396 | xfs_warn(mp, "%s: dquot %p flush failed", |
1396 | "xfs_qm_dqpurge: dquot %p flush failed", dqp); | 1397 | __func__, dqp); |
1397 | xfs_dqflock(dqp); | 1398 | xfs_dqflock(dqp); |
1398 | } | 1399 | } |
1399 | ASSERT(atomic_read(&dqp->q_pincount) == 0); | 1400 | ASSERT(atomic_read(&dqp->q_pincount) == 0); |
@@ -1426,36 +1427,38 @@ xfs_qm_dqpurge( | |||
1426 | void | 1427 | void |
1427 | xfs_qm_dqprint(xfs_dquot_t *dqp) | 1428 | xfs_qm_dqprint(xfs_dquot_t *dqp) |
1428 | { | 1429 | { |
1429 | cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); | 1430 | struct xfs_mount *mp = dqp->q_mount; |
1430 | cmn_err(CE_DEBUG, "---- dquotID = %d", | 1431 | |
1432 | xfs_debug(mp, "-----------KERNEL DQUOT----------------"); | ||
1433 | xfs_debug(mp, "---- dquotID = %d", | ||
1431 | (int)be32_to_cpu(dqp->q_core.d_id)); | 1434 | (int)be32_to_cpu(dqp->q_core.d_id)); |
1432 | cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); | 1435 | xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); |
1433 | cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); | 1436 | xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount); |
1434 | cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); | 1437 | xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno); |
1435 | cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); | 1438 | xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset); |
1436 | cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)", | 1439 | xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)", |
1437 | be64_to_cpu(dqp->q_core.d_blk_hardlimit), | 1440 | be64_to_cpu(dqp->q_core.d_blk_hardlimit), |
1438 | (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); | 1441 | (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); |
1439 | cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)", | 1442 | xfs_debug(mp, "---- blkslimit = %Lu (0x%x)", |
1440 | be64_to_cpu(dqp->q_core.d_blk_softlimit), | 1443 | be64_to_cpu(dqp->q_core.d_blk_softlimit), |
1441 | (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); | 1444 | (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); |
1442 | cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)", | 1445 | xfs_debug(mp, "---- inohlimit = %Lu (0x%x)", |
1443 | be64_to_cpu(dqp->q_core.d_ino_hardlimit), | 1446 | be64_to_cpu(dqp->q_core.d_ino_hardlimit), |
1444 | (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); | 1447 | (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); |
1445 | cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)", | 1448 | xfs_debug(mp, "---- inoslimit = %Lu (0x%x)", |
1446 | be64_to_cpu(dqp->q_core.d_ino_softlimit), | 1449 | be64_to_cpu(dqp->q_core.d_ino_softlimit), |
1447 | (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); | 1450 | (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); |
1448 | cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", | 1451 | xfs_debug(mp, "---- bcount = %Lu (0x%x)", |
1449 | be64_to_cpu(dqp->q_core.d_bcount), | 1452 | be64_to_cpu(dqp->q_core.d_bcount), |
1450 | (int)be64_to_cpu(dqp->q_core.d_bcount)); | 1453 | (int)be64_to_cpu(dqp->q_core.d_bcount)); |
1451 | cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", | 1454 | xfs_debug(mp, "---- icount = %Lu (0x%x)", |
1452 | be64_to_cpu(dqp->q_core.d_icount), | 1455 | be64_to_cpu(dqp->q_core.d_icount), |
1453 | (int)be64_to_cpu(dqp->q_core.d_icount)); | 1456 | (int)be64_to_cpu(dqp->q_core.d_icount)); |
1454 | cmn_err(CE_DEBUG, "---- btimer = %d", | 1457 | xfs_debug(mp, "---- btimer = %d", |
1455 | (int)be32_to_cpu(dqp->q_core.d_btimer)); | 1458 | (int)be32_to_cpu(dqp->q_core.d_btimer)); |
1456 | cmn_err(CE_DEBUG, "---- itimer = %d", | 1459 | xfs_debug(mp, "---- itimer = %d", |
1457 | (int)be32_to_cpu(dqp->q_core.d_itimer)); | 1460 | (int)be32_to_cpu(dqp->q_core.d_itimer)); |
1458 | cmn_err(CE_DEBUG, "---------------------------"); | 1461 | xfs_debug(mp, "---------------------------"); |
1459 | } | 1462 | } |
1460 | #endif | 1463 | #endif |
1461 | 1464 | ||
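The hunks above replace the old level-multiplexed cmn_err()/xfs_fs_cmn_err() calls with the per-level, mount-aware helpers (xfs_debug(), xfs_warn(), xfs_err(), ...) that take the struct xfs_mount directly, so call sites stop passing mp->m_fsname by hand and a NULL mount is tolerated. A minimal standalone C sketch of that calling convention (the struct and the prefix format are illustrative stand-ins, not the kernel definitions):

```c
#include <stdarg.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel type; not the real definition. */
struct xfs_mount {
	const char	*m_fsname;
};

/*
 * Sketch of the mount-aware, printf-style helper family: the filesystem
 * name is prefixed automatically, and a NULL mount is tolerated, so
 * callers no longer format "XFS ... %s" with mp->m_fsname themselves.
 */
static void xfs_debug_sketch(struct xfs_mount *mp, const char *fmt, ...)
{
	va_list	ap;

	if (mp)
		printf("XFS (%s): ", mp->m_fsname);
	else
		printf("XFS: ");
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
	putchar('\n');
}

int main(void)
{
	struct xfs_mount mp = { .m_fsname = "sda1" };

	xfs_debug_sketch(&mp, "Returning error in dqget");
	xfs_debug_sketch(NULL, "possible memory allocation deadlock in %s",
			 "kmem_alloc");
	return 0;
}
```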
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 2a1f3dc10a02..9e0e2fa3f2c8 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c | |||
@@ -136,9 +136,8 @@ xfs_qm_dquot_logitem_push( | |||
136 | */ | 136 | */ |
137 | error = xfs_qm_dqflush(dqp, 0); | 137 | error = xfs_qm_dqflush(dqp, 0); |
138 | if (error) | 138 | if (error) |
139 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | 139 | xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", |
140 | "xfs_qm_dquot_logitem_push: push error %d on dqp %p", | 140 | __func__, error, dqp); |
141 | error, dqp); | ||
142 | xfs_dqunlock(dqp); | 141 | xfs_dqunlock(dqp); |
143 | } | 142 | } |
144 | 143 | ||
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index f8e854b4fde8..69228aa8605a 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -80,7 +80,7 @@ xfs_qm_dquot_list_print( | |||
80 | int i = 0; | 80 | int i = 0; |
81 | 81 | ||
82 | list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { | 82 | list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { |
83 | cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " | 83 | xfs_debug(mp, " %d. \"%d (%s)\" " |
84 | "bcnt = %lld, icnt = %lld, refs = %d", | 84 | "bcnt = %lld, icnt = %lld, refs = %d", |
85 | i++, be32_to_cpu(dqp->q_core.d_id), | 85 | i++, be32_to_cpu(dqp->q_core.d_id), |
86 | DQFLAGTO_TYPESTR(dqp), | 86 | DQFLAGTO_TYPESTR(dqp), |
@@ -205,7 +205,7 @@ xfs_qm_destroy( | |||
205 | list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { | 205 | list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { |
206 | xfs_dqlock(dqp); | 206 | xfs_dqlock(dqp); |
207 | #ifdef QUOTADEBUG | 207 | #ifdef QUOTADEBUG |
208 | cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); | 208 | xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp); |
209 | #endif | 209 | #endif |
210 | list_del_init(&dqp->q_freelist); | 210 | list_del_init(&dqp->q_freelist); |
211 | xfs_Gqm->qm_dqfrlist_cnt--; | 211 | xfs_Gqm->qm_dqfrlist_cnt--; |
@@ -341,9 +341,7 @@ xfs_qm_mount_quotas( | |||
341 | * quotas immediately. | 341 | * quotas immediately. |
342 | */ | 342 | */ |
343 | if (mp->m_sb.sb_rextents) { | 343 | if (mp->m_sb.sb_rextents) { |
344 | cmn_err(CE_NOTE, | 344 | xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); |
345 | "Cannot turn on quotas for realtime filesystem %s", | ||
346 | mp->m_fsname); | ||
347 | mp->m_qflags = 0; | 345 | mp->m_qflags = 0; |
348 | goto write_changes; | 346 | goto write_changes; |
349 | } | 347 | } |
@@ -402,14 +400,13 @@ xfs_qm_mount_quotas( | |||
402 | * off, but the on disk superblock doesn't know that ! | 400 | * off, but the on disk superblock doesn't know that ! |
403 | */ | 401 | */ |
404 | ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); | 402 | ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); |
405 | xfs_fs_cmn_err(CE_ALERT, mp, | 403 | xfs_alert(mp, "%s: Superblock update failed!", |
406 | "XFS mount_quotas: Superblock update failed!"); | 404 | __func__); |
407 | } | 405 | } |
408 | } | 406 | } |
409 | 407 | ||
410 | if (error) { | 408 | if (error) { |
411 | xfs_fs_cmn_err(CE_WARN, mp, | 409 | xfs_warn(mp, "Failed to initialize disk quotas."); |
412 | "Failed to initialize disk quotas."); | ||
413 | return; | 410 | return; |
414 | } | 411 | } |
415 | 412 | ||
@@ -464,12 +461,10 @@ xfs_qm_dqflush_all( | |||
464 | struct xfs_quotainfo *q = mp->m_quotainfo; | 461 | struct xfs_quotainfo *q = mp->m_quotainfo; |
465 | int recl; | 462 | int recl; |
466 | struct xfs_dquot *dqp; | 463 | struct xfs_dquot *dqp; |
467 | int niters; | ||
468 | int error; | 464 | int error; |
469 | 465 | ||
470 | if (!q) | 466 | if (!q) |
471 | return 0; | 467 | return 0; |
472 | niters = 0; | ||
473 | again: | 468 | again: |
474 | mutex_lock(&q->qi_dqlist_lock); | 469 | mutex_lock(&q->qi_dqlist_lock); |
475 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { | 470 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { |
@@ -1230,13 +1225,6 @@ xfs_qm_qino_alloc( | |||
1230 | } | 1225 | } |
1231 | 1226 | ||
1232 | /* | 1227 | /* |
1233 | * Keep an extra reference to this quota inode. This inode is | ||
1234 | * locked exclusively and joined to the transaction already. | ||
1235 | */ | ||
1236 | ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); | ||
1237 | IHOLD(*ip); | ||
1238 | |||
1239 | /* | ||
1240 | * Make the changes in the superblock, and log those too. | 1228 | * Make the changes in the superblock, and log those too. |
1241 | * sbfields arg may contain fields other than *QUOTINO; | 1229 | * sbfields arg may contain fields other than *QUOTINO; |
1242 | * VERSIONNUM for example. | 1230 | * VERSIONNUM for example. |
@@ -1264,7 +1252,7 @@ xfs_qm_qino_alloc( | |||
1264 | xfs_mod_sb(tp, sbfields); | 1252 | xfs_mod_sb(tp, sbfields); |
1265 | 1253 | ||
1266 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { | 1254 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { |
1267 | xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); | 1255 | xfs_alert(mp, "%s failed (error %d)!", __func__, error); |
1268 | return error; | 1256 | return error; |
1269 | } | 1257 | } |
1270 | return 0; | 1258 | return 0; |
@@ -1299,7 +1287,7 @@ xfs_qm_reset_dqcounts( | |||
1299 | * output any warnings because it's perfectly possible to | 1287 | * output any warnings because it's perfectly possible to |
1300 | * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. | 1288 | * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. |
1301 | */ | 1289 | */ |
1302 | (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, | 1290 | (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, |
1303 | "xfs_quotacheck"); | 1291 | "xfs_quotacheck"); |
1304 | ddq->d_bcount = 0; | 1292 | ddq->d_bcount = 0; |
1305 | ddq->d_icount = 0; | 1293 | ddq->d_icount = 0; |
@@ -1324,14 +1312,9 @@ xfs_qm_dqiter_bufs( | |||
1324 | { | 1312 | { |
1325 | xfs_buf_t *bp; | 1313 | xfs_buf_t *bp; |
1326 | int error; | 1314 | int error; |
1327 | int notcommitted; | ||
1328 | int incr; | ||
1329 | int type; | 1315 | int type; |
1330 | 1316 | ||
1331 | ASSERT(blkcnt > 0); | 1317 | ASSERT(blkcnt > 0); |
1332 | notcommitted = 0; | ||
1333 | incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? | ||
1334 | XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; | ||
1335 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : | 1318 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : |
1336 | (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); | 1319 | (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); |
1337 | error = 0; | 1320 | error = 0; |
@@ -1676,7 +1659,7 @@ xfs_qm_quotacheck( | |||
1676 | */ | 1659 | */ |
1677 | ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); | 1660 | ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); |
1678 | 1661 | ||
1679 | cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); | 1662 | xfs_notice(mp, "Quotacheck needed: Please wait."); |
1680 | 1663 | ||
1681 | /* | 1664 | /* |
1682 | * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset | 1665 | * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset |
@@ -1754,9 +1737,9 @@ xfs_qm_quotacheck( | |||
1754 | 1737 | ||
1755 | error_return: | 1738 | error_return: |
1756 | if (error) { | 1739 | if (error) { |
1757 | cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): " | 1740 | xfs_warn(mp, |
1758 | "Disabling quotas.", | 1741 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", |
1759 | mp->m_fsname, error); | 1742 | error); |
1760 | /* | 1743 | /* |
1761 | * We must turn off quotas. | 1744 | * We must turn off quotas. |
1762 | */ | 1745 | */ |
@@ -1764,12 +1747,11 @@ xfs_qm_quotacheck( | |||
1764 | ASSERT(xfs_Gqm != NULL); | 1747 | ASSERT(xfs_Gqm != NULL); |
1765 | xfs_qm_destroy_quotainfo(mp); | 1748 | xfs_qm_destroy_quotainfo(mp); |
1766 | if (xfs_mount_reset_sbqflags(mp)) { | 1749 | if (xfs_mount_reset_sbqflags(mp)) { |
1767 | cmn_err(CE_WARN, "XFS quotacheck %s: " | 1750 | xfs_warn(mp, |
1768 | "Failed to reset quota flags.", mp->m_fsname); | 1751 | "Quotacheck: Failed to reset quota flags."); |
1769 | } | 1752 | } |
1770 | } else { | 1753 | } else |
1771 | cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); | 1754 | xfs_notice(mp, "Quotacheck: Done."); |
1772 | } | ||
1773 | return (error); | 1755 | return (error); |
1774 | } | 1756 | } |
1775 | 1757 | ||
@@ -1863,12 +1845,14 @@ xfs_qm_dqreclaim_one(void) | |||
1863 | xfs_dquot_t *dqpout; | 1845 | xfs_dquot_t *dqpout; |
1864 | xfs_dquot_t *dqp; | 1846 | xfs_dquot_t *dqp; |
1865 | int restarts; | 1847 | int restarts; |
1848 | int startagain; | ||
1866 | 1849 | ||
1867 | restarts = 0; | 1850 | restarts = 0; |
1868 | dqpout = NULL; | 1851 | dqpout = NULL; |
1869 | 1852 | ||
1870 | /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ | 1853 | /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ |
1871 | startagain: | 1854 | again: |
1855 | startagain = 0; | ||
1872 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); | 1856 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); |
1873 | 1857 | ||
1874 | list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { | 1858 | list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { |
@@ -1885,13 +1869,10 @@ startagain: | |||
1885 | ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); | 1869 | ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); |
1886 | 1870 | ||
1887 | trace_xfs_dqreclaim_want(dqp); | 1871 | trace_xfs_dqreclaim_want(dqp); |
1888 | |||
1889 | xfs_dqunlock(dqp); | ||
1890 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1891 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | ||
1892 | return NULL; | ||
1893 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); | 1872 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); |
1894 | goto startagain; | 1873 | restarts++; |
1874 | startagain = 1; | ||
1875 | goto dqunlock; | ||
1895 | } | 1876 | } |
1896 | 1877 | ||
1897 | /* | 1878 | /* |
@@ -1906,23 +1887,20 @@ startagain: | |||
1906 | ASSERT(list_empty(&dqp->q_mplist)); | 1887 | ASSERT(list_empty(&dqp->q_mplist)); |
1907 | list_del_init(&dqp->q_freelist); | 1888 | list_del_init(&dqp->q_freelist); |
1908 | xfs_Gqm->qm_dqfrlist_cnt--; | 1889 | xfs_Gqm->qm_dqfrlist_cnt--; |
1909 | xfs_dqunlock(dqp); | ||
1910 | dqpout = dqp; | 1890 | dqpout = dqp; |
1911 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); | 1891 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); |
1912 | break; | 1892 | goto dqunlock; |
1913 | } | 1893 | } |
1914 | 1894 | ||
1915 | ASSERT(dqp->q_hash); | 1895 | ASSERT(dqp->q_hash); |
1916 | ASSERT(!list_empty(&dqp->q_mplist)); | 1896 | ASSERT(!list_empty(&dqp->q_mplist)); |
1917 | 1897 | ||
1918 | /* | 1898 | /* |
1919 | * Try to grab the flush lock. If this dquot is in the process of | 1899 | * Try to grab the flush lock. If this dquot is in the process |
1920 | * getting flushed to disk, we don't want to reclaim it. | 1900 | * of getting flushed to disk, we don't want to reclaim it. |
1921 | */ | 1901 | */ |
1922 | if (!xfs_dqflock_nowait(dqp)) { | 1902 | if (!xfs_dqflock_nowait(dqp)) |
1923 | xfs_dqunlock(dqp); | 1903 | goto dqunlock; |
1924 | continue; | ||
1925 | } | ||
1926 | 1904 | ||
1927 | /* | 1905 | /* |
1928 | * We have the flush lock so we know that this is not in the | 1906 | * We have the flush lock so we know that this is not in the |
@@ -1941,11 +1919,10 @@ startagain: | |||
1941 | */ | 1919 | */ |
1942 | error = xfs_qm_dqflush(dqp, 0); | 1920 | error = xfs_qm_dqflush(dqp, 0); |
1943 | if (error) { | 1921 | if (error) { |
1944 | xfs_fs_cmn_err(CE_WARN, mp, | 1922 | xfs_warn(mp, "%s: dquot %p flush failed", |
1945 | "xfs_qm_dqreclaim: dquot %p flush failed", dqp); | 1923 | __func__, dqp); |
1946 | } | 1924 | } |
1947 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | 1925 | goto dqunlock; |
1948 | continue; | ||
1949 | } | 1926 | } |
1950 | 1927 | ||
1951 | /* | 1928 | /* |
@@ -1967,13 +1944,8 @@ startagain: | |||
1967 | */ | 1944 | */ |
1968 | if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { | 1945 | if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { |
1969 | restarts++; | 1946 | restarts++; |
1970 | mutex_unlock(&dqp->q_hash->qh_lock); | 1947 | startagain = 1; |
1971 | xfs_dqfunlock(dqp); | 1948 | goto qhunlock; |
1972 | xfs_dqunlock(dqp); | ||
1973 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1974 | if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS) | ||
1975 | return NULL; | ||
1976 | goto startagain; | ||
1977 | } | 1949 | } |
1978 | 1950 | ||
1979 | ASSERT(dqp->q_nrefs == 0); | 1951 | ASSERT(dqp->q_nrefs == 0); |
@@ -1986,14 +1958,20 @@ startagain: | |||
1986 | xfs_Gqm->qm_dqfrlist_cnt--; | 1958 | xfs_Gqm->qm_dqfrlist_cnt--; |
1987 | dqpout = dqp; | 1959 | dqpout = dqp; |
1988 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); | 1960 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); |
1961 | qhunlock: | ||
1989 | mutex_unlock(&dqp->q_hash->qh_lock); | 1962 | mutex_unlock(&dqp->q_hash->qh_lock); |
1990 | dqfunlock: | 1963 | dqfunlock: |
1991 | xfs_dqfunlock(dqp); | 1964 | xfs_dqfunlock(dqp); |
1965 | dqunlock: | ||
1992 | xfs_dqunlock(dqp); | 1966 | xfs_dqunlock(dqp); |
1993 | if (dqpout) | 1967 | if (dqpout) |
1994 | break; | 1968 | break; |
1995 | if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | 1969 | if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) |
1996 | return NULL; | 1970 | break; |
1971 | if (startagain) { | ||
1972 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1973 | goto again; | ||
1974 | } | ||
1997 | } | 1975 | } |
1998 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | 1976 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
1999 | return dqpout; | 1977 | return dqpout; |
@@ -2119,7 +2097,7 @@ xfs_qm_write_sb_changes( | |||
2119 | int error; | 2097 | int error; |
2120 | 2098 | ||
2121 | #ifdef QUOTADEBUG | 2099 | #ifdef QUOTADEBUG |
2122 | cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname); | 2100 | xfs_notice(mp, "Writing superblock quota changes"); |
2123 | #endif | 2101 | #endif |
2124 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | 2102 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); |
2125 | if ((error = xfs_trans_reserve(tp, 0, | 2103 | if ((error = xfs_trans_reserve(tp, 0, |
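The xfs_qm_dqreclaim_one() rework in this file replaces the duplicated unlock-and-restart sequences with a startagain flag plus a chain of unwind labels (qhunlock/dqfunlock/dqunlock), so each lock is dropped exactly once, in reverse acquisition order, on every exit path. A minimal userspace sketch of the idiom, with hypothetical lock names:

```c
#include <pthread.h>
#include <stdio.h>

/*
 * Unwind-label sketch: every exit path jumps to the deepest label it has
 * reached, so each lock is released exactly once and in reverse
 * acquisition order, instead of duplicating unlock calls at each return.
 */
static pthread_mutex_t hash_lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t flush_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dq_lock    = PTHREAD_MUTEX_INITIALIZER;

static int try_reclaim(int contended)
{
	int startagain = 0;

	pthread_mutex_lock(&dq_lock);
	if (pthread_mutex_trylock(&flush_lock) != 0)
		goto dqunlock;			/* flush busy: skip this one */
	pthread_mutex_lock(&hash_lock);
	if (contended) {
		startagain = 1;			/* note it, unwind, retry */
		goto qhunlock;
	}
	printf("reclaimed one dquot\n");
qhunlock:
	pthread_mutex_unlock(&hash_lock);
	pthread_mutex_unlock(&flush_lock);	/* the "dqfunlock" step */
dqunlock:
	pthread_mutex_unlock(&dq_lock);
	return startagain;
}

int main(void)
{
	while (try_reclaim(0))
		;				/* loop mirrors "goto again" */
	return 0;
}
```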
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index c9446f1c726d..567b29b9f1b3 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
@@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone; | |||
65 | * block in the dquot/xqm code. | 65 | * block in the dquot/xqm code. |
66 | */ | 66 | */ |
67 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 | 67 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 |
68 | /* | ||
69 | * When doing a quotacheck, we log dquot clusters of this many FSBs at most | ||
70 | * in a single transaction. We don't want to ask for too huge a log reservation. | ||
71 | */ | ||
72 | #define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 | ||
73 | 68 | ||
74 | typedef xfs_dqhash_t xfs_dqlist_t; | 69 | typedef xfs_dqhash_t xfs_dqlist_t; |
75 | 70 | ||
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 45b5cb1788ab..a0a829addca9 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c | |||
@@ -119,8 +119,7 @@ xfs_qm_newmount( | |||
119 | (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || | 119 | (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || |
120 | (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && | 120 | (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && |
121 | xfs_dev_is_read_only(mp, "changing quota state")) { | 121 | xfs_dev_is_read_only(mp, "changing quota state")) { |
122 | cmn_err(CE_WARN, | 122 | xfs_warn(mp, "please mount with%s%s%s%s.", |
123 | "XFS: please mount with%s%s%s%s.", | ||
124 | (!quotaondisk ? "out quota" : ""), | 123 | (!quotaondisk ? "out quota" : ""), |
125 | (uquotaondisk ? " usrquota" : ""), | 124 | (uquotaondisk ? " usrquota" : ""), |
126 | (pquotaondisk ? " prjquota" : ""), | 125 | (pquotaondisk ? " prjquota" : ""), |
@@ -135,7 +134,7 @@ xfs_qm_newmount( | |||
135 | */ | 134 | */ |
136 | if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { | 135 | if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { |
137 | /* | 136 | /* |
138 | * If an error occured, qm_mount_quotas code | 137 | * If an error occurred, qm_mount_quotas code |
139 | * has already disabled quotas. So, just finish | 138 | * has already disabled quotas. So, just finish |
140 | * mounting, and get on with the boring life | 139 | * mounting, and get on with the boring life |
141 | * without disk quotas. | 140 | * without disk quotas. |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index bdebc183223e..2dadb15d5ca9 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -41,12 +41,6 @@ | |||
41 | #include "xfs_qm.h" | 41 | #include "xfs_qm.h" |
42 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
43 | 43 | ||
44 | #ifdef DEBUG | ||
45 | # define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args) | ||
46 | #else | ||
47 | # define qdprintk(s, args...) do { } while (0) | ||
48 | #endif | ||
49 | |||
50 | STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); | 44 | STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); |
51 | STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, | 45 | STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, |
52 | uint); | 46 | uint); |
@@ -178,7 +172,7 @@ xfs_qm_scall_quotaoff( | |||
178 | /* | 172 | /* |
179 | * Next we make the changes in the quota flag in the mount struct. | 173 | * Next we make the changes in the quota flag in the mount struct. |
180 | * This isn't protected by a particular lock directly, because we | 174 | * This isn't protected by a particular lock directly, because we |
181 | * don't want to take a mrlock everytime we depend on quotas being on. | 175 | * don't want to take a mrlock every time we depend on quotas being on. |
182 | */ | 176 | */ |
183 | mp->m_qflags &= ~(flags); | 177 | mp->m_qflags &= ~(flags); |
184 | 178 | ||
@@ -294,7 +288,8 @@ xfs_qm_scall_trunc_qfiles( | |||
294 | int error = 0, error2 = 0; | 288 | int error = 0, error2 = 0; |
295 | 289 | ||
296 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { | 290 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { |
297 | qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); | 291 | xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", |
292 | __func__, flags, mp->m_qflags); | ||
298 | return XFS_ERROR(EINVAL); | 293 | return XFS_ERROR(EINVAL); |
299 | } | 294 | } |
300 | 295 | ||
@@ -318,20 +313,19 @@ xfs_qm_scall_quotaon( | |||
318 | { | 313 | { |
319 | int error; | 314 | int error; |
320 | uint qf; | 315 | uint qf; |
321 | uint accflags; | ||
322 | __int64_t sbflags; | 316 | __int64_t sbflags; |
323 | 317 | ||
324 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); | 318 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); |
325 | /* | 319 | /* |
326 | * Switching on quota accounting must be done at mount time. | 320 | * Switching on quota accounting must be done at mount time. |
327 | */ | 321 | */ |
328 | accflags = flags & XFS_ALL_QUOTA_ACCT; | ||
329 | flags &= ~(XFS_ALL_QUOTA_ACCT); | 322 | flags &= ~(XFS_ALL_QUOTA_ACCT); |
330 | 323 | ||
331 | sbflags = 0; | 324 | sbflags = 0; |
332 | 325 | ||
333 | if (flags == 0) { | 326 | if (flags == 0) { |
334 | qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags); | 327 | xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", |
328 | __func__, mp->m_qflags); | ||
335 | return XFS_ERROR(EINVAL); | 329 | return XFS_ERROR(EINVAL); |
336 | } | 330 | } |
337 | 331 | ||
@@ -352,12 +346,13 @@ xfs_qm_scall_quotaon( | |||
352 | (flags & XFS_GQUOTA_ACCT) == 0 && | 346 | (flags & XFS_GQUOTA_ACCT) == 0 && |
353 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && | 347 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && |
354 | (flags & XFS_OQUOTA_ENFD))) { | 348 | (flags & XFS_OQUOTA_ENFD))) { |
355 | qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", | 349 | xfs_debug(mp, |
356 | flags, mp->m_sb.sb_qflags); | 350 | "%s: Can't enforce without acct, flags=%x sbflags=%x\n", |
351 | __func__, flags, mp->m_sb.sb_qflags); | ||
357 | return XFS_ERROR(EINVAL); | 352 | return XFS_ERROR(EINVAL); |
358 | } | 353 | } |
359 | /* | 354 | /* |
360 | * If everything's upto-date incore, then don't waste time. | 355 | * If everything's up to date incore, then don't waste time. |
361 | */ | 356 | */ |
362 | if ((mp->m_qflags & flags) == flags) | 357 | if ((mp->m_qflags & flags) == flags) |
363 | return XFS_ERROR(EEXIST); | 358 | return XFS_ERROR(EEXIST); |
@@ -541,7 +536,7 @@ xfs_qm_scall_setqlim( | |||
541 | q->qi_bsoftlimit = soft; | 536 | q->qi_bsoftlimit = soft; |
542 | } | 537 | } |
543 | } else { | 538 | } else { |
544 | qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); | 539 | xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); |
545 | } | 540 | } |
546 | hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? | 541 | hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? |
547 | (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : | 542 | (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : |
@@ -557,7 +552,7 @@ xfs_qm_scall_setqlim( | |||
557 | q->qi_rtbsoftlimit = soft; | 552 | q->qi_rtbsoftlimit = soft; |
558 | } | 553 | } |
559 | } else { | 554 | } else { |
560 | qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); | 555 | xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); |
561 | } | 556 | } |
562 | 557 | ||
563 | hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? | 558 | hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? |
@@ -574,7 +569,7 @@ xfs_qm_scall_setqlim( | |||
574 | q->qi_isoftlimit = soft; | 569 | q->qi_isoftlimit = soft; |
575 | } | 570 | } |
576 | } else { | 571 | } else { |
577 | qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); | 572 | xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); |
578 | } | 573 | } |
579 | 574 | ||
580 | /* | 575 | /* |
@@ -939,10 +934,11 @@ struct mutex qcheck_lock; | |||
939 | #define DQTEST_LIST_PRINT(l, NXT, title) \ | 934 | #define DQTEST_LIST_PRINT(l, NXT, title) \ |
940 | { \ | 935 | { \ |
941 | xfs_dqtest_t *dqp; int i = 0;\ | 936 | xfs_dqtest_t *dqp; int i = 0;\ |
942 | cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ | 937 | xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \ |
943 | for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ | 938 | for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ |
944 | dqp = (xfs_dqtest_t *)dqp->NXT) { \ | 939 | dqp = (xfs_dqtest_t *)dqp->NXT) { \ |
945 | cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ | 940 | xfs_debug(dqp->q_mount, \ |
941 | " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ | ||
946 | ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ | 942 | ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ |
947 | dqp->d_bcount, dqp->d_icount); } \ | 943 | dqp->d_bcount, dqp->d_icount); } \ |
948 | } | 944 | } |
@@ -966,16 +962,17 @@ xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) | |||
966 | } | 962 | } |
967 | STATIC void | 963 | STATIC void |
968 | xfs_qm_dqtest_print( | 964 | xfs_qm_dqtest_print( |
969 | xfs_dqtest_t *d) | 965 | struct xfs_mount *mp, |
966 | struct dqtest *d) | ||
970 | { | 967 | { |
971 | cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); | 968 | xfs_debug(mp, "-----------DQTEST DQUOT----------------"); |
972 | cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); | 969 | xfs_debug(mp, "---- dquot ID = %d", d->d_id); |
973 | cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); | 970 | xfs_debug(mp, "---- fs = 0x%p", d->q_mount); |
974 | cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", | 971 | xfs_debug(mp, "---- bcount = %Lu (0x%x)", |
975 | d->d_bcount, (int)d->d_bcount); | 972 | d->d_bcount, (int)d->d_bcount); |
976 | cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", | 973 | xfs_debug(mp, "---- icount = %Lu (0x%x)", |
977 | d->d_icount, (int)d->d_icount); | 974 | d->d_icount, (int)d->d_icount); |
978 | cmn_err(CE_DEBUG, "---------------------------"); | 975 | xfs_debug(mp, "---------------------------"); |
979 | } | 976 | } |
980 | 977 | ||
981 | STATIC void | 978 | STATIC void |
@@ -989,12 +986,14 @@ xfs_qm_dqtest_failed( | |||
989 | { | 986 | { |
990 | qmtest_nfails++; | 987 | qmtest_nfails++; |
991 | if (error) | 988 | if (error) |
992 | cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s", | 989 | xfs_debug(dqp->q_mount, |
993 | d->d_id, error, reason); | 990 | "quotacheck failed id=%d, err=%d\nreason: %s", |
991 | d->d_id, error, reason); | ||
994 | else | 992 | else |
995 | cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]", | 993 | xfs_debug(dqp->q_mount, |
996 | d->d_id, reason, (int)a, (int)b); | 994 | "quotacheck failed id=%d (%s) [%d != %d]", |
997 | xfs_qm_dqtest_print(d); | 995 | d->d_id, reason, (int)a, (int)b); |
996 | xfs_qm_dqtest_print(dqp->q_mount, d); | ||
998 | if (dqp) | 997 | if (dqp) |
999 | xfs_qm_dqprint(dqp); | 998 | xfs_qm_dqprint(dqp); |
1000 | } | 999 | } |
@@ -1021,9 +1020,9 @@ xfs_dqtest_cmp2( | |||
1021 | be64_to_cpu(dqp->q_core.d_bcount) >= | 1020 | be64_to_cpu(dqp->q_core.d_bcount) >= |
1022 | be64_to_cpu(dqp->q_core.d_blk_softlimit)) { | 1021 | be64_to_cpu(dqp->q_core.d_blk_softlimit)) { |
1023 | if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { | 1022 | if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { |
1024 | cmn_err(CE_DEBUG, | 1023 | xfs_debug(dqp->q_mount, |
1025 | "%d [%s] [0x%p] BLK TIMER NOT STARTED", | 1024 | "%d [%s] BLK TIMER NOT STARTED", |
1026 | d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); | 1025 | d->d_id, DQFLAGTO_TYPESTR(d)); |
1027 | err++; | 1026 | err++; |
1028 | } | 1027 | } |
1029 | } | 1028 | } |
@@ -1031,16 +1030,16 @@ xfs_dqtest_cmp2( | |||
1031 | be64_to_cpu(dqp->q_core.d_icount) >= | 1030 | be64_to_cpu(dqp->q_core.d_icount) >= |
1032 | be64_to_cpu(dqp->q_core.d_ino_softlimit)) { | 1031 | be64_to_cpu(dqp->q_core.d_ino_softlimit)) { |
1033 | if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { | 1032 | if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { |
1034 | cmn_err(CE_DEBUG, | 1033 | xfs_debug(dqp->q_mount, |
1035 | "%d [%s] [0x%p] INO TIMER NOT STARTED", | 1034 | "%d [%s] INO TIMER NOT STARTED", |
1036 | d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); | 1035 | d->d_id, DQFLAGTO_TYPESTR(d)); |
1037 | err++; | 1036 | err++; |
1038 | } | 1037 | } |
1039 | } | 1038 | } |
1040 | #ifdef QUOTADEBUG | 1039 | #ifdef QUOTADEBUG |
1041 | if (!err) { | 1040 | if (!err) { |
1042 | cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", | 1041 | xfs_debug(dqp->q_mount, "%d [%s] qchecked", |
1043 | d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); | 1042 | d->d_id, DQFLAGTO_TYPESTR(d)); |
1044 | } | 1043 | } |
1045 | #endif | 1044 | #endif |
1046 | return (err); | 1045 | return (err); |
@@ -1137,8 +1136,8 @@ xfs_qm_internalqcheck_adjust( | |||
1137 | 1136 | ||
1138 | if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { | 1137 | if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { |
1139 | *res = BULKSTAT_RV_NOTHING; | 1138 | *res = BULKSTAT_RV_NOTHING; |
1140 | qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n", | 1139 | xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n", |
1141 | (unsigned long long) ino, | 1140 | __func__, (unsigned long long) ino, |
1142 | (unsigned long long) mp->m_sb.sb_uquotino, | 1141 | (unsigned long long) mp->m_sb.sb_uquotino, |
1143 | (unsigned long long) mp->m_sb.sb_gquotino); | 1142 | (unsigned long long) mp->m_sb.sb_gquotino); |
1144 | return XFS_ERROR(EINVAL); | 1143 | return XFS_ERROR(EINVAL); |
@@ -1223,12 +1222,12 @@ xfs_qm_internalqcheck( | |||
1223 | xfs_qm_internalqcheck_adjust, | 1222 | xfs_qm_internalqcheck_adjust, |
1224 | 0, NULL, &done); | 1223 | 0, NULL, &done); |
1225 | if (error) { | 1224 | if (error) { |
1226 | cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); | 1225 | xfs_debug(mp, "Bulkstat returned error 0x%x", error); |
1227 | break; | 1226 | break; |
1228 | } | 1227 | } |
1229 | } while (!done); | 1228 | } while (!done); |
1230 | 1229 | ||
1231 | cmn_err(CE_DEBUG, "Checking results against system dquots"); | 1230 | xfs_debug(mp, "Checking results against system dquots"); |
1232 | for (i = 0; i < qmtest_hashmask; i++) { | 1231 | for (i = 0; i < qmtest_hashmask; i++) { |
1233 | xfs_dqtest_t *d, *n; | 1232 | xfs_dqtest_t *d, *n; |
1234 | xfs_dqhash_t *h; | 1233 | xfs_dqhash_t *h; |
@@ -1246,10 +1245,10 @@ xfs_qm_internalqcheck( | |||
1246 | } | 1245 | } |
1247 | 1246 | ||
1248 | if (qmtest_nfails) { | 1247 | if (qmtest_nfails) { |
1249 | cmn_err(CE_DEBUG, "******** quotacheck failed ********"); | 1248 | xfs_debug(mp, "******** quotacheck failed ********"); |
1250 | cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails); | 1249 | xfs_debug(mp, "failures = %d", qmtest_nfails); |
1251 | } else { | 1250 | } else { |
1252 | cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); | 1251 | xfs_debug(mp, "******** quotacheck successful! ********"); |
1253 | } | 1252 | } |
1254 | kmem_free(qmtest_udqtab); | 1253 | kmem_free(qmtest_udqtab); |
1255 | kmem_free(qmtest_gdqtab); | 1254 | kmem_free(qmtest_gdqtab); |
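The qdprintk() macro deleted at the top of this file was the usual compile-out debug wrapper; the patch folds its call sites into the shared xfs_debug() helper, gaining the mount pointer and __func__ context at each site. A sketch of the pattern being removed (illustrative, userspace):

```c
#include <stdio.h>

#define DEBUG 1	/* undefine to compile the macro away, as non-debug builds did */

/*
 * File-local debug printf compiled away outside DEBUG builds -- the
 * ad-hoc pattern this series replaces with the shared xfs_debug().
 */
#ifdef DEBUG
# define qdprintk(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
#else
# define qdprintk(fmt, ...) do { } while (0)
#endif

int main(void)
{
	qdprintk("quotaon: zero flags, m_qflags=%x\n", 0u);
	return 0;
}
```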
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 7de91d1b75c0..2a3648731331 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c | |||
@@ -643,8 +643,9 @@ xfs_trans_dqresv( | |||
643 | (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && | 643 | (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && |
644 | (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { | 644 | (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { |
645 | #ifdef QUOTADEBUG | 645 | #ifdef QUOTADEBUG |
646 | cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld" | 646 | xfs_debug(mp, |
647 | " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit); | 647 | "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?", |
648 | nblks, *resbcountp, hardlimit); | ||
648 | #endif | 649 | #endif |
649 | if (nblks > 0) { | 650 | if (nblks > 0) { |
650 | /* | 651 | /* |
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c deleted file mode 100644 index 975aa10e1a47..000000000000 --- a/fs/xfs/support/debug.c +++ /dev/null | |||
@@ -1,115 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include <xfs.h> | ||
19 | #include "debug.h" | ||
20 | |||
21 | /* xfs_mount.h drags a lot of crap in, sorry.. */ | ||
22 | #include "xfs_sb.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | ||
26 | #include "xfs_error.h" | ||
27 | |||
28 | static char message[1024]; /* keep it off the stack */ | ||
29 | static DEFINE_SPINLOCK(xfs_err_lock); | ||
30 | |||
31 | /* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */ | ||
32 | #define XFS_MAX_ERR_LEVEL 7 | ||
33 | #define XFS_ERR_MASK ((1 << 3) - 1) | ||
34 | static const char * const err_level[XFS_MAX_ERR_LEVEL+1] = | ||
35 | {KERN_EMERG, KERN_ALERT, KERN_CRIT, | ||
36 | KERN_ERR, KERN_WARNING, KERN_NOTICE, | ||
37 | KERN_INFO, KERN_DEBUG}; | ||
38 | |||
39 | void | ||
40 | cmn_err(register int level, char *fmt, ...) | ||
41 | { | ||
42 | char *fp = fmt; | ||
43 | int len; | ||
44 | ulong flags; | ||
45 | va_list ap; | ||
46 | |||
47 | level &= XFS_ERR_MASK; | ||
48 | if (level > XFS_MAX_ERR_LEVEL) | ||
49 | level = XFS_MAX_ERR_LEVEL; | ||
50 | spin_lock_irqsave(&xfs_err_lock,flags); | ||
51 | va_start(ap, fmt); | ||
52 | if (*fmt == '!') fp++; | ||
53 | len = vsnprintf(message, sizeof(message), fp, ap); | ||
54 | if (len >= sizeof(message)) | ||
55 | len = sizeof(message) - 1; | ||
56 | if (message[len-1] == '\n') | ||
57 | message[len-1] = 0; | ||
58 | printk("%s%s\n", err_level[level], message); | ||
59 | va_end(ap); | ||
60 | spin_unlock_irqrestore(&xfs_err_lock,flags); | ||
61 | BUG_ON(level == CE_PANIC); | ||
62 | } | ||
63 | |||
64 | void | ||
65 | xfs_fs_vcmn_err( | ||
66 | int level, | ||
67 | struct xfs_mount *mp, | ||
68 | char *fmt, | ||
69 | va_list ap) | ||
70 | { | ||
71 | unsigned long flags; | ||
72 | int len = 0; | ||
73 | |||
74 | level &= XFS_ERR_MASK; | ||
75 | if (level > XFS_MAX_ERR_LEVEL) | ||
76 | level = XFS_MAX_ERR_LEVEL; | ||
77 | |||
78 | spin_lock_irqsave(&xfs_err_lock,flags); | ||
79 | |||
80 | if (mp) { | ||
81 | len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname); | ||
82 | |||
83 | /* | ||
84 | * Skip the printk if we can't print anything useful | ||
85 | * due to an over-long device name. | ||
86 | */ | ||
87 | if (len >= sizeof(message)) | ||
88 | goto out; | ||
89 | } | ||
90 | |||
91 | len = vsnprintf(message + len, sizeof(message) - len, fmt, ap); | ||
92 | if (len >= sizeof(message)) | ||
93 | len = sizeof(message) - 1; | ||
94 | if (message[len-1] == '\n') | ||
95 | message[len-1] = 0; | ||
96 | |||
97 | printk("%s%s\n", err_level[level], message); | ||
98 | out: | ||
99 | spin_unlock_irqrestore(&xfs_err_lock,flags); | ||
100 | |||
101 | BUG_ON(level == CE_PANIC); | ||
102 | } | ||
103 | |||
104 | void | ||
105 | assfail(char *expr, char *file, int line) | ||
106 | { | ||
107 | printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line); | ||
108 | BUG(); | ||
109 | } | ||
110 | |||
111 | void | ||
112 | xfs_hex_dump(void *p, int length) | ||
113 | { | ||
114 | print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); | ||
115 | } | ||
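With support/debug.c deleted, the single cmn_err(level, ...) entry point, its static message[] buffer, and the spinlock that serialised it are replaced by dedicated per-level helpers. A userspace sketch of how such helpers can be stamped out of one vararg core (the macro name and prefixes are illustrative; the real helpers also take the struct xfs_mount):

```c
#include <stdarg.h>
#include <stdio.h>

/* One vararg core; no shared buffer or lock is needed. */
static void xfs_vprintk(const char *level, const char *fmt, va_list ap)
{
	fprintf(stderr, "%s XFS: ", level);
	vfprintf(stderr, fmt, ap);
	fputc('\n', stderr);
}

#define DEFINE_XFS_PRINTK_LEVEL(func, level)			\
static void func(const char *fmt, ...)				\
{								\
	va_list ap;						\
								\
	va_start(ap, fmt);					\
	xfs_vprintk(level, fmt, ap);				\
	va_end(ap);						\
}

DEFINE_XFS_PRINTK_LEVEL(xfs_warn_sketch,   "<4>")	/* KERN_WARNING */
DEFINE_XFS_PRINTK_LEVEL(xfs_notice_sketch, "<5>")	/* KERN_NOTICE */

int main(void)
{
	xfs_warn_sketch("Quotacheck: Failed to reset quota flags.");
	xfs_notice_sketch("Quotacheck: Done.");
	return 0;
}
```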
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h deleted file mode 100644 index d2d20462fd4f..000000000000 --- a/fs/xfs/support/debug.h +++ /dev/null | |||
@@ -1,54 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_SUPPORT_DEBUG_H__ | ||
19 | #define __XFS_SUPPORT_DEBUG_H__ | ||
20 | |||
21 | #include <stdarg.h> | ||
22 | |||
23 | #define CE_DEBUG 7 /* debug */ | ||
24 | #define CE_CONT 6 /* continuation */ | ||
25 | #define CE_NOTE 5 /* notice */ | ||
26 | #define CE_WARN 4 /* warning */ | ||
27 | #define CE_ALERT 1 /* alert */ | ||
28 | #define CE_PANIC 0 /* panic */ | ||
29 | |||
30 | extern void cmn_err(int, char *, ...) | ||
31 | __attribute__ ((format (printf, 2, 3))); | ||
32 | extern void assfail(char *expr, char *f, int l); | ||
33 | |||
34 | #define ASSERT_ALWAYS(expr) \ | ||
35 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
36 | |||
37 | #ifndef DEBUG | ||
38 | #define ASSERT(expr) ((void)0) | ||
39 | |||
40 | #ifndef STATIC | ||
41 | # define STATIC static noinline | ||
42 | #endif | ||
43 | |||
44 | #else /* DEBUG */ | ||
45 | |||
46 | #define ASSERT(expr) \ | ||
47 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
48 | |||
49 | #ifndef STATIC | ||
50 | # define STATIC noinline | ||
51 | #endif | ||
52 | |||
53 | #endif /* DEBUG */ | ||
54 | #endif /* __XFS_SUPPORT_DEBUG_H__ */ | ||
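The assertion machinery declared in this deleted header lives on elsewhere in the series; its shape is simple enough to sketch standalone (the kernel version wraps the test in unlikely() and ends in BUG(), for which abort() stands in here):

```c
#include <stdio.h>
#include <stdlib.h>

/* Userspace sketch of the ASSERT_ALWAYS()/assfail() pair. */
static void assfail(const char *expr, const char *file, int line)
{
	fprintf(stderr, "Assertion failed: %s, file: %s, line: %d\n",
		expr, file, line);
	abort();	/* the kernel calls BUG() here */
}

#define ASSERT_ALWAYS(expr) \
	((expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))

int main(void)
{
	ASSERT_ALWAYS(1 + 1 == 2);	/* passes, prints nothing */
	return 0;
}
```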
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 0135e2a669d7..11dd72070cbb 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h | |||
@@ -42,7 +42,7 @@ struct xfs_acl { | |||
42 | #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) | 42 | #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) |
43 | 43 | ||
44 | #ifdef CONFIG_XFS_POSIX_ACL | 44 | #ifdef CONFIG_XFS_POSIX_ACL |
45 | extern int xfs_check_acl(struct inode *inode, int mask); | 45 | extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags); |
46 | extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); | 46 | extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); |
47 | extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); | 47 | extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); |
48 | extern int xfs_acl_chmod(struct inode *inode); | 48 | extern int xfs_acl_chmod(struct inode *inode); |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 63c7a1a6c022..58632cc17f2d 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -227,7 +227,7 @@ typedef struct xfs_perag { | |||
227 | 227 | ||
228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
229 | 229 | ||
230 | rwlock_t pag_ici_lock; /* incore inode lock */ | 230 | spinlock_t pag_ici_lock; /* incore inode cache lock */ |
231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ | 231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ |
232 | int pag_ici_reclaimable; /* reclaimable inodes */ | 232 | int pag_ici_reclaimable; /* reclaimable inodes */ |
233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ | 233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ |
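The rwlock-to-spinlock switch for pag_ici_lock makes sense once lookups no longer take the read side: with RCU-style lockless radix-tree lookups (an inference from this hunk, not stated in it), the lock only has to serialise writers against each other, so the cheaper spinlock suffices. A rough C11 analogy of lockless readers with serialised writers (not the kernel mechanism):

```c
#include <stdatomic.h>
#include <stdio.h>

/* One cache slot stands in for the per-AG radix tree. */
static _Atomic(int *) slot;

/* Readers do a lockless acquire load -- no read lock taken at all. */
static int *lookup(void)
{
	return atomic_load_explicit(&slot, memory_order_acquire);
}

/* In the kernel, inserts would run under the (now spin) writer lock. */
static void insert(int *obj)
{
	atomic_store_explicit(&slot, obj, memory_order_release);
}

int main(void)
{
	static int inode = 42;

	insert(&inode);
	printf("found %d\n", *lookup());
	return 0;
}
```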
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 112abc439ca5..27d64d752eab 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -41,10 +41,6 @@ | |||
41 | #define XFSA_FIXUP_BNO_OK 1 | 41 | #define XFSA_FIXUP_BNO_OK 1 |
42 | #define XFSA_FIXUP_CNT_OK 2 | 42 | #define XFSA_FIXUP_CNT_OK 2 |
43 | 43 | ||
44 | static int | ||
45 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
46 | xfs_agblock_t bno, xfs_extlen_t len); | ||
47 | |||
48 | /* | 44 | /* |
49 | * Prototypes for per-ag allocation routines | 45 | * Prototypes for per-ag allocation routines |
50 | */ | 46 | */ |
@@ -94,7 +90,7 @@ xfs_alloc_lookup_ge( | |||
94 | * Lookup the first record less than or equal to [bno, len] | 90 | * Lookup the first record less than or equal to [bno, len] |
95 | * in the btree given by cur. | 91 | * in the btree given by cur. |
96 | */ | 92 | */ |
97 | STATIC int /* error */ | 93 | int /* error */ |
98 | xfs_alloc_lookup_le( | 94 | xfs_alloc_lookup_le( |
99 | struct xfs_btree_cur *cur, /* btree cursor */ | 95 | struct xfs_btree_cur *cur, /* btree cursor */ |
100 | xfs_agblock_t bno, /* starting block of extent */ | 96 | xfs_agblock_t bno, /* starting block of extent */ |
@@ -127,7 +123,7 @@ xfs_alloc_update( | |||
127 | /* | 123 | /* |
128 | * Get the data from the pointed-to record. | 124 | * Get the data from the pointed-to record. |
129 | */ | 125 | */ |
130 | STATIC int /* error */ | 126 | int /* error */ |
131 | xfs_alloc_get_rec( | 127 | xfs_alloc_get_rec( |
132 | struct xfs_btree_cur *cur, /* btree cursor */ | 128 | struct xfs_btree_cur *cur, /* btree cursor */ |
133 | xfs_agblock_t *bno, /* output: starting block of extent */ | 129 | xfs_agblock_t *bno, /* output: starting block of extent */ |
@@ -151,10 +147,9 @@ xfs_alloc_get_rec( | |||
151 | */ | 147 | */ |
152 | STATIC void | 148 | STATIC void |
153 | xfs_alloc_compute_aligned( | 149 | xfs_alloc_compute_aligned( |
150 | xfs_alloc_arg_t *args, /* allocation argument structure */ | ||
154 | xfs_agblock_t foundbno, /* starting block in found extent */ | 151 | xfs_agblock_t foundbno, /* starting block in found extent */ |
155 | xfs_extlen_t foundlen, /* length in found extent */ | 152 | xfs_extlen_t foundlen, /* length in found extent */ |
156 | xfs_extlen_t alignment, /* alignment for allocation */ | ||
157 | xfs_extlen_t minlen, /* minimum length for allocation */ | ||
158 | xfs_agblock_t *resbno, /* result block number */ | 153 | xfs_agblock_t *resbno, /* result block number */ |
159 | xfs_extlen_t *reslen) /* result length */ | 154 | xfs_extlen_t *reslen) /* result length */ |
160 | { | 155 | { |
@@ -162,8 +157,8 @@ xfs_alloc_compute_aligned( | |||
162 | xfs_extlen_t diff; | 157 | xfs_extlen_t diff; |
163 | xfs_extlen_t len; | 158 | xfs_extlen_t len; |
164 | 159 | ||
165 | if (alignment > 1 && foundlen >= minlen) { | 160 | if (args->alignment > 1 && foundlen >= args->minlen) { |
166 | bno = roundup(foundbno, alignment); | 161 | bno = roundup(foundbno, args->alignment); |
167 | diff = bno - foundbno; | 162 | diff = bno - foundbno; |
168 | len = diff >= foundlen ? 0 : foundlen - diff; | 163 | len = diff >= foundlen ? 0 : foundlen - diff; |
169 | } else { | 164 | } else { |
@@ -468,6 +463,27 @@ xfs_alloc_read_agfl( | |||
468 | return 0; | 463 | return 0; |
469 | } | 464 | } |
470 | 465 | ||
466 | STATIC int | ||
467 | xfs_alloc_update_counters( | ||
468 | struct xfs_trans *tp, | ||
469 | struct xfs_perag *pag, | ||
470 | struct xfs_buf *agbp, | ||
471 | long len) | ||
472 | { | ||
473 | struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); | ||
474 | |||
475 | pag->pagf_freeblks += len; | ||
476 | be32_add_cpu(&agf->agf_freeblks, len); | ||
477 | |||
478 | xfs_trans_agblocks_delta(tp, len); | ||
479 | if (unlikely(be32_to_cpu(agf->agf_freeblks) > | ||
480 | be32_to_cpu(agf->agf_length))) | ||
481 | return EFSCORRUPTED; | ||
482 | |||
483 | xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); | ||
484 | return 0; | ||
485 | } | ||
486 | |||
471 | /* | 487 | /* |
472 | * Allocation group level functions. | 488 | * Allocation group level functions. |
473 | */ | 489 | */ |
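The hunk above introduces xfs_alloc_update_counters(), factoring out the freeblks accounting that xfs_alloc_ag_vextent() previously open-coded and turning the old ASSERT into an EFSCORRUPTED return. A userspace sketch of the arithmetic and the sanity check (types, the errno value, and the missing endianness/logging steps are simplified stand-ins):

```c
#include <stdint.h>
#include <stdio.h>

#define EFSCORRUPTED 117	/* XFS maps this to EUCLEAN; illustrative here */

/* Simplified stand-ins for the on-disk AGF header and in-core perag. */
struct agf   { uint32_t agf_freeblks; uint32_t agf_length; };
struct perag { long pagf_freeblks; };

/*
 * One signed delta is applied to both the in-core and on-disk free-block
 * counts (allocations pass a negative len); corruption is reported --
 * rather than asserted -- if the free count would exceed the AG size.
 * be32_add_cpu() and transaction logging are omitted.
 */
static int update_counters(struct perag *pag, struct agf *agf, long len)
{
	pag->pagf_freeblks += len;
	agf->agf_freeblks = (uint32_t)(agf->agf_freeblks + len);

	if (agf->agf_freeblks > agf->agf_length)
		return EFSCORRUPTED;
	return 0;
}

int main(void)
{
	struct agf   agf = { .agf_freeblks = 100, .agf_length = 1000 };
	struct perag pag = { .pagf_freeblks = 100 };

	printf("alloc 10 blocks -> %d\n", update_counters(&pag, &agf, -10));
	printf("bogus free      -> %d\n", update_counters(&pag, &agf, 2000));
	return 0;
}
```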
@@ -509,49 +525,44 @@ xfs_alloc_ag_vextent( | |||
509 | ASSERT(0); | 525 | ASSERT(0); |
510 | /* NOTREACHED */ | 526 | /* NOTREACHED */ |
511 | } | 527 | } |
512 | if (error) | 528 | |
529 | if (error || args->agbno == NULLAGBLOCK) | ||
513 | return error; | 530 | return error; |
514 | /* | ||
515 | * If the allocation worked, need to change the agf structure | ||
516 | * (and log it), and the superblock. | ||
517 | */ | ||
518 | if (args->agbno != NULLAGBLOCK) { | ||
519 | xfs_agf_t *agf; /* allocation group freelist header */ | ||
520 | long slen = (long)args->len; | ||
521 | 531 | ||
522 | ASSERT(args->len >= args->minlen && args->len <= args->maxlen); | 532 | ASSERT(args->len >= args->minlen); |
523 | ASSERT(!(args->wasfromfl) || !args->isfl); | 533 | ASSERT(args->len <= args->maxlen); |
524 | ASSERT(args->agbno % args->alignment == 0); | 534 | ASSERT(!args->wasfromfl || !args->isfl); |
525 | if (!(args->wasfromfl)) { | 535 | ASSERT(args->agbno % args->alignment == 0); |
526 | 536 | ||
527 | agf = XFS_BUF_TO_AGF(args->agbp); | 537 | if (!args->wasfromfl) { |
528 | be32_add_cpu(&agf->agf_freeblks, -(args->len)); | 538 | error = xfs_alloc_update_counters(args->tp, args->pag, |
529 | xfs_trans_agblocks_delta(args->tp, | 539 | args->agbp, |
530 | -((long)(args->len))); | 540 | -((long)(args->len))); |
531 | args->pag->pagf_freeblks -= args->len; | 541 | if (error) |
532 | ASSERT(be32_to_cpu(agf->agf_freeblks) <= | 542 | return error; |
533 | be32_to_cpu(agf->agf_length)); | 543 | |
534 | xfs_alloc_log_agf(args->tp, args->agbp, | 544 | /* |
535 | XFS_AGF_FREEBLKS); | 545 | * Search the busylist for these blocks and mark the |
536 | /* | 546 | * transaction as synchronous if blocks are found. This |
537 | * Search the busylist for these blocks and mark the | 547 | * avoids the need to block due to a synchronous log |
538 | * transaction as synchronous if blocks are found. This | 548 | * force to ensure correct ordering as the synchronous |
539 | * avoids the need to block due to a synchronous log | 549 | * transaction will guarantee that for us. |
540 | * force to ensure correct ordering as the synchronous | 550 | */ |
541 | * transaction will guarantee that for us. | 551 | if (xfs_alloc_busy_search(args->mp, args->agno, |
542 | */ | 552 | args->agbno, args->len)) |
543 | if (xfs_alloc_busy_search(args->mp, args->agno, | 553 | xfs_trans_set_sync(args->tp); |
544 | args->agbno, args->len)) | ||
545 | xfs_trans_set_sync(args->tp); | ||
546 | } | ||
547 | if (!args->isfl) | ||
548 | xfs_trans_mod_sb(args->tp, | ||
549 | args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : | ||
550 | XFS_TRANS_SB_FDBLOCKS, -slen); | ||
551 | XFS_STATS_INC(xs_allocx); | ||
552 | XFS_STATS_ADD(xs_allocb, args->len); | ||
553 | } | 554 | } |
554 | return 0; | 555 | |
556 | if (!args->isfl) { | ||
557 | xfs_trans_mod_sb(args->tp, args->wasdel ? | ||
558 | XFS_TRANS_SB_RES_FDBLOCKS : | ||
559 | XFS_TRANS_SB_FDBLOCKS, | ||
560 | -((long)(args->len))); | ||
561 | } | ||
562 | |||
563 | XFS_STATS_INC(xs_allocx); | ||
564 | XFS_STATS_ADD(xs_allocb, args->len); | ||
565 | return error; | ||
555 | } | 566 | } |
556 | 567 | ||
557 | /* | 568 | /* |
@@ -577,61 +588,58 @@ xfs_alloc_ag_vextent_exact( | |||
577 | xfs_extlen_t rlen; /* length of returned extent */ | 588 | xfs_extlen_t rlen; /* length of returned extent */ |
578 | 589 | ||
579 | ASSERT(args->alignment == 1); | 590 | ASSERT(args->alignment == 1); |
591 | |||
580 | /* | 592 | /* |
581 | * Allocate/initialize a cursor for the by-number freespace btree. | 593 | * Allocate/initialize a cursor for the by-number freespace btree. |
582 | */ | 594 | */ |
583 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 595 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
584 | args->agno, XFS_BTNUM_BNO); | 596 | args->agno, XFS_BTNUM_BNO); |
597 | |||
585 | /* | 598 | /* |
586 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). | 599 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). |
587 | * Look for the closest free block <= bno, it must contain bno | 600 | * Look for the closest free block <= bno, it must contain bno |
588 | * if any free block does. | 601 | * if any free block does. |
589 | */ | 602 | */ |
590 | if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) | 603 | error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); |
604 | if (error) | ||
591 | goto error0; | 605 | goto error0; |
592 | if (!i) { | 606 | if (!i) |
593 | /* | 607 | goto not_found; |
594 | * Didn't find it, return null. | 608 | |
595 | */ | ||
596 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
597 | args->agbno = NULLAGBLOCK; | ||
598 | return 0; | ||
599 | } | ||
600 | /* | 609 | /* |
601 | * Grab the freespace record. | 610 | * Grab the freespace record. |
602 | */ | 611 | */ |
603 | if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) | 612 | error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); |
613 | if (error) | ||
604 | goto error0; | 614 | goto error0; |
605 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 615 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
606 | ASSERT(fbno <= args->agbno); | 616 | ASSERT(fbno <= args->agbno); |
607 | minend = args->agbno + args->minlen; | 617 | minend = args->agbno + args->minlen; |
608 | maxend = args->agbno + args->maxlen; | 618 | maxend = args->agbno + args->maxlen; |
609 | fend = fbno + flen; | 619 | fend = fbno + flen; |
620 | |||
610 | /* | 621 | /* |
611 | * Give up if the freespace isn't long enough for the minimum request. | 622 | * Give up if the freespace isn't long enough for the minimum request. |
612 | */ | 623 | */ |
613 | if (fend < minend) { | 624 | if (fend < minend) |
614 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 625 | goto not_found; |
615 | args->agbno = NULLAGBLOCK; | 626 | |
616 | return 0; | ||
617 | } | ||
618 | /* | 627 | /* |
619 | * End of extent will be smaller of the freespace end and the | 628 | * End of extent will be smaller of the freespace end and the |
620 | * maximal requested end. | 629 | * maximal requested end. |
621 | */ | 630 | * |
622 | end = XFS_AGBLOCK_MIN(fend, maxend); | ||
623 | /* | ||
624 | * Fix the length according to mod and prod if given. | 631 | * Fix the length according to mod and prod if given. |
625 | */ | 632 | */ |
633 | end = XFS_AGBLOCK_MIN(fend, maxend); | ||
626 | args->len = end - args->agbno; | 634 | args->len = end - args->agbno; |
627 | xfs_alloc_fix_len(args); | 635 | xfs_alloc_fix_len(args); |
628 | if (!xfs_alloc_fix_minleft(args)) { | 636 | if (!xfs_alloc_fix_minleft(args)) |
629 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 637 | goto not_found; |
630 | return 0; | 638 | |
631 | } | ||
632 | rlen = args->len; | 639 | rlen = args->len; |
633 | ASSERT(args->agbno + rlen <= fend); | 640 | ASSERT(args->agbno + rlen <= fend); |
634 | end = args->agbno + rlen; | 641 | end = args->agbno + rlen; |
642 | |||
635 | /* | 643 | /* |
636 | * We are allocating agbno for rlen [agbno .. end] | 644 | * We are allocating agbno for rlen [agbno .. end] |
637 | * Allocate/initialize a cursor for the by-size btree. | 645 | * Allocate/initialize a cursor for the by-size btree. |
@@ -640,16 +648,25 @@ xfs_alloc_ag_vextent_exact( | |||
640 | args->agno, XFS_BTNUM_CNT); | 648 | args->agno, XFS_BTNUM_CNT); |
641 | ASSERT(args->agbno + args->len <= | 649 | ASSERT(args->agbno + args->len <= |
642 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 650 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
643 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, | 651 | error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, |
644 | args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { | 652 | args->len, XFSA_FIXUP_BNO_OK); |
653 | if (error) { | ||
645 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); | 654 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); |
646 | goto error0; | 655 | goto error0; |
647 | } | 656 | } |
657 | |||
648 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 658 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); |
649 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 659 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
650 | 660 | ||
651 | trace_xfs_alloc_exact_done(args); | ||
652 | args->wasfromfl = 0; | 661 | args->wasfromfl = 0; |
662 | trace_xfs_alloc_exact_done(args); | ||
663 | return 0; | ||
664 | |||
665 | not_found: | ||
666 | /* Didn't find it, return null. */ | ||
667 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
668 | args->agbno = NULLAGBLOCK; | ||
669 | trace_xfs_alloc_exact_notfound(args); | ||
653 | return 0; | 670 | return 0; |
654 | 671 | ||
655 | error0: | 672 | error0: |
@@ -659,6 +676,94 @@ error0: | |||
659 | } | 676 | } |
660 | 677 | ||
661 | /* | 678 | /* |
679 | * Search the btree in a given direction via the search cursor and compare | ||
680 | * the records found against the good extent we've already found. | ||
681 | */ | ||
682 | STATIC int | ||
683 | xfs_alloc_find_best_extent( | ||
684 | struct xfs_alloc_arg *args, /* allocation argument structure */ | ||
685 | struct xfs_btree_cur **gcur, /* good cursor */ | ||
686 | struct xfs_btree_cur **scur, /* searching cursor */ | ||
687 | xfs_agblock_t gdiff, /* difference for search comparison */ | ||
688 | xfs_agblock_t *sbno, /* extent found by search */ | ||
689 | xfs_extlen_t *slen, | ||
690 | xfs_extlen_t *slena, /* aligned length */ | ||
691 | int dir) /* 0 = search right, 1 = search left */ | ||
692 | { | ||
693 | xfs_agblock_t bno; | ||
694 | xfs_agblock_t new; | ||
695 | xfs_agblock_t sdiff; | ||
696 | int error; | ||
697 | int i; | ||
698 | |||
699 | /* The good extent is perfect, no need to search. */ | ||
700 | if (!gdiff) | ||
701 | goto out_use_good; | ||
702 | |||
703 | /* | ||
704 | * Look until we find a better one, run out of space or run off the end. | ||
705 | */ | ||
706 | do { | ||
707 | error = xfs_alloc_get_rec(*scur, sbno, slen, &i); | ||
708 | if (error) | ||
709 | goto error0; | ||
710 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
711 | xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); | ||
712 | |||
713 | /* | ||
714 | * The good extent is closer than this one. | ||
715 | */ | ||
716 | if (!dir) { | ||
717 | if (bno >= args->agbno + gdiff) | ||
718 | goto out_use_good; | ||
719 | } else { | ||
720 | if (bno <= args->agbno - gdiff) | ||
721 | goto out_use_good; | ||
722 | } | ||
723 | |||
724 | /* | ||
725 | * Same distance, compare length and pick the best. | ||
726 | */ | ||
727 | if (*slena >= args->minlen) { | ||
728 | args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); | ||
729 | xfs_alloc_fix_len(args); | ||
730 | |||
731 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | ||
732 | args->alignment, *sbno, | ||
733 | *slen, &new); | ||
734 | |||
735 | /* | ||
736 | * Choose closer size and invalidate other cursor. | ||
737 | */ | ||
738 | if (sdiff < gdiff) | ||
739 | goto out_use_search; | ||
740 | goto out_use_good; | ||
741 | } | ||
742 | |||
743 | if (!dir) | ||
744 | error = xfs_btree_increment(*scur, 0, &i); | ||
745 | else | ||
746 | error = xfs_btree_decrement(*scur, 0, &i); | ||
747 | if (error) | ||
748 | goto error0; | ||
749 | } while (i); | ||
750 | |||
751 | out_use_good: | ||
752 | xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); | ||
753 | *scur = NULL; | ||
754 | return 0; | ||
755 | |||
756 | out_use_search: | ||
757 | xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); | ||
758 | *gcur = NULL; | ||
759 | return 0; | ||
760 | |||
761 | error0: | ||
762 | /* caller invalidates cursors */ | ||
763 | return error; | ||
764 | } | ||
765 | |||
766 | /* | ||
662 | * Allocate a variable extent near bno in the allocation group agno. | 767 | * Allocate a variable extent near bno in the allocation group agno. |
663 | * Extent's length (returned in len) will be between minlen and maxlen, | 768 | * Extent's length (returned in len) will be between minlen and maxlen, |
664 | * and of the form k * prod + mod unless there's nothing that large. | 769 | * and of the form k * prod + mod unless there's nothing that large. |
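The early termination in xfs_alloc_find_best_extent() relies on the by-bno btree being sorted: once a candidate record starts farther from the target block than the incumbent's diff, no later record in that direction can do better. A minimal standalone sketch of that pruning rule (hypothetical types and data, not kernel code):

	/* Sketch only: bnos[] holds candidate start blocks in search order. */
	static int
	prune_search(unsigned int target, unsigned int good_diff,
		     const unsigned int *bnos, int n, int dir)
	{
		int i;

		for (i = 0; i < n; i++) {
			/* searching right: candidates only get farther away */
			if (!dir && bnos[i] >= target + good_diff)
				break;
			/* searching left: same rule, mirrored */
			if (dir && bnos[i] <= target - good_diff)
				break;
			/* otherwise compute the aligned diff and compare, as above */
		}
		return i;		/* records actually examined */
	}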
@@ -775,8 +880,8 @@ xfs_alloc_ag_vextent_near( | |||
775 | if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) | 880 | if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) |
776 | goto error0; | 881 | goto error0; |
777 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 882 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
778 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, | 883 | xfs_alloc_compute_aligned(args, ltbno, ltlen, |
779 | args->minlen, &ltbnoa, &ltlena); | 884 | &ltbnoa, &ltlena); |
780 | if (ltlena < args->minlen) | 885 | if (ltlena < args->minlen) |
781 | continue; | 886 | continue; |
782 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 887 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
@@ -896,8 +1001,8 @@ xfs_alloc_ag_vextent_near( | |||
896 | if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) | 1001 | if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) |
897 | goto error0; | 1002 | goto error0; |
898 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1003 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
899 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, | 1004 | xfs_alloc_compute_aligned(args, ltbno, ltlen, |
900 | args->minlen, &ltbnoa, &ltlena); | 1005 | &ltbnoa, &ltlena); |
901 | if (ltlena >= args->minlen) | 1006 | if (ltlena >= args->minlen) |
902 | break; | 1007 | break; |
903 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) | 1008 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) |
@@ -912,8 +1017,8 @@ xfs_alloc_ag_vextent_near( | |||
912 | if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) | 1017 | if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) |
913 | goto error0; | 1018 | goto error0; |
914 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1019 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
915 | xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, | 1020 | xfs_alloc_compute_aligned(args, gtbno, gtlen, |
916 | args->minlen, &gtbnoa, &gtlena); | 1021 | &gtbnoa, &gtlena); |
917 | if (gtlena >= args->minlen) | 1022 | if (gtlena >= args->minlen) |
918 | break; | 1023 | break; |
919 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) | 1024 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) |
@@ -925,203 +1030,45 @@ xfs_alloc_ag_vextent_near( | |||
925 | } | 1030 | } |
926 | } | 1031 | } |
927 | } while (bno_cur_lt || bno_cur_gt); | 1032 | } while (bno_cur_lt || bno_cur_gt); |
1033 | |||
928 | /* | 1034 | /* |
929 | * Got both cursors still active, need to find better entry. | 1035 | * Got both cursors still active, need to find better entry. |
930 | */ | 1036 | */ |
931 | if (bno_cur_lt && bno_cur_gt) { | 1037 | if (bno_cur_lt && bno_cur_gt) { |
932 | /* | ||
933 | * Left side is long enough, look for a right side entry. | ||
934 | */ | ||
935 | if (ltlena >= args->minlen) { | 1038 | if (ltlena >= args->minlen) { |
936 | /* | 1039 | /* |
937 | * Fix up the length. | 1040 | * Left side is good, look for a right side entry. |
938 | */ | 1041 | */ |
939 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1042 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
940 | xfs_alloc_fix_len(args); | 1043 | xfs_alloc_fix_len(args); |
941 | rlen = args->len; | 1044 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
942 | ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, | ||
943 | args->alignment, ltbno, ltlen, &ltnew); | 1045 | args->alignment, ltbno, ltlen, &ltnew); |
1046 | |||
1047 | error = xfs_alloc_find_best_extent(args, | ||
1048 | &bno_cur_lt, &bno_cur_gt, | ||
1049 | ltdiff, &gtbno, &gtlen, &gtlena, | ||
1050 | 0 /* search right */); | ||
1051 | } else { | ||
1052 | ASSERT(gtlena >= args->minlen); | ||
1053 | |||
944 | /* | 1054 | /* |
945 | * Not perfect. | 1055 | * Right side is good, look for a left side entry. |
946 | */ | ||
947 | if (ltdiff) { | ||
948 | /* | ||
949 | * Look until we find a better one, run out of | ||
950 | * space, or run off the end. | ||
951 | */ | ||
952 | while (bno_cur_lt && bno_cur_gt) { | ||
953 | if ((error = xfs_alloc_get_rec( | ||
954 | bno_cur_gt, &gtbno, | ||
955 | &gtlen, &i))) | ||
956 | goto error0; | ||
957 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
958 | xfs_alloc_compute_aligned(gtbno, gtlen, | ||
959 | args->alignment, args->minlen, | ||
960 | &gtbnoa, &gtlena); | ||
961 | /* | ||
962 | * The left one is clearly better. | ||
963 | */ | ||
964 | if (gtbnoa >= args->agbno + ltdiff) { | ||
965 | xfs_btree_del_cursor( | ||
966 | bno_cur_gt, | ||
967 | XFS_BTREE_NOERROR); | ||
968 | bno_cur_gt = NULL; | ||
969 | break; | ||
970 | } | ||
971 | /* | ||
972 | * If we reach a big enough entry, | ||
973 | * compare the two and pick the best. | ||
974 | */ | ||
975 | if (gtlena >= args->minlen) { | ||
976 | args->len = | ||
977 | XFS_EXTLEN_MIN(gtlena, | ||
978 | args->maxlen); | ||
979 | xfs_alloc_fix_len(args); | ||
980 | rlen = args->len; | ||
981 | gtdiff = xfs_alloc_compute_diff( | ||
982 | args->agbno, rlen, | ||
983 | args->alignment, | ||
984 | gtbno, gtlen, &gtnew); | ||
985 | /* | ||
986 | * Right side is better. | ||
987 | */ | ||
988 | if (gtdiff < ltdiff) { | ||
989 | xfs_btree_del_cursor( | ||
990 | bno_cur_lt, | ||
991 | XFS_BTREE_NOERROR); | ||
992 | bno_cur_lt = NULL; | ||
993 | } | ||
994 | /* | ||
995 | * Left side is better. | ||
996 | */ | ||
997 | else { | ||
998 | xfs_btree_del_cursor( | ||
999 | bno_cur_gt, | ||
1000 | XFS_BTREE_NOERROR); | ||
1001 | bno_cur_gt = NULL; | ||
1002 | } | ||
1003 | break; | ||
1004 | } | ||
1005 | /* | ||
1006 | * Fell off the right end. | ||
1007 | */ | ||
1008 | if ((error = xfs_btree_increment( | ||
1009 | bno_cur_gt, 0, &i))) | ||
1010 | goto error0; | ||
1011 | if (!i) { | ||
1012 | xfs_btree_del_cursor( | ||
1013 | bno_cur_gt, | ||
1014 | XFS_BTREE_NOERROR); | ||
1015 | bno_cur_gt = NULL; | ||
1016 | break; | ||
1017 | } | ||
1018 | } | ||
1019 | } | ||
1020 | /* | ||
1021 | * The left side is perfect, trash the right side. | ||
1022 | */ | ||
1023 | else { | ||
1024 | xfs_btree_del_cursor(bno_cur_gt, | ||
1025 | XFS_BTREE_NOERROR); | ||
1026 | bno_cur_gt = NULL; | ||
1027 | } | ||
1028 | } | ||
1029 | /* | ||
1030 | * It's the right side that was found first, look left. | ||
1031 | */ | ||
1032 | else { | ||
1033 | /* | ||
1034 | * Fix up the length. | ||
1035 | */ | 1056 | */ |
1036 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1057 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
1037 | xfs_alloc_fix_len(args); | 1058 | xfs_alloc_fix_len(args); |
1038 | rlen = args->len; | 1059 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1039 | gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, | ||
1040 | args->alignment, gtbno, gtlen, &gtnew); | 1060 | args->alignment, gtbno, gtlen, &gtnew); |
1041 | /* | 1061 | |
1042 | * Right side entry isn't perfect. | 1062 | error = xfs_alloc_find_best_extent(args, |
1043 | */ | 1063 | &bno_cur_gt, &bno_cur_lt, |
1044 | if (gtdiff) { | 1064 | gtdiff, &ltbno, &ltlen, &ltlena, |
1045 | /* | 1065 | 1 /* search left */); |
1046 | * Look until we find a better one, run out of | ||
1047 | * space, or run off the end. | ||
1048 | */ | ||
1049 | while (bno_cur_lt && bno_cur_gt) { | ||
1050 | if ((error = xfs_alloc_get_rec( | ||
1051 | bno_cur_lt, &ltbno, | ||
1052 | &ltlen, &i))) | ||
1053 | goto error0; | ||
1054 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1055 | xfs_alloc_compute_aligned(ltbno, ltlen, | ||
1056 | args->alignment, args->minlen, | ||
1057 | &ltbnoa, &ltlena); | ||
1058 | /* | ||
1059 | * The right one is clearly better. | ||
1060 | */ | ||
1061 | if (ltbnoa <= args->agbno - gtdiff) { | ||
1062 | xfs_btree_del_cursor( | ||
1063 | bno_cur_lt, | ||
1064 | XFS_BTREE_NOERROR); | ||
1065 | bno_cur_lt = NULL; | ||
1066 | break; | ||
1067 | } | ||
1068 | /* | ||
1069 | * If we reach a big enough entry, | ||
1070 | * compare the two and pick the best. | ||
1071 | */ | ||
1072 | if (ltlena >= args->minlen) { | ||
1073 | args->len = XFS_EXTLEN_MIN( | ||
1074 | ltlena, args->maxlen); | ||
1075 | xfs_alloc_fix_len(args); | ||
1076 | rlen = args->len; | ||
1077 | ltdiff = xfs_alloc_compute_diff( | ||
1078 | args->agbno, rlen, | ||
1079 | args->alignment, | ||
1080 | ltbno, ltlen, &ltnew); | ||
1081 | /* | ||
1082 | * Left side is better. | ||
1083 | */ | ||
1084 | if (ltdiff < gtdiff) { | ||
1085 | xfs_btree_del_cursor( | ||
1086 | bno_cur_gt, | ||
1087 | XFS_BTREE_NOERROR); | ||
1088 | bno_cur_gt = NULL; | ||
1089 | } | ||
1090 | /* | ||
1091 | * Right side is better. | ||
1092 | */ | ||
1093 | else { | ||
1094 | xfs_btree_del_cursor( | ||
1095 | bno_cur_lt, | ||
1096 | XFS_BTREE_NOERROR); | ||
1097 | bno_cur_lt = NULL; | ||
1098 | } | ||
1099 | break; | ||
1100 | } | ||
1101 | /* | ||
1102 | * Fell off the left end. | ||
1103 | */ | ||
1104 | if ((error = xfs_btree_decrement( | ||
1105 | bno_cur_lt, 0, &i))) | ||
1106 | goto error0; | ||
1107 | if (!i) { | ||
1108 | xfs_btree_del_cursor(bno_cur_lt, | ||
1109 | XFS_BTREE_NOERROR); | ||
1110 | bno_cur_lt = NULL; | ||
1111 | break; | ||
1112 | } | ||
1113 | } | ||
1114 | } | ||
1115 | /* | ||
1116 | * The right side is perfect, trash the left side. | ||
1117 | */ | ||
1118 | else { | ||
1119 | xfs_btree_del_cursor(bno_cur_lt, | ||
1120 | XFS_BTREE_NOERROR); | ||
1121 | bno_cur_lt = NULL; | ||
1122 | } | ||
1123 | } | 1066 | } |
1067 | |||
1068 | if (error) | ||
1069 | goto error0; | ||
1124 | } | 1070 | } |
1071 | |||
1125 | /* | 1072 | /* |
1126 | * If we couldn't get anything, give up. | 1073 | * If we couldn't get anything, give up. |
1127 | */ | 1074 | */ |
@@ -1130,6 +1077,7 @@ xfs_alloc_ag_vextent_near( | |||
1130 | args->agbno = NULLAGBLOCK; | 1077 | args->agbno = NULLAGBLOCK; |
1131 | return 0; | 1078 | return 0; |
1132 | } | 1079 | } |
1080 | |||
1133 | /* | 1081 | /* |
1134 | * At this point we have selected a freespace entry, either to the | 1082 | * At this point we have selected a freespace entry, either to the |
1135 | * left or to the right. If it's on the right, copy all the | 1083 | * left or to the right. If it's on the right, copy all the |
@@ -1146,6 +1094,7 @@ xfs_alloc_ag_vextent_near( | |||
1146 | j = 1; | 1094 | j = 1; |
1147 | } else | 1095 | } else |
1148 | j = 0; | 1096 | j = 0; |
1097 | |||
1149 | /* | 1098 | /* |
1150 | * Fix up the length and compute the useful address. | 1099 | * Fix up the length and compute the useful address. |
1151 | */ | 1100 | */ |
@@ -1248,8 +1197,7 @@ xfs_alloc_ag_vextent_size( | |||
1248 | * once aligned; if not, we search left for something better. | 1197 | * once aligned; if not, we search left for something better. |
1249 | * This can't happen in the second case above. | 1198 | * This can't happen in the second case above. |
1250 | */ | 1199 | */ |
1251 | xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, | 1200 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); |
1252 | &rbno, &rlen); | ||
1253 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1201 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1254 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1202 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || |
1255 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); | 1203 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); |
@@ -1274,8 +1222,8 @@ xfs_alloc_ag_vextent_size( | |||
1274 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1222 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1275 | if (flen < bestrlen) | 1223 | if (flen < bestrlen) |
1276 | break; | 1224 | break; |
1277 | xfs_alloc_compute_aligned(fbno, flen, args->alignment, | 1225 | xfs_alloc_compute_aligned(args, fbno, flen, |
1278 | args->minlen, &rbno, &rlen); | 1226 | &rbno, &rlen); |
1279 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1227 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1280 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1228 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || |
1281 | (rlen <= flen && rbno + rlen <= fbno + flen), | 1229 | (rlen <= flen && rbno + rlen <= fbno + flen), |
@@ -1453,6 +1401,7 @@ xfs_free_ag_extent( | |||
1453 | xfs_mount_t *mp; /* mount point struct for filesystem */ | 1401 | xfs_mount_t *mp; /* mount point struct for filesystem */ |
1454 | xfs_agblock_t nbno; /* new starting block of freespace */ | 1402 | xfs_agblock_t nbno; /* new starting block of freespace */ |
1455 | xfs_extlen_t nlen; /* new length of freespace */ | 1403 | xfs_extlen_t nlen; /* new length of freespace */ |
1404 | xfs_perag_t *pag; /* per allocation group data */ | ||
1456 | 1405 | ||
1457 | mp = tp->t_mountp; | 1406 | mp = tp->t_mountp; |
1458 | /* | 1407 | /* |
@@ -1651,30 +1600,20 @@ xfs_free_ag_extent( | |||
1651 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1600 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1652 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1601 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1653 | cnt_cur = NULL; | 1602 | cnt_cur = NULL; |
1603 | |||
1654 | /* | 1604 | /* |
1655 | * Update the freespace totals in the ag and superblock. | 1605 | * Update the freespace totals in the ag and superblock. |
1656 | */ | 1606 | */ |
1657 | { | 1607 | pag = xfs_perag_get(mp, agno); |
1658 | xfs_agf_t *agf; | 1608 | error = xfs_alloc_update_counters(tp, pag, agbp, len); |
1659 | xfs_perag_t *pag; /* per allocation group data */ | 1609 | xfs_perag_put(pag); |
1660 | 1610 | if (error) | |
1661 | pag = xfs_perag_get(mp, agno); | 1611 | goto error0; |
1662 | pag->pagf_freeblks += len; | 1612 | |
1663 | xfs_perag_put(pag); | 1613 | if (!isfl) |
1664 | 1614 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); | |
1665 | agf = XFS_BUF_TO_AGF(agbp); | 1615 | XFS_STATS_INC(xs_freex); |
1666 | be32_add_cpu(&agf->agf_freeblks, len); | 1616 | XFS_STATS_ADD(xs_freeb, len); |
1667 | xfs_trans_agblocks_delta(tp, len); | ||
1668 | XFS_WANT_CORRUPTED_GOTO( | ||
1669 | be32_to_cpu(agf->agf_freeblks) <= | ||
1670 | be32_to_cpu(agf->agf_length), | ||
1671 | error0); | ||
1672 | xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); | ||
1673 | if (!isfl) | ||
1674 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); | ||
1675 | XFS_STATS_INC(xs_freex); | ||
1676 | XFS_STATS_ADD(xs_freeb, len); | ||
1677 | } | ||
1678 | 1617 | ||
1679 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); | 1618 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); |
1680 | 1619 | ||
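xfs_alloc_update_counters() itself is outside this hunk; reconstructed from the inline code it replaces, the helper plausibly looks like the sketch below (details in the tree may differ):

	STATIC int
	xfs_alloc_update_counters(
		struct xfs_trans	*tp,
		struct xfs_perag	*pag,
		struct xfs_buf		*agbp,
		long			len)
	{
		struct xfs_agf	*agf = XFS_BUF_TO_AGF(agbp);

		pag->pagf_freeblks += len;
		be32_add_cpu(&agf->agf_freeblks, len);
		xfs_trans_agblocks_delta(tp, len);

		/* freeblks can never exceed the length of the AG */
		if (be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
			return EFSCORRUPTED;

		xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
		return 0;
	}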
@@ -2456,17 +2395,33 @@ xfs_free_extent( | |||
2456 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); | 2395 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); |
2457 | args.tp = tp; | 2396 | args.tp = tp; |
2458 | args.mp = tp->t_mountp; | 2397 | args.mp = tp->t_mountp; |
2398 | |||
2399 | /* | ||
2400 | * validate that the block number is legal - this enables us to detect | ||
2401 | * and handle silent filesystem corruption rather than crashing. | ||
2402 | */ | ||
2459 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); | 2403 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); |
2460 | ASSERT(args.agno < args.mp->m_sb.sb_agcount); | 2404 | if (args.agno >= args.mp->m_sb.sb_agcount) |
2405 | return EFSCORRUPTED; | ||
2406 | |||
2461 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); | 2407 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); |
2408 | if (args.agbno >= args.mp->m_sb.sb_agblocks) | ||
2409 | return EFSCORRUPTED; | ||
2410 | |||
2462 | args.pag = xfs_perag_get(args.mp, args.agno); | 2411 | args.pag = xfs_perag_get(args.mp, args.agno); |
2463 | if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) | 2412 | ASSERT(args.pag); |
2413 | |||
2414 | error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); | ||
2415 | if (error) | ||
2464 | goto error0; | 2416 | goto error0; |
2465 | #ifdef DEBUG | 2417 | |
2466 | ASSERT(args.agbp != NULL); | 2418 | /* validate the extent size is legal now we have the agf locked */ |
2467 | ASSERT((args.agbno + len) <= | 2419 | if (args.agbno + len > |
2468 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); | 2420 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { |
2469 | #endif | 2421 | error = EFSCORRUPTED; |
2422 | goto error0; | ||
2423 | } | ||
2424 | |||
2470 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); | 2425 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); |
2471 | error0: | 2426 | error0: |
2472 | xfs_perag_put(args.pag); | 2427 | xfs_perag_put(args.pag); |
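ASSERT() is compiled out of production builds, so the old checks gave no runtime protection against a corrupt block number; the EFSCORRUPTED returns above are checked unconditionally. A compact sketch of the decode-then-range-check pattern (hypothetical helper, not from the patch):

	static int
	check_fsb(struct xfs_mount *mp, xfs_fsblock_t bno)
	{
		xfs_agnumber_t	agno = XFS_FSB_TO_AGNO(mp, bno);
		xfs_agblock_t	agbno = XFS_FSB_TO_AGBNO(mp, bno);

		if (agno >= mp->m_sb.sb_agcount)	/* AG number out of range */
			return EFSCORRUPTED;
		if (agbno >= mp->m_sb.sb_agblocks)	/* block outside the AG */
			return EFSCORRUPTED;
		return 0;
	}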
@@ -2676,7 +2631,7 @@ restart: | |||
2676 | * will require a synchronous transaction, but it can still be | 2631 | * will require a synchronous transaction, but it can still be |
2677 | * used to distinguish between a partial or exact match. | 2632 | * used to distinguish between a partial or exact match. |
2678 | */ | 2633 | */ |
2679 | static int | 2634 | int |
2680 | xfs_alloc_busy_search( | 2635 | xfs_alloc_busy_search( |
2681 | struct xfs_mount *mp, | 2636 | struct xfs_mount *mp, |
2682 | xfs_agnumber_t agno, | 2637 | xfs_agnumber_t agno, |
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 895009a97271..d0b3bc72005b 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #define __XFS_ALLOC_H__ | 19 | #define __XFS_ALLOC_H__ |
20 | 20 | ||
21 | struct xfs_buf; | 21 | struct xfs_buf; |
22 | struct xfs_btree_cur; | ||
22 | struct xfs_mount; | 23 | struct xfs_mount; |
23 | struct xfs_perag; | 24 | struct xfs_perag; |
24 | struct xfs_trans; | 25 | struct xfs_trans; |
@@ -74,6 +75,22 @@ typedef unsigned int xfs_alloctype_t; | |||
74 | #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) | 75 | #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) |
75 | 76 | ||
76 | /* | 77 | /* |
78 | * When deciding how much space to allocate out of an AG, we limit the | ||
79 | * allocation maximum size to the size the AG. However, we cannot use all the | ||
80 | * blocks in the AG - some are permanently used by metadata. These | ||
81 | * blocks are generally: | ||
82 | * - the AG superblock, AGF, AGI and AGFL | ||
83 | * - the AGF (bno and cnt) and AGI btree root blocks | ||
84 | * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits | ||
85 | * | ||
86 | * The AG headers are sector sized, so the amount of space they take up is | ||
87 | * dependent on filesystem geometry. The others are all single blocks. | ||
88 | */ | ||
89 | #define XFS_ALLOC_AG_MAX_USABLE(mp) \ | ||
90 | ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) | ||
91 | |||
92 | |||
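A worked example of the macro under assumed geometry (512-byte sectors, 4096-byte blocks; numbers are illustrative, not from the patch):

	/*
	 * XFS_FSS_TO_BB(mp, 4)   -> 4 sectors = 4 basic (512-byte) blocks
	 * XFS_BB_TO_FSB(mp, 4)   -> 2048 bytes round up to 1 filesystem block
	 *
	 * For an AG of 250000 blocks:
	 * XFS_ALLOC_AG_MAX_USABLE(mp) = 250000 - 1 - 7 = 249992 blocks
	 */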
93 | /* | ||
77 | * Argument structure for xfs_alloc routines. | 94 | * Argument structure for xfs_alloc routines. |
78 | * This is turned into a structure to avoid having 20 arguments passed | 95 | * This is turned into a structure to avoid having 20 arguments passed |
79 | * down several levels of the stack. | 96 | * down several levels of the stack. |
@@ -118,16 +135,16 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, | |||
118 | struct xfs_perag *pag); | 135 | struct xfs_perag *pag); |
119 | 136 | ||
120 | #ifdef __KERNEL__ | 137 | #ifdef __KERNEL__ |
121 | |||
122 | void | 138 | void |
123 | xfs_alloc_busy_insert(xfs_trans_t *tp, | 139 | xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, |
124 | xfs_agnumber_t agno, | 140 | xfs_agblock_t bno, xfs_extlen_t len); |
125 | xfs_agblock_t bno, | ||
126 | xfs_extlen_t len); | ||
127 | 141 | ||
128 | void | 142 | void |
129 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); | 143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); |
130 | 144 | ||
145 | int | ||
146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
147 | xfs_agblock_t bno, xfs_extlen_t len); | ||
131 | #endif /* __KERNEL__ */ | 148 | #endif /* __KERNEL__ */ |
132 | 149 | ||
133 | /* | 150 | /* |
@@ -205,4 +222,18 @@ xfs_free_extent( | |||
205 | xfs_fsblock_t bno, /* starting block number of extent */ | 222 | xfs_fsblock_t bno, /* starting block number of extent */ |
206 | xfs_extlen_t len); /* length of extent */ | 223 | xfs_extlen_t len); /* length of extent */ |
207 | 224 | ||
225 | int /* error */ | ||
226 | xfs_alloc_lookup_le( | ||
227 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
228 | xfs_agblock_t bno, /* starting block of extent */ | ||
229 | xfs_extlen_t len, /* length of extent */ | ||
230 | int *stat); /* success/failure */ | ||
231 | |||
232 | int /* error */ | ||
233 | xfs_alloc_get_rec( | ||
234 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
235 | xfs_agblock_t *bno, /* output: starting block of extent */ | ||
236 | xfs_extlen_t *len, /* output: length of extent */ | ||
237 | int *stat); /* output: success/failure */ | ||
238 | |||
208 | #endif /* __XFS_ALLOC_H__ */ | 239 | #endif /* __XFS_ALLOC_H__ */ |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index a6cff8edcdb6..71e90dc2aeb1 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) | |||
637 | * It didn't all fit, so we have to sort everything on hashval. | 637 | * It didn't all fit, so we have to sort everything on hashval. |
638 | */ | 638 | */ |
639 | sbsize = sf->hdr.count * sizeof(*sbuf); | 639 | sbsize = sf->hdr.count * sizeof(*sbuf); |
640 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); | 640 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS); |
641 | 641 | ||
642 | /* | 642 | /* |
643 | * Scan the attribute list for the rest of the entries, storing | 643 | * Scan the attribute list for the rest of the entries, storing |
@@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) | |||
2386 | args.dp = context->dp; | 2386 | args.dp = context->dp; |
2387 | args.whichfork = XFS_ATTR_FORK; | 2387 | args.whichfork = XFS_ATTR_FORK; |
2388 | args.valuelen = valuelen; | 2388 | args.valuelen = valuelen; |
2389 | args.value = kmem_alloc(valuelen, KM_SLEEP); | 2389 | args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); |
2390 | args.rmtblkno = be32_to_cpu(name_rmt->valueblk); | 2390 | args.rmtblkno = be32_to_cpu(name_rmt->valueblk); |
2391 | args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); | 2391 | args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); |
2392 | retval = xfs_attr_rmtval_get(&args); | 2392 | retval = xfs_attr_rmtval_get(&args); |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 4111cd3966c7..fa00788de2f5 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -1038,17 +1038,34 @@ xfs_bmap_add_extent_delay_real( | |||
1038 | * Filling in the middle part of a previous delayed allocation. | 1038 | * Filling in the middle part of a previous delayed allocation. |
1039 | * Contiguity is impossible here. | 1039 | * Contiguity is impossible here. |
1040 | * This case is avoided almost all the time. | 1040 | * This case is avoided almost all the time. |
1041 | * | ||
1042 | * We start with a delayed allocation: | ||
1043 | * | ||
1044 | * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ | ||
1045 | * PREV @ idx | ||
1046 | * | ||
1047 | * and we are allocating: | ||
1048 | * +rrrrrrrrrrrrrrrrr+ | ||
1049 | * new | ||
1050 | * | ||
1051 | * and we set it up for insertion as: | ||
1052 | * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ | ||
1053 | * new | ||
1054 | * PREV @ idx LEFT RIGHT | ||
1055 | * inserted at idx + 1 | ||
1041 | */ | 1056 | */ |
1042 | temp = new->br_startoff - PREV.br_startoff; | 1057 | temp = new->br_startoff - PREV.br_startoff; |
1043 | trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); | ||
1044 | xfs_bmbt_set_blockcount(ep, temp); | ||
1045 | r[0] = *new; | ||
1046 | r[1].br_state = PREV.br_state; | ||
1047 | r[1].br_startblock = 0; | ||
1048 | r[1].br_startoff = new_endoff; | ||
1049 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; | 1058 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; |
1050 | r[1].br_blockcount = temp2; | 1059 | trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); |
1051 | xfs_iext_insert(ip, idx + 1, 2, &r[0], state); | 1060 | xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ |
1061 | LEFT = *new; | ||
1062 | RIGHT.br_state = PREV.br_state; | ||
1063 | RIGHT.br_startblock = nullstartblock( | ||
1064 | (int)xfs_bmap_worst_indlen(ip, temp2)); | ||
1065 | RIGHT.br_startoff = new_endoff; | ||
1066 | RIGHT.br_blockcount = temp2; | ||
1067 | /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ | ||
1068 | xfs_iext_insert(ip, idx + 1, 2, &LEFT, state); | ||
1052 | ip->i_df.if_lastex = idx + 1; | 1069 | ip->i_df.if_lastex = idx + 1; |
1053 | ip->i_d.di_nextents++; | 1070 | ip->i_d.di_nextents++; |
1054 | if (cur == NULL) | 1071 | if (cur == NULL) |
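A worked example of the split arithmetic with made-up numbers: PREV covers file offsets [100, 160) and the new real allocation covers [120, 140), so new_endoff = 140:

	/*
	 * temp  = new->br_startoff - PREV.br_startoff                = 20
	 * temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff = 20
	 *
	 * PREV is truncated to the 20 leftover delalloc blocks on the left,
	 * LEFT becomes the 20-block real extent, and RIGHT is a new 20-block
	 * delalloc extent whose startblock encodes the worst-case indirect
	 * block reservation for its remaining length.
	 */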
@@ -2348,6 +2365,13 @@ xfs_bmap_rtalloc( | |||
2348 | */ | 2365 | */ |
2349 | if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) | 2366 | if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) |
2350 | ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; | 2367 | ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; |
2368 | |||
2369 | /* | ||
2370 | * Lock out other modifications to the RT bitmap inode. | ||
2371 | */ | ||
2372 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); | ||
2373 | xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); | ||
2374 | |||
2351 | /* | 2375 | /* |
2352 | * If it's an allocation to an empty file at offset 0, | 2376 | * If it's an allocation to an empty file at offset 0, |
2353 | * pick an extent that will space things out in the rt area. | 2377 | * pick an extent that will space things out in the rt area. |
@@ -2430,7 +2454,7 @@ xfs_bmap_btalloc_nullfb( | |||
2430 | startag = ag = 0; | 2454 | startag = ag = 0; |
2431 | 2455 | ||
2432 | pag = xfs_perag_get(mp, ag); | 2456 | pag = xfs_perag_get(mp, ag); |
2433 | while (*blen < ap->alen) { | 2457 | while (*blen < args->maxlen) { |
2434 | if (!pag->pagf_init) { | 2458 | if (!pag->pagf_init) { |
2435 | error = xfs_alloc_pagf_init(mp, args->tp, ag, | 2459 | error = xfs_alloc_pagf_init(mp, args->tp, ag, |
2436 | XFS_ALLOC_FLAG_TRYLOCK); | 2460 | XFS_ALLOC_FLAG_TRYLOCK); |
@@ -2452,7 +2476,7 @@ xfs_bmap_btalloc_nullfb( | |||
2452 | notinit = 1; | 2476 | notinit = 1; |
2453 | 2477 | ||
2454 | if (xfs_inode_is_filestream(ap->ip)) { | 2478 | if (xfs_inode_is_filestream(ap->ip)) { |
2455 | if (*blen >= ap->alen) | 2479 | if (*blen >= args->maxlen) |
2456 | break; | 2480 | break; |
2457 | 2481 | ||
2458 | if (ap->userdata) { | 2482 | if (ap->userdata) { |
@@ -2498,14 +2522,14 @@ xfs_bmap_btalloc_nullfb( | |||
2498 | * If the best seen length is less than the request | 2522 | * If the best seen length is less than the request |
2499 | * length, use the best as the minimum. | 2523 | * length, use the best as the minimum. |
2500 | */ | 2524 | */ |
2501 | else if (*blen < ap->alen) | 2525 | else if (*blen < args->maxlen) |
2502 | args->minlen = *blen; | 2526 | args->minlen = *blen; |
2503 | /* | 2527 | /* |
2504 | * Otherwise we've seen an extent as big as alen, | 2528 | * Otherwise we've seen an extent as big as maxlen, |
2505 | * use that as the minimum. | 2529 | * use that as the minimum. |
2506 | */ | 2530 | */ |
2507 | else | 2531 | else |
2508 | args->minlen = ap->alen; | 2532 | args->minlen = args->maxlen; |
2509 | 2533 | ||
2510 | /* | 2534 | /* |
2511 | * set the failure fallback case to look in the selected | 2535 | * set the failure fallback case to look in the selected |
@@ -2573,7 +2597,9 @@ xfs_bmap_btalloc( | |||
2573 | args.tp = ap->tp; | 2597 | args.tp = ap->tp; |
2574 | args.mp = mp; | 2598 | args.mp = mp; |
2575 | args.fsbno = ap->rval; | 2599 | args.fsbno = ap->rval; |
2576 | args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); | 2600 | |
2601 | /* Trim the allocation back to the maximum an AG can fit. */ | ||
2602 | args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp)); | ||
2577 | args.firstblock = ap->firstblock; | 2603 | args.firstblock = ap->firstblock; |
2578 | blen = 0; | 2604 | blen = 0; |
2579 | if (nullfb) { | 2605 | if (nullfb) { |
@@ -2621,7 +2647,7 @@ xfs_bmap_btalloc( | |||
2621 | /* | 2647 | /* |
2622 | * Adjust for alignment | 2648 | * Adjust for alignment |
2623 | */ | 2649 | */ |
2624 | if (blen > args.alignment && blen <= ap->alen) | 2650 | if (blen > args.alignment && blen <= args.maxlen) |
2625 | args.minlen = blen - args.alignment; | 2651 | args.minlen = blen - args.alignment; |
2626 | args.minalignslop = 0; | 2652 | args.minalignslop = 0; |
2627 | } else { | 2653 | } else { |
@@ -2640,7 +2666,7 @@ xfs_bmap_btalloc( | |||
2640 | * of minlen+alignment+slop doesn't go up | 2666 | * of minlen+alignment+slop doesn't go up |
2641 | * between the calls. | 2667 | * between the calls. |
2642 | */ | 2668 | */ |
2643 | if (blen > mp->m_dalign && blen <= ap->alen) | 2669 | if (blen > mp->m_dalign && blen <= args.maxlen) |
2644 | nextminlen = blen - mp->m_dalign; | 2670 | nextminlen = blen - mp->m_dalign; |
2645 | else | 2671 | else |
2646 | nextminlen = args.minlen; | 2672 | nextminlen = args.minlen; |
@@ -3500,7 +3526,7 @@ xfs_bmap_search_extents( | |||
3500 | 3526 | ||
3501 | if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && | 3527 | if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && |
3502 | !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { | 3528 | !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { |
3503 | xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, | 3529 | xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, |
3504 | "Access to block zero in inode %llu " | 3530 | "Access to block zero in inode %llu " |
3505 | "start_block: %llx start_off: %llx " | 3531 | "start_block: %llx start_off: %llx " |
3506 | "blkcnt: %llx extent-state: %x lastx: %x\n", | 3532 | "blkcnt: %llx extent-state: %x lastx: %x\n", |
@@ -4174,12 +4200,11 @@ xfs_bmap_read_extents( | |||
4174 | num_recs = xfs_btree_get_numrecs(block); | 4200 | num_recs = xfs_btree_get_numrecs(block); |
4175 | if (unlikely(i + num_recs > room)) { | 4201 | if (unlikely(i + num_recs > room)) { |
4176 | ASSERT(i + num_recs <= room); | 4202 | ASSERT(i + num_recs <= room); |
4177 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 4203 | xfs_warn(ip->i_mount, |
4178 | "corrupt dinode %Lu, (btree extents).", | 4204 | "corrupt dinode %Lu, (btree extents).", |
4179 | (unsigned long long) ip->i_ino); | 4205 | (unsigned long long) ip->i_ino); |
4180 | XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", | 4206 | XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", |
4181 | XFS_ERRLEVEL_LOW, | 4207 | XFS_ERRLEVEL_LOW, ip->i_mount, block); |
4182 | ip->i_mount); | ||
4183 | goto error0; | 4208 | goto error0; |
4184 | } | 4209 | } |
4185 | XFS_WANT_CORRUPTED_GOTO( | 4210 | XFS_WANT_CORRUPTED_GOTO( |
@@ -4485,6 +4510,16 @@ xfs_bmapi( | |||
4485 | /* Figure out the extent size, adjust alen */ | 4510 | /* Figure out the extent size, adjust alen */ |
4486 | extsz = xfs_get_extsz_hint(ip); | 4511 | extsz = xfs_get_extsz_hint(ip); |
4487 | if (extsz) { | 4512 | if (extsz) { |
4513 | /* | ||
4514 | * make sure we don't exceed a single | ||
4515 | * extent length when we align the | ||
4516 | * extent by reducing length we are | ||
4517 | * going to allocate by the maximum | ||
4518 | * amount extent size alignment may | ||
4519 | * require. | ||
4520 | */ | ||
4521 | alen = XFS_FILBLKS_MIN(len, | ||
4522 | MAXEXTLEN - (2 * extsz - 1)); | ||
4488 | error = xfs_bmap_extsize_align(mp, | 4523 | error = xfs_bmap_extsize_align(mp, |
4489 | &got, &prev, extsz, | 4524 | &got, &prev, extsz, |
4490 | rt, eof, | 4525 | rt, eof, |
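The 2 * extsz - 1 reduction covers the worst case where alignment can grow the extent by up to extsz - 1 blocks at each end. With illustrative numbers (not from the patch):

	/*
	 * MAXEXTLEN = 2097151 blocks (a 21-bit length field), extsz = 4096:
	 *
	 *	alen <= 2097151 - (2 * 4096 - 1) = 2088960
	 *
	 * so even worst-case alignment at both ends cannot push the final
	 * extent past the largest length a single on-disk extent can hold.
	 */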
@@ -5743,7 +5778,7 @@ xfs_check_block( | |||
5743 | else | 5778 | else |
5744 | thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); | 5779 | thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); |
5745 | if (*thispa == *pp) { | 5780 | if (*thispa == *pp) { |
5746 | cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", | 5781 | xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", |
5747 | __func__, j, i, | 5782 | __func__, j, i, |
5748 | (unsigned long long)be64_to_cpu(*thispa)); | 5783 | (unsigned long long)be64_to_cpu(*thispa)); |
5749 | panic("%s: ptrs are equal in node\n", | 5784 | panic("%s: ptrs are equal in node\n", |
@@ -5908,11 +5943,11 @@ xfs_bmap_check_leaf_extents( | |||
5908 | return; | 5943 | return; |
5909 | 5944 | ||
5910 | error0: | 5945 | error0: |
5911 | cmn_err(CE_WARN, "%s: at error0", __func__); | 5946 | xfs_warn(mp, "%s: at error0", __func__); |
5912 | if (bp_release) | 5947 | if (bp_release) |
5913 | xfs_trans_brelse(NULL, bp); | 5948 | xfs_trans_brelse(NULL, bp); |
5914 | error_norelse: | 5949 | error_norelse: |
5915 | cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", | 5950 | xfs_warn(mp, "%s: BAD after btree leaves for %d extents", |
5916 | __func__, i); | 5951 | __func__, i); |
5917 | panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); | 5952 | panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); |
5918 | return; | 5953 | return; |
@@ -6115,7 +6150,7 @@ xfs_bmap_punch_delalloc_range( | |||
6115 | if (error) { | 6150 | if (error) { |
6116 | /* something screwed, just bail */ | 6151 | /* something screwed, just bail */ |
6117 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 6152 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
6118 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | 6153 | xfs_alert(ip->i_mount, |
6119 | "Failed delalloc mapping lookup ino %lld fsb %lld.", | 6154 | "Failed delalloc mapping lookup ino %lld fsb %lld.", |
6120 | ip->i_ino, start_fsb); | 6155 | ip->i_ino, start_fsb); |
6121 | } | 6156 | } |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 04f9cca8da7e..2f9e97c128a0 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -634,9 +634,8 @@ xfs_btree_read_bufl( | |||
634 | return error; | 634 | return error; |
635 | } | 635 | } |
636 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); | 636 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); |
637 | if (bp != NULL) { | 637 | if (bp) |
638 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); | 638 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); |
639 | } | ||
640 | *bpp = bp; | 639 | *bpp = bp; |
641 | return 0; | 640 | return 0; |
642 | } | 641 | } |
@@ -944,13 +943,13 @@ xfs_btree_set_refs( | |||
944 | switch (cur->bc_btnum) { | 943 | switch (cur->bc_btnum) { |
945 | case XFS_BTNUM_BNO: | 944 | case XFS_BTNUM_BNO: |
946 | case XFS_BTNUM_CNT: | 945 | case XFS_BTNUM_CNT: |
947 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); | 946 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF); |
948 | break; | 947 | break; |
949 | case XFS_BTNUM_INO: | 948 | case XFS_BTNUM_INO: |
950 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); | 949 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF); |
951 | break; | 950 | break; |
952 | case XFS_BTNUM_BMAP: | 951 | case XFS_BTNUM_BMAP: |
953 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); | 952 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF); |
954 | break; | 953 | break; |
955 | default: | 954 | default: |
956 | ASSERT(0); | 955 | ASSERT(0); |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 2686d0d54c5b..7b7e005e3dcc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -130,10 +130,12 @@ xfs_buf_item_log_check( | |||
130 | orig = bip->bli_orig; | 130 | orig = bip->bli_orig; |
131 | buffer = XFS_BUF_PTR(bp); | 131 | buffer = XFS_BUF_PTR(bp); |
132 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { | 132 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { |
133 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) | 133 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { |
134 | cmn_err(CE_PANIC, | 134 | xfs_emerg(bp->b_mount, |
135 | "xfs_buf_item_log_check bip %x buffer %x orig %x index %d", | 135 | "%s: bip %x buffer %x orig %x index %d", |
136 | bip, bp, orig, x); | 136 | __func__, bip, bp, orig, x); |
137 | ASSERT(0); | ||
138 | } | ||
137 | } | 139 | } |
138 | } | 140 | } |
139 | #else | 141 | #else |
@@ -141,8 +143,7 @@ xfs_buf_item_log_check( | |||
141 | #define xfs_buf_item_log_check(x) | 143 | #define xfs_buf_item_log_check(x) |
142 | #endif | 144 | #endif |
143 | 145 | ||
144 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); | 146 | STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); |
145 | STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); | ||
146 | 147 | ||
147 | /* | 148 | /* |
148 | * This returns the number of log iovecs needed to log the | 149 | * This returns the number of log iovecs needed to log the |
@@ -428,13 +429,15 @@ xfs_buf_item_unpin( | |||
428 | 429 | ||
429 | if (remove) { | 430 | if (remove) { |
430 | /* | 431 | /* |
431 | * We have to remove the log item from the transaction | 432 | * If we are in a transaction context, we have to |
432 | * as we are about to release our reference to the | 433 | * remove the log item from the transaction as we are |
433 | * buffer. If we don't, the unlock that occurs later | 434 | * about to release our reference to the buffer. If we |
434 | * in xfs_trans_uncommit() will ry to reference the | 435 | * don't, the unlock that occurs later in |
436 | * xfs_trans_uncommit() will try to reference the | ||
435 | * buffer which we no longer have a hold on. | 437 | * buffer which we no longer have a hold on. |
436 | */ | 438 | */ |
437 | xfs_trans_del_item(lip); | 439 | if (lip->li_desc) |
440 | xfs_trans_del_item(lip); | ||
438 | 441 | ||
439 | /* | 442 | /* |
440 | * Since the transaction no longer refers to the buffer, | 443 | * Since the transaction no longer refers to the buffer, |
@@ -450,7 +453,7 @@ xfs_buf_item_unpin( | |||
450 | * xfs_trans_ail_delete() drops the AIL lock. | 453 | * xfs_trans_ail_delete() drops the AIL lock. |
451 | */ | 454 | */ |
452 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { | 455 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { |
453 | xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); | 456 | xfs_buf_do_callbacks(bp); |
454 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 457 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
455 | XFS_BUF_CLR_IODONE_FUNC(bp); | 458 | XFS_BUF_CLR_IODONE_FUNC(bp); |
456 | } else { | 459 | } else { |
@@ -918,15 +921,26 @@ xfs_buf_attach_iodone( | |||
918 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); | 921 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); |
919 | } | 922 | } |
920 | 923 | ||
924 | /* | ||
925 | * We can have many callbacks on a buffer. Running the callbacks individually | ||
926 | * can cause a lot of contention on the AIL lock, so we allow for a single | ||
927 | * callback to be able to scan the remaining lip->li_bio_list for other items | ||
928 | * of the same type and callback to be processed in the first call. | ||
929 | * | ||
930 | * As a result, the loop walking the callback list below will also modify the | ||
931 | * list. it removes the first item from the list and then runs the callback. | ||
932 | * The loop then restarts from the new head of the list. This allows the | ||
933 | * callback to scan and modify the list attached to the buffer and we don't | ||
934 | * have to care about maintaining a next item pointer. | ||
935 | */ | ||
921 | STATIC void | 936 | STATIC void |
922 | xfs_buf_do_callbacks( | 937 | xfs_buf_do_callbacks( |
923 | xfs_buf_t *bp, | 938 | struct xfs_buf *bp) |
924 | xfs_log_item_t *lip) | ||
925 | { | 939 | { |
926 | xfs_log_item_t *nlip; | 940 | struct xfs_log_item *lip; |
927 | 941 | ||
928 | while (lip != NULL) { | 942 | while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { |
929 | nlip = lip->li_bio_list; | 943 | XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); |
930 | ASSERT(lip->li_cb != NULL); | 944 | ASSERT(lip->li_cb != NULL); |
931 | /* | 945 | /* |
932 | * Clear the next pointer so we don't have any | 946 | * Clear the next pointer so we don't have any |
@@ -936,7 +950,6 @@ xfs_buf_do_callbacks( | |||
936 | */ | 950 | */ |
937 | lip->li_bio_list = NULL; | 951 | lip->li_bio_list = NULL; |
938 | lip->li_cb(bp, lip); | 952 | lip->li_cb(bp, lip); |
939 | lip = nlip; | ||
940 | } | 953 | } |
941 | } | 954 | } |
942 | 955 | ||
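The head-consuming loop above generalizes to any singly linked callback list; a minimal standalone sketch (hypothetical types, not kernel code):

	struct node {
		struct node	*next;
		void		(*cb)(struct node *);
	};

	static void
	drain(struct node **head)
	{
		struct node	*n;

		while ((n = *head) != NULL) {
			*head = n->next;	/* unlink before running */
			n->next = NULL;
			n->cb(n);		/* callback may consume more of *head */
		}
	}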
@@ -949,128 +962,75 @@ xfs_buf_do_callbacks( | |||
949 | */ | 962 | */ |
950 | void | 963 | void |
951 | xfs_buf_iodone_callbacks( | 964 | xfs_buf_iodone_callbacks( |
952 | xfs_buf_t *bp) | 965 | struct xfs_buf *bp) |
953 | { | 966 | { |
954 | xfs_log_item_t *lip; | 967 | struct xfs_log_item *lip = bp->b_fspriv; |
955 | static ulong lasttime; | 968 | struct xfs_mount *mp = lip->li_mountp; |
956 | static xfs_buftarg_t *lasttarg; | 969 | static ulong lasttime; |
957 | xfs_mount_t *mp; | 970 | static xfs_buftarg_t *lasttarg; |
958 | 971 | ||
959 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 972 | if (likely(!XFS_BUF_GETERROR(bp))) |
960 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 973 | goto do_callbacks; |
961 | 974 | ||
962 | if (XFS_BUF_GETERROR(bp) != 0) { | 975 | /* |
963 | /* | 976 | * If we've already decided to shutdown the filesystem because of |
964 | * If we've already decided to shutdown the filesystem | 977 | * I/O errors, there's no point in giving this a retry. |
965 | * because of IO errors, there's no point in giving this | 978 | */ |
966 | * a retry. | 979 | if (XFS_FORCED_SHUTDOWN(mp)) { |
967 | */ | 980 | XFS_BUF_SUPER_STALE(bp); |
968 | mp = lip->li_mountp; | 981 | trace_xfs_buf_item_iodone(bp, _RET_IP_); |
969 | if (XFS_FORCED_SHUTDOWN(mp)) { | 982 | goto do_callbacks; |
970 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | 983 | } |
971 | XFS_BUF_SUPER_STALE(bp); | ||
972 | trace_xfs_buf_item_iodone(bp, _RET_IP_); | ||
973 | xfs_buf_do_callbacks(bp, lip); | ||
974 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | ||
975 | XFS_BUF_CLR_IODONE_FUNC(bp); | ||
976 | xfs_buf_ioend(bp, 0); | ||
977 | return; | ||
978 | } | ||
979 | 984 | ||
980 | if ((XFS_BUF_TARGET(bp) != lasttarg) || | 985 | if (XFS_BUF_TARGET(bp) != lasttarg || |
981 | (time_after(jiffies, (lasttime + 5*HZ)))) { | 986 | time_after(jiffies, (lasttime + 5*HZ))) { |
982 | lasttime = jiffies; | 987 | lasttime = jiffies; |
983 | cmn_err(CE_ALERT, "Device %s, XFS metadata write error" | 988 | xfs_alert(mp, "Device %s: metadata write error block 0x%llx", |
984 | " block 0x%llx in %s", | 989 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), |
985 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 990 | (__uint64_t)XFS_BUF_ADDR(bp)); |
986 | (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); | 991 | } |
987 | } | 992 | lasttarg = XFS_BUF_TARGET(bp); |
988 | lasttarg = XFS_BUF_TARGET(bp); | ||
989 | 993 | ||
990 | if (XFS_BUF_ISASYNC(bp)) { | 994 | /* |
991 | /* | 995 | * If the write was asynchronous then no one will be looking for the |
992 | * If the write was asynchronous then noone will be | 996 | * error. Clear the error state and write the buffer out again. |
993 | * looking for the error. Clear the error state | 997 | * |
994 | * and write the buffer out again delayed write. | 998 | * During sync or umount we'll write all pending buffers again |
995 | * | 999 | * synchronously, which will catch these errors if they keep hanging |
996 | * XXXsup This is OK, so long as we catch these | 1000 | * around. |
997 | * before we start the umount; we don't want these | 1001 | */ |
998 | * DELWRI metadata bufs to be hanging around. | 1002 | if (XFS_BUF_ISASYNC(bp)) { |
999 | */ | 1003 | XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */ |
1000 | XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */ | 1004 | |
1001 | 1005 | if (!XFS_BUF_ISSTALE(bp)) { | |
1002 | if (!(XFS_BUF_ISSTALE(bp))) { | 1006 | XFS_BUF_DELAYWRITE(bp); |
1003 | XFS_BUF_DELAYWRITE(bp); | ||
1004 | XFS_BUF_DONE(bp); | ||
1005 | XFS_BUF_SET_START(bp); | ||
1006 | } | ||
1007 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | ||
1008 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1009 | xfs_buf_relse(bp); | ||
1010 | } else { | ||
1011 | /* | ||
1012 | * If the write of the buffer was not asynchronous, | ||
1013 | * then we want to make sure to return the error | ||
1014 | * to the caller of bwrite(). Because of this we | ||
1015 | * cannot clear the B_ERROR state at this point. | ||
1016 | * Instead we install a callback function that | ||
1017 | * will be called when the buffer is released, and | ||
1018 | * that routine will clear the error state and | ||
1019 | * set the buffer to be written out again after | ||
1020 | * some delay. | ||
1021 | */ | ||
1022 | /* We actually overwrite the existing b-relse | ||
1023 | function at times, but we're gonna be shutting down | ||
1024 | anyway. */ | ||
1025 | XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); | ||
1026 | XFS_BUF_DONE(bp); | 1007 | XFS_BUF_DONE(bp); |
1027 | XFS_BUF_FINISH_IOWAIT(bp); | 1008 | XFS_BUF_SET_START(bp); |
1028 | } | 1009 | } |
1010 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | ||
1011 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1012 | xfs_buf_relse(bp); | ||
1029 | return; | 1013 | return; |
1030 | } | 1014 | } |
1031 | 1015 | ||
1032 | xfs_buf_do_callbacks(bp, lip); | 1016 | /* |
1033 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1017 | * If the write of the buffer was synchronous, we want to make |
1034 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1018 | * sure to return the error to the caller of xfs_bwrite(). |
1035 | xfs_buf_ioend(bp, 0); | 1019 | */ |
1036 | } | ||
1037 | |||
1038 | /* | ||
1039 | * This is a callback routine attached to a buffer which gets an error | ||
1040 | * when being written out synchronously. | ||
1041 | */ | ||
1042 | STATIC void | ||
1043 | xfs_buf_error_relse( | ||
1044 | xfs_buf_t *bp) | ||
1045 | { | ||
1046 | xfs_log_item_t *lip; | ||
1047 | xfs_mount_t *mp; | ||
1048 | |||
1049 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | ||
1050 | mp = (xfs_mount_t *)lip->li_mountp; | ||
1051 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | ||
1052 | |||
1053 | XFS_BUF_STALE(bp); | 1020 | XFS_BUF_STALE(bp); |
1054 | XFS_BUF_DONE(bp); | 1021 | XFS_BUF_DONE(bp); |
1055 | XFS_BUF_UNDELAYWRITE(bp); | 1022 | XFS_BUF_UNDELAYWRITE(bp); |
1056 | XFS_BUF_ERROR(bp,0); | ||
1057 | 1023 | ||
1058 | trace_xfs_buf_error_relse(bp, _RET_IP_); | 1024 | trace_xfs_buf_error_relse(bp, _RET_IP_); |
1025 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | ||
1059 | 1026 | ||
1060 | if (! XFS_FORCED_SHUTDOWN(mp)) | 1027 | do_callbacks: |
1061 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 1028 | xfs_buf_do_callbacks(bp); |
1062 | /* | ||
1063 | * We have to unpin the pinned buffers so do the | ||
1064 | * callbacks. | ||
1065 | */ | ||
1066 | xfs_buf_do_callbacks(bp, lip); | ||
1067 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1029 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1068 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1030 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1069 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); | 1031 | xfs_buf_ioend(bp, 0); |
1070 | xfs_buf_relse(bp); | ||
1071 | } | 1032 | } |
1072 | 1033 | ||
1073 | |||
1074 | /* | 1034 | /* |
1075 | * This is the iodone() function for buffers which have been | 1035 | * This is the iodone() function for buffers which have been |
1076 | * logged. It is called when they are eventually flushed out. | 1036 | * logged. It is called when they are eventually flushed out. |
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 0e2ed43f16c7..b6ecd2061e7c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
@@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item { | |||
105 | xfs_buf_log_format_t bli_format; /* in-log header */ | 105 | xfs_buf_log_format_t bli_format; /* in-log header */ |
106 | } xfs_buf_log_item_t; | 106 | } xfs_buf_log_item_t; |
107 | 107 | ||
108 | /* | ||
109 | * This structure is used during recovery to record the buf log | ||
110 | * items which have been canceled and should not be replayed. | ||
111 | */ | ||
112 | typedef struct xfs_buf_cancel { | ||
113 | xfs_daddr_t bc_blkno; | ||
114 | uint bc_len; | ||
115 | int bc_refcount; | ||
116 | struct xfs_buf_cancel *bc_next; | ||
117 | } xfs_buf_cancel_t; | ||
118 | |||
119 | void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); | 108 | void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); |
120 | void xfs_buf_item_relse(struct xfs_buf *); | 109 | void xfs_buf_item_relse(struct xfs_buf *); |
121 | void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); | 110 | void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 1c00bedb3175..6102ac6d1dff 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -1995,13 +1995,12 @@ xfs_da_do_buf( | |||
1995 | error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); | 1995 | error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); |
1996 | if (unlikely(error == EFSCORRUPTED)) { | 1996 | if (unlikely(error == EFSCORRUPTED)) { |
1997 | if (xfs_error_level >= XFS_ERRLEVEL_LOW) { | 1997 | if (xfs_error_level >= XFS_ERRLEVEL_LOW) { |
1998 | cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n", | 1998 | xfs_alert(mp, "%s: bno %lld dir: inode %lld", |
1999 | (long long)bno); | 1999 | __func__, (long long)bno, |
2000 | cmn_err(CE_ALERT, "dir: inode %lld\n", | ||
2001 | (long long)dp->i_ino); | 2000 | (long long)dp->i_ino); |
2002 | for (i = 0; i < nmap; i++) { | 2001 | for (i = 0; i < nmap; i++) { |
2003 | cmn_err(CE_ALERT, | 2002 | xfs_alert(mp, |
2004 | "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n", | 2003 | "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", |
2005 | i, | 2004 | i, |
2006 | (long long)mapp[i].br_startoff, | 2005 | (long long)mapp[i].br_startoff, |
2007 | (long long)mapp[i].br_startblock, | 2006 | (long long)mapp[i].br_startblock, |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e60490bc00a6..be628677c288 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -270,9 +270,9 @@ xfs_swap_extents( | |||
270 | /* check inode formats now that data is flushed */ | 270 | /* check inode formats now that data is flushed */ |
271 | error = xfs_swap_extents_check_format(ip, tip); | 271 | error = xfs_swap_extents_check_format(ip, tip); |
272 | if (error) { | 272 | if (error) { |
273 | xfs_fs_cmn_err(CE_NOTE, mp, | 273 | xfs_notice(mp, |
274 | "%s: inode 0x%llx format is incompatible for exchanging.", | 274 | "%s: inode 0x%llx format is incompatible for exchanging.", |
275 | __FILE__, ip->i_ino); | 275 | __func__, ip->i_ino); |
276 | goto out_unlock; | 276 | goto out_unlock; |
277 | } | 277 | } |
278 | 278 | ||
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index a1321bc7f192..dba7a71cedf3 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
@@ -159,7 +159,7 @@ xfs_dir_ino_validate( | |||
159 | XFS_AGINO_TO_INO(mp, agno, agino) == ino; | 159 | XFS_AGINO_TO_INO(mp, agno, agino) == ino; |
160 | if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, | 160 | if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, |
161 | XFS_RANDOM_DIR_INO_VALIDATE))) { | 161 | XFS_RANDOM_DIR_INO_VALIDATE))) { |
162 | xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", | 162 | xfs_warn(mp, "Invalid inode number 0x%Lx", |
163 | (unsigned long long) ino); | 163 | (unsigned long long) ino); |
164 | XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); | 164 | XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); |
165 | return XFS_ERROR(EFSCORRUPTED); | 165 | return XFS_ERROR(EFSCORRUPTED); |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index f9a0864b696a..a0aab7d3294f 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -899,10 +899,9 @@ xfs_dir2_leafn_rebalance( | |||
899 | if(blk2->index < 0) { | 899 | if(blk2->index < 0) { |
900 | state->inleaf = 1; | 900 | state->inleaf = 1; |
901 | blk2->index = 0; | 901 | blk2->index = 0; |
902 | cmn_err(CE_ALERT, | 902 | xfs_alert(args->dp->i_mount, |
903 | "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: " | 903 | "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n", |
904 | "blk1->index %d\n", | 904 | __func__, blk1->index); |
905 | blk1->index); | ||
906 | } | 905 | } |
907 | } | 906 | } |
908 | 907 | ||
@@ -1641,26 +1640,22 @@ xfs_dir2_node_addname_int( | |||
1641 | } | 1640 | } |
1642 | 1641 | ||
1643 | if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { | 1642 | if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { |
1644 | cmn_err(CE_ALERT, | 1643 | xfs_alert(mp, |
1645 | "xfs_dir2_node_addname_int: dir ino " | 1644 | "%s: dir ino " "%llu needed freesp block %lld for\n" |
1646 | "%llu needed freesp block %lld for\n" | 1645 | " data block %lld, got %lld ifbno %llu lastfbno %d", |
1647 | " data block %lld, got %lld\n" | 1646 | __func__, (unsigned long long)dp->i_ino, |
1648 | " ifbno %llu lastfbno %d\n", | ||
1649 | (unsigned long long)dp->i_ino, | ||
1650 | (long long)xfs_dir2_db_to_fdb(mp, dbno), | 1647 | (long long)xfs_dir2_db_to_fdb(mp, dbno), |
1651 | (long long)dbno, (long long)fbno, | 1648 | (long long)dbno, (long long)fbno, |
1652 | (unsigned long long)ifbno, lastfbno); | 1649 | (unsigned long long)ifbno, lastfbno); |
1653 | if (fblk) { | 1650 | if (fblk) { |
1654 | cmn_err(CE_ALERT, | 1651 | xfs_alert(mp, |
1655 | " fblk 0x%p blkno %llu " | 1652 | " fblk 0x%p blkno %llu index %d magic 0x%x", |
1656 | "index %d magic 0x%x\n", | ||
1657 | fblk, | 1653 | fblk, |
1658 | (unsigned long long)fblk->blkno, | 1654 | (unsigned long long)fblk->blkno, |
1659 | fblk->index, | 1655 | fblk->index, |
1660 | fblk->magic); | 1656 | fblk->magic); |
1661 | } else { | 1657 | } else { |
1662 | cmn_err(CE_ALERT, | 1658 | xfs_alert(mp, " ... fblk is NULL"); |
1663 | " ... fblk is NULL\n"); | ||
1664 | } | 1659 | } |
1665 | XFS_ERROR_REPORT("xfs_dir2_node_addname_int", | 1660 | XFS_ERROR_REPORT("xfs_dir2_node_addname_int", |
1666 | XFS_ERRLEVEL_LOW, mp); | 1661 | XFS_ERRLEVEL_LOW, mp); |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index c78cc6a3d87c..39f06336b99d 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -48,7 +48,7 @@ xfs_error_trap(int e) | |||
48 | break; | 48 | break; |
49 | if (e != xfs_etrap[i]) | 49 | if (e != xfs_etrap[i]) |
50 | continue; | 50 | continue; |
51 | cmn_err(CE_NOTE, "xfs_error_trap: error %d", e); | 51 | xfs_notice(NULL, "%s: error %d", __func__, e); |
52 | BUG(); | 52 | BUG(); |
53 | break; | 53 | break; |
54 | } | 54 | } |
@@ -74,7 +74,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression, | |||
74 | 74 | ||
75 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { | 75 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { |
76 | if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) { | 76 | if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) { |
77 | cmn_err(CE_WARN, | 77 | xfs_warn(NULL, |
78 | "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", | 78 | "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", |
79 | expression, file, line, xfs_etest_fsname[i]); | 79 | expression, file, line, xfs_etest_fsname[i]); |
80 | return 1; | 80 | return 1; |
@@ -95,14 +95,14 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp) | |||
95 | 95 | ||
96 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { | 96 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { |
97 | if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) { | 97 | if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) { |
98 | cmn_err(CE_WARN, "XFS error tag #%d on", error_tag); | 98 | xfs_warn(mp, "error tag #%d on", error_tag); |
99 | return 0; | 99 | return 0; |
100 | } | 100 | } |
101 | } | 101 | } |
102 | 102 | ||
103 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { | 103 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { |
104 | if (xfs_etest[i] == 0) { | 104 | if (xfs_etest[i] == 0) { |
105 | cmn_err(CE_WARN, "Turned on XFS error tag #%d", | 105 | xfs_warn(mp, "Turned on XFS error tag #%d", |
106 | error_tag); | 106 | error_tag); |
107 | xfs_etest[i] = error_tag; | 107 | xfs_etest[i] = error_tag; |
108 | xfs_etest_fsid[i] = fsid; | 108 | xfs_etest_fsid[i] = fsid; |
@@ -114,7 +114,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp) | |||
114 | } | 114 | } |
115 | } | 115 | } |
116 | 116 | ||
117 | cmn_err(CE_WARN, "error tag overflow, too many turned on"); | 117 | xfs_warn(mp, "error tag overflow, too many turned on"); |
118 | 118 | ||
119 | return 1; | 119 | return 1; |
120 | } | 120 | } |
@@ -133,7 +133,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) | |||
133 | if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) && | 133 | if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) && |
134 | xfs_etest[i] != 0) { | 134 | xfs_etest[i] != 0) { |
135 | cleared = 1; | 135 | cleared = 1; |
136 | cmn_err(CE_WARN, "Clearing XFS error tag #%d", | 136 | xfs_warn(mp, "Clearing XFS error tag #%d", |
137 | xfs_etest[i]); | 137 | xfs_etest[i]); |
138 | xfs_etest[i] = 0; | 138 | xfs_etest[i] = 0; |
139 | xfs_etest_fsid[i] = 0LL; | 139 | xfs_etest_fsid[i] = 0LL; |
@@ -144,45 +144,12 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) | |||
144 | } | 144 | } |
145 | 145 | ||
146 | if (loud || cleared) | 146 | if (loud || cleared) |
147 | cmn_err(CE_WARN, | 147 | xfs_warn(mp, "Cleared all XFS error tags for filesystem"); |
148 | "Cleared all XFS error tags for filesystem \"%s\"", | ||
149 | mp->m_fsname); | ||
150 | 148 | ||
151 | return 0; | 149 | return 0; |
152 | } | 150 | } |
153 | #endif /* DEBUG */ | 151 | #endif /* DEBUG */ |
154 | 152 | ||
155 | |||
156 | void | ||
157 | xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...) | ||
158 | { | ||
159 | va_list ap; | ||
160 | |||
161 | va_start(ap, fmt); | ||
162 | xfs_fs_vcmn_err(level, mp, fmt, ap); | ||
163 | va_end(ap); | ||
164 | } | ||
165 | |||
166 | void | ||
167 | xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...) | ||
168 | { | ||
169 | va_list ap; | ||
170 | |||
171 | #ifdef DEBUG | ||
172 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); | ||
173 | #endif | ||
174 | |||
175 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) | ||
176 | && (level & CE_ALERT)) { | ||
177 | level &= ~CE_ALERT; | ||
178 | level |= CE_PANIC; | ||
179 | cmn_err(CE_ALERT, "XFS: Transforming an alert into a BUG."); | ||
180 | } | ||
181 | va_start(ap, fmt); | ||
182 | xfs_fs_vcmn_err(level, mp, fmt, ap); | ||
183 | va_end(ap); | ||
184 | } | ||
185 | |||
186 | void | 153 | void |
187 | xfs_error_report( | 154 | xfs_error_report( |
188 | const char *tag, | 155 | const char *tag, |
@@ -193,9 +160,8 @@ xfs_error_report( | |||
193 | inst_t *ra) | 160 | inst_t *ra) |
194 | { | 161 | { |
195 | if (level <= xfs_error_level) { | 162 | if (level <= xfs_error_level) { |
196 | xfs_cmn_err(XFS_PTAG_ERROR_REPORT, | 163 | xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, |
197 | CE_ALERT, mp, | 164 | "Internal error %s at line %d of file %s. Caller 0x%p\n", |
198 | "XFS internal error %s at line %d of file %s. Caller 0x%p\n", | ||
199 | tag, linenum, filename, ra); | 165 | tag, linenum, filename, ra); |
200 | 166 | ||
201 | xfs_stack_trace(); | 167 | xfs_stack_trace(); |
@@ -215,4 +181,5 @@ xfs_corruption_error( | |||
215 | if (level <= xfs_error_level) | 181 | if (level <= xfs_error_level) |
216 | xfs_hex_dump(p, 16); | 182 | xfs_hex_dump(p, 16); |
217 | xfs_error_report(tag, level, mp, filename, linenum, ra); | 183 | xfs_error_report(tag, level, mp, filename, linenum, ra); |
184 | xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); | ||
218 | } | 185 | } |
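The error-tag bookkeeping above is self-contained enough to model outside the kernel. The sketch below is a user-space approximation of what xfs_errortag_add() does with its fixed-size table (duplicate detection, first-free-slot insertion, overflow warning); the table size, names and messages are stand-ins, not the real definitions.

#include <stdio.h>

#define NUM_INJECT_ERROR 10	/* stand-in for XFS_NUM_INJECT_ERROR */

static int etest[NUM_INJECT_ERROR];		/* active tags; 0 == free slot */
static long long etest_fsid[NUM_INJECT_ERROR];	/* owning filesystem ids */

/* Returns 0 when the tag is (now) active, 1 on table overflow. */
static int errortag_add(int tag, long long fsid)
{
	int i;

	for (i = 0; i < NUM_INJECT_ERROR; i++) {
		if (etest_fsid[i] == fsid && etest[i] == tag) {
			printf("error tag #%d already on\n", tag);
			return 0;
		}
	}
	for (i = 0; i < NUM_INJECT_ERROR; i++) {
		if (etest[i] == 0) {
			printf("Turned on error tag #%d\n", tag);
			etest[i] = tag;
			etest_fsid[i] = fsid;
			return 0;
		}
	}
	printf("error tag overflow, too many turned on\n");
	return 1;
}

int main(void)
{
	int tag;

	for (tag = 1; tag <= NUM_INJECT_ERROR + 1; tag++)
		errortag_add(tag, 0x1234);	/* last call overflows */
	return 0;
}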
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index f338847f80b8..079a367f44ee 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -136,8 +136,8 @@ extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); | |||
136 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ | 136 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ |
137 | (rf)))) | 137 | (rf)))) |
138 | 138 | ||
139 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); | 139 | extern int xfs_errortag_add(int error_tag, struct xfs_mount *mp); |
140 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | 140 | extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud); |
141 | #else | 141 | #else |
142 | #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) | 142 | #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) |
143 | #define xfs_errortag_add(tag, mp) (ENOSYS) | 143 | #define xfs_errortag_add(tag, mp) (ENOSYS) |
@@ -145,10 +145,8 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | |||
145 | #endif /* DEBUG */ | 145 | #endif /* DEBUG */ |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * XFS panic tags -- allow a call to xfs_cmn_err() be turned into | 148 | * XFS panic tags -- allow a call to xfs_alert_tag() to be turned into |
149 | * a panic by setting xfs_panic_mask in a | 149 | * a panic by setting xfs_panic_mask in a sysctl. |
150 | * sysctl. update xfs_max[XFS_PARAM] if | ||
151 | * more are added. | ||
152 | */ | 150 | */ |
153 | #define XFS_NO_PTAG 0 | 151 | #define XFS_NO_PTAG 0 |
154 | #define XFS_PTAG_IFLUSH 0x00000001 | 152 | #define XFS_PTAG_IFLUSH 0x00000001 |
@@ -160,23 +158,4 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | |||
160 | #define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 | 158 | #define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 |
161 | #define XFS_PTAG_FSBLOCK_ZERO 0x00000080 | 159 | #define XFS_PTAG_FSBLOCK_ZERO 0x00000080 |
162 | 160 | ||
163 | struct xfs_mount; | ||
164 | |||
165 | extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp, | ||
166 | char *fmt, va_list ap) | ||
167 | __attribute__ ((format (printf, 3, 0))); | ||
168 | extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, | ||
169 | char *fmt, ...) | ||
170 | __attribute__ ((format (printf, 4, 5))); | ||
171 | extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...) | ||
172 | __attribute__ ((format (printf, 3, 4))); | ||
173 | |||
174 | extern void xfs_hex_dump(void *p, int length); | ||
175 | |||
176 | #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ | ||
177 | xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) | ||
178 | |||
179 | #define xfs_fs_mount_cmn_err(f, fmt, args...) \ | ||
180 | ((f & XFS_MFSI_QUIET)? (void)0 : cmn_err(CE_WARN, "XFS: " fmt, ## args)) | ||
181 | |||
182 | #endif /* __XFS_ERROR_H__ */ | 161 | #endif /* __XFS_ERROR_H__ */ |
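The removed xfs_cmn_err() shows the escalation policy that xfs_alert_tag() is now expected to carry: when the message's panic tag is set in xfs_panic_mask, an alert becomes a BUG. A minimal user-space sketch of that decision, with abort() standing in for BUG() and an illustrative tag value (the real XFS_PTAG_ERROR_REPORT define is not shown in this hunk):

#include <stdio.h>
#include <stdlib.h>

#define PTAG_ERROR_REPORT 0x10	/* illustrative value, not the real define */

static unsigned int panic_mask;	/* in XFS this is set via a sysctl */

/* Log an alert; escalate to a crash when the tag is in the panic mask. */
static void alert_tag(unsigned int panic_tag, const char *msg)
{
	fprintf(stderr, "XFS alert: %s\n", msg);
	if (panic_mask & panic_tag) {
		fprintf(stderr, "XFS: Transforming an alert into a BUG.\n");
		abort();	/* stands in for BUG() */
	}
}

int main(void)
{
	alert_tag(PTAG_ERROR_REPORT, "internal error, survivable");
	panic_mask |= PTAG_ERROR_REPORT;
	alert_tag(PTAG_ERROR_REPORT, "same error, now fatal");
	return 0;
}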
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index a55e687bf562..d22e62623437 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -48,6 +48,28 @@ xfs_efi_item_free( | |||
48 | } | 48 | } |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * Freeing the efi requires that we remove it from the AIL if it has already | ||
52 | * been placed there. However, the EFI may not yet have been placed in the AIL | ||
53 | * when called by xfs_efi_release() from EFD processing due to the ordering of | ||
54 | * committed vs unpin operations in bulk insert operations. Hence the | ||
55 | * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees | ||
56 | * the EFI. | ||
57 | */ | ||
58 | STATIC void | ||
59 | __xfs_efi_release( | ||
60 | struct xfs_efi_log_item *efip) | ||
61 | { | ||
62 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | ||
63 | |||
64 | if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { | ||
65 | spin_lock(&ailp->xa_lock); | ||
66 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
67 | xfs_trans_ail_delete(ailp, &efip->efi_item); | ||
68 | xfs_efi_item_free(efip); | ||
69 | } | ||
70 | } | ||
71 | |||
72 | /* | ||
51 | * This returns the number of iovecs needed to log the given efi item. | 73 | * This returns the number of iovecs needed to log the given efi item. |
52 | * We only need 1 iovec for an efi item. It just logs the efi_log_format | 74 | * We only need 1 iovec for an efi item. It just logs the efi_log_format |
53 | * structure. | 75 | * structure. |
@@ -74,7 +96,8 @@ xfs_efi_item_format( | |||
74 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 96 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
75 | uint size; | 97 | uint size; |
76 | 98 | ||
77 | ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); | 99 | ASSERT(atomic_read(&efip->efi_next_extent) == |
100 | efip->efi_format.efi_nextents); | ||
78 | 101 | ||
79 | efip->efi_format.efi_type = XFS_LI_EFI; | 102 | efip->efi_format.efi_type = XFS_LI_EFI; |
80 | 103 | ||
@@ -99,10 +122,12 @@ xfs_efi_item_pin( | |||
99 | } | 122 | } |
100 | 123 | ||
101 | /* | 124 | /* |
102 | * While EFIs cannot really be pinned, the unpin operation is the | 125 | * While EFIs cannot really be pinned, the unpin operation is the last place at |
103 | * last place at which the EFI is manipulated during a transaction. | 126 | * which the EFI is manipulated during a transaction. If we are being asked to |
104 | * Here we coordinate with xfs_efi_cancel() to determine who gets to | 127 | * remove the EFI it's because the transaction has been cancelled and by |
105 | * free the EFI. | 128 | * definition that means the EFI cannot be in the AIL so remove it from the |
129 | * transaction and free it. Otherwise coordinate with xfs_efi_release() (via | ||
130 | * XFS_EFI_COMMITTED) to determine who gets to free the EFI. | ||
106 | */ | 131 | */ |
107 | STATIC void | 132 | STATIC void |
108 | xfs_efi_item_unpin( | 133 | xfs_efi_item_unpin( |
@@ -110,20 +135,15 @@ xfs_efi_item_unpin( | |||
110 | int remove) | 135 | int remove) |
111 | { | 136 | { |
112 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 137 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
113 | struct xfs_ail *ailp = lip->li_ailp; | ||
114 | 138 | ||
115 | spin_lock(&ailp->xa_lock); | 139 | if (remove) { |
116 | if (efip->efi_flags & XFS_EFI_CANCELED) { | 140 | ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); |
117 | if (remove) | 141 | if (lip->li_desc) |
118 | xfs_trans_del_item(lip); | 142 | xfs_trans_del_item(lip); |
119 | |||
120 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
121 | xfs_trans_ail_delete(ailp, lip); | ||
122 | xfs_efi_item_free(efip); | 143 | xfs_efi_item_free(efip); |
123 | } else { | 144 | return; |
124 | efip->efi_flags |= XFS_EFI_COMMITTED; | ||
125 | spin_unlock(&ailp->xa_lock); | ||
126 | } | 145 | } |
146 | __xfs_efi_release(efip); | ||
127 | } | 147 | } |
128 | 148 | ||
129 | /* | 149 | /* |
@@ -152,16 +172,20 @@ xfs_efi_item_unlock( | |||
152 | } | 172 | } |
153 | 173 | ||
154 | /* | 174 | /* |
155 | * The EFI is logged only once and cannot be moved in the log, so | 175 | * The EFI is logged only once and cannot be moved in the log, so simply return |
156 | * simply return the lsn at which it's been logged. The canceled | 176 | * the lsn at which it's been logged. For bulk transaction committed |
157 | * flag is not paid any attention here. Checking for that is delayed | 177 | * processing, the EFI may be processed but not yet unpinned prior to the EFD |
158 | * until the EFI is unpinned. | 178 | * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected |
179 | * when processing the EFD. | ||
159 | */ | 180 | */ |
160 | STATIC xfs_lsn_t | 181 | STATIC xfs_lsn_t |
161 | xfs_efi_item_committed( | 182 | xfs_efi_item_committed( |
162 | struct xfs_log_item *lip, | 183 | struct xfs_log_item *lip, |
163 | xfs_lsn_t lsn) | 184 | xfs_lsn_t lsn) |
164 | { | 185 | { |
186 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | ||
187 | |||
188 | set_bit(XFS_EFI_COMMITTED, &efip->efi_flags); | ||
165 | return lsn; | 189 | return lsn; |
166 | } | 190 | } |
167 | 191 | ||
@@ -230,6 +254,7 @@ xfs_efi_init( | |||
230 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); | 254 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); |
231 | efip->efi_format.efi_nextents = nextents; | 255 | efip->efi_format.efi_nextents = nextents; |
232 | efip->efi_format.efi_id = (__psint_t)(void*)efip; | 256 | efip->efi_format.efi_id = (__psint_t)(void*)efip; |
257 | atomic_set(&efip->efi_next_extent, 0); | ||
233 | 258 | ||
234 | return efip; | 259 | return efip; |
235 | } | 260 | } |
@@ -289,37 +314,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) | |||
289 | } | 314 | } |
290 | 315 | ||
291 | /* | 316 | /* |
292 | * This is called by the efd item code below to release references to | 317 | * This is called by the efd item code below to release references to the given |
293 | * the given efi item. Each efd calls this with the number of | 318 | * efi item. Each efd calls this with the number of extents that it has |
294 | * extents that it has logged, and when the sum of these reaches | 319 | * logged, and when the sum of these reaches the total number of extents logged |
295 | * the total number of extents logged by this efi item we can free | 320 | * by this efi item we can free the efi item. |
296 | * the efi item. | ||
297 | * | ||
298 | * Freeing the efi item requires that we remove it from the AIL. | ||
299 | * We'll use the AIL lock to protect our counters as well as | ||
300 | * the removal from the AIL. | ||
301 | */ | 321 | */ |
302 | void | 322 | void |
303 | xfs_efi_release(xfs_efi_log_item_t *efip, | 323 | xfs_efi_release(xfs_efi_log_item_t *efip, |
304 | uint nextents) | 324 | uint nextents) |
305 | { | 325 | { |
306 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | 326 | ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); |
307 | int extents_left; | 327 | if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) |
308 | 328 | __xfs_efi_release(efip); | |
309 | ASSERT(efip->efi_next_extent > 0); | ||
310 | ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); | ||
311 | |||
312 | spin_lock(&ailp->xa_lock); | ||
313 | ASSERT(efip->efi_next_extent >= nextents); | ||
314 | efip->efi_next_extent -= nextents; | ||
315 | extents_left = efip->efi_next_extent; | ||
316 | if (extents_left == 0) { | ||
317 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
318 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); | ||
319 | xfs_efi_item_free(efip); | ||
320 | } else { | ||
321 | spin_unlock(&ailp->xa_lock); | ||
322 | } | ||
323 | } | 329 | } |
324 | 330 | ||
325 | static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) | 331 | static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) |
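The net effect of this hunk is to replace the AIL-lock-protected counter with two lock-free steps: the extent count drains via atomic_sub_and_test(), and the COMMITTED bit arbitrates which of the unpin and EFD paths performs the free via test_and_clear_bit(). A rough user-space model of that protocol using C11 atomics (the names and the single-threaded driver are illustrative only):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct efi {
	atomic_uint next_extent;	/* extents not yet released by EFDs */
	atomic_bool committed;		/* models the XFS_EFI_COMMITTED bit */
};

/* ->iop_committed side: flag that commit processing has run. */
static void efi_committed(struct efi *e)
{
	atomic_store(&e->committed, true);
}

/*
 * Both the unpin path and the final EFD release call this; the caller
 * that finds the flag already cleared is the last one and frees.
 * atomic_exchange() plays the role of test_and_clear_bit().
 */
static void efi_release_final(struct efi *e)
{
	if (!atomic_exchange(&e->committed, false))
		printf("last reference gone: unlink from AIL and free\n");
}

/* EFD side: drop nextents references; the drop to zero triggers release. */
static void efi_release(struct efi *e, unsigned int nextents)
{
	if (atomic_fetch_sub(&e->next_extent, nextents) == nextents)
		efi_release_final(e);	/* mirrors atomic_sub_and_test() */
}

int main(void)
{
	struct efi e = { 3, false };

	efi_committed(&e);	/* transaction committed */
	efi_release(&e, 2);	/* first EFD covers 2 extents */
	efi_release(&e, 1);	/* second EFD drains the count */
	efi_release_final(&e);	/* unpin path: flag already clear -> free */
	return 0;
}

Whichever of the two final calls runs second observes the cleared flag and frees, so the free happens exactly once regardless of the committed-vs-unpin ordering the comment above describes.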
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 0d22c56fdf64..375f68e42531 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h | |||
@@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 { | |||
111 | #define XFS_EFI_MAX_FAST_EXTENTS 16 | 111 | #define XFS_EFI_MAX_FAST_EXTENTS 16 |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * Define EFI flags. | 114 | * Define EFI flag bits. Manipulated by set/clear/test_bit operators. |
115 | */ | 115 | */ |
116 | #define XFS_EFI_RECOVERED 0x1 | 116 | #define XFS_EFI_RECOVERED 1 |
117 | #define XFS_EFI_COMMITTED 0x2 | 117 | #define XFS_EFI_COMMITTED 2 |
118 | #define XFS_EFI_CANCELED 0x4 | ||
119 | 118 | ||
120 | /* | 119 | /* |
121 | * This is the "extent free intention" log item. It is used | 120 | * This is the "extent free intention" log item. It is used |
@@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 { | |||
125 | */ | 124 | */ |
126 | typedef struct xfs_efi_log_item { | 125 | typedef struct xfs_efi_log_item { |
127 | xfs_log_item_t efi_item; | 126 | xfs_log_item_t efi_item; |
128 | uint efi_flags; /* misc flags */ | 127 | atomic_t efi_next_extent; |
129 | uint efi_next_extent; | 128 | unsigned long efi_flags; /* misc flags */ |
130 | xfs_efi_log_format_t efi_format; | 129 | xfs_efi_log_format_t efi_format; |
131 | } xfs_efi_log_item_t; | 130 | } xfs_efi_log_item_t; |
132 | 131 | ||
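Note the quiet semantic change in the flag values: set_bit()/test_bit() take a bit number, not a mask, which is why XFS_EFI_COMMITTED moves from 0x2 to 2. A small plain-C illustration of the difference (no kernel bitops involved):

#include <stdio.h>

int main(void)
{
	unsigned long mask_style = 0, bit_style = 0;

	mask_style |= 0x2;	/* old definition: 0x2 is a mask -> bit 1 */
	bit_style |= 1UL << 2;	/* new definition: 2 is a bit number -> 0x4 */

	/* Mixing the two conventions silently sets or tests the wrong bit. */
	printf("mask=0x%lx bit=0x%lx\n", mask_style, bit_style);
	return 0;
}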
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a7c116e814af..9153d2c77caf 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -53,6 +53,9 @@ xfs_fs_geometry( | |||
53 | xfs_fsop_geom_t *geo, | 53 | xfs_fsop_geom_t *geo, |
54 | int new_version) | 54 | int new_version) |
55 | { | 55 | { |
56 | |||
57 | memset(geo, 0, sizeof(*geo)); | ||
58 | |||
56 | geo->blocksize = mp->m_sb.sb_blocksize; | 59 | geo->blocksize = mp->m_sb.sb_blocksize; |
57 | geo->rtextsize = mp->m_sb.sb_rextsize; | 60 | geo->rtextsize = mp->m_sb.sb_rextsize; |
58 | geo->agblocks = mp->m_sb.sb_agblocks; | 61 | geo->agblocks = mp->m_sb.sb_agblocks; |
@@ -374,6 +377,7 @@ xfs_growfs_data_private( | |||
374 | mp->m_maxicount = icount << mp->m_sb.sb_inopblog; | 377 | mp->m_maxicount = icount << mp->m_sb.sb_inopblog; |
375 | } else | 378 | } else |
376 | mp->m_maxicount = 0; | 379 | mp->m_maxicount = 0; |
380 | xfs_set_low_space_thresholds(mp); | ||
377 | 381 | ||
378 | /* update secondary superblocks. */ | 382 | /* update secondary superblocks. */ |
379 | for (agno = 1; agno < nagcount; agno++) { | 383 | for (agno = 1; agno < nagcount; agno++) { |
@@ -381,8 +385,8 @@ xfs_growfs_data_private( | |||
381 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), | 385 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), |
382 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 386 | XFS_FSS_TO_BB(mp, 1), 0, &bp); |
383 | if (error) { | 387 | if (error) { |
384 | xfs_fs_cmn_err(CE_WARN, mp, | 388 | xfs_warn(mp, |
385 | "error %d reading secondary superblock for ag %d", | 389 | "error %d reading secondary superblock for ag %d", |
386 | error, agno); | 390 | error, agno); |
387 | break; | 391 | break; |
388 | } | 392 | } |
@@ -395,7 +399,7 @@ xfs_growfs_data_private( | |||
395 | if (!(error = xfs_bwrite(mp, bp))) { | 399 | if (!(error = xfs_bwrite(mp, bp))) { |
396 | continue; | 400 | continue; |
397 | } else { | 401 | } else { |
398 | xfs_fs_cmn_err(CE_WARN, mp, | 402 | xfs_warn(mp, |
399 | "write error %d updating secondary superblock for ag %d", | 403 | "write error %d updating secondary superblock for ag %d", |
400 | error, agno); | 404 | error, agno); |
401 | break; /* no point in continuing */ | 405 | break; /* no point in continuing */ |
@@ -611,12 +615,13 @@ out: | |||
611 | * | 615 | * |
612 | * We cannot use an inode here for this - that will push dirty state back up | 616 | * We cannot use an inode here for this - that will push dirty state back up |
613 | * into the VFS and then periodic inode flushing will prevent log covering from | 617 | * into the VFS and then periodic inode flushing will prevent log covering from |
614 | * making progress. Hence we log a field in the superblock instead. | 618 | * making progress. Hence we log a field in the superblock instead and use a |
619 | * synchronous transaction to ensure the superblock is immediately unpinned | ||
620 | * and can be written back. | ||
615 | */ | 621 | */ |
616 | int | 622 | int |
617 | xfs_fs_log_dummy( | 623 | xfs_fs_log_dummy( |
618 | xfs_mount_t *mp, | 624 | xfs_mount_t *mp) |
619 | int flags) | ||
620 | { | 625 | { |
621 | xfs_trans_t *tp; | 626 | xfs_trans_t *tp; |
622 | int error; | 627 | int error; |
@@ -631,8 +636,7 @@ xfs_fs_log_dummy( | |||
631 | 636 | ||
632 | /* log the UUID because it is an unchanging field */ | 637 | /* log the UUID because it is an unchanging field */ |
633 | xfs_mod_sb(tp, XFS_SB_UUID); | 638 | xfs_mod_sb(tp, XFS_SB_UUID); |
634 | if (flags & SYNC_WAIT) | 639 | xfs_trans_set_sync(tp); |
635 | xfs_trans_set_sync(tp); | ||
636 | return xfs_trans_commit(tp, 0); | 640 | return xfs_trans_commit(tp, 0); |
637 | } | 641 | } |
638 | 642 | ||
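The memset() added to xfs_fs_geometry() matters because the structure is copied out to user space: any field the function does not explicitly set would otherwise leak uninitialized kernel stack bytes. A user-space illustration of the pattern, with a made-up two-field struct in place of xfs_fsop_geom_t:

#include <stdio.h>
#include <string.h>

struct geom {			/* made-up stand-in for xfs_fsop_geom_t */
	unsigned int blocksize;
	unsigned int logsectsize;	/* only filled in for new callers */
};

static void fill_geom(struct geom *g, int new_version)
{
	memset(g, 0, sizeof(*g));	/* the fix: no stale bytes escape */
	g->blocksize = 4096;
	if (new_version)
		g->logsectsize = 512;
}

int main(void)
{
	struct geom g;

	fill_geom(&g, 0);	/* old-style caller: logsectsize stays 0 */
	printf("blocksize=%u logsectsize=%u\n", g.blocksize, g.logsectsize);
	return 0;
}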
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index a786c5212c1e..1b6a98b66886 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h | |||
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); | |||
25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, | 25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, |
26 | xfs_fsop_resblks_t *outval); | 26 | xfs_fsop_resblks_t *outval); |
27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); | 27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); |
28 | extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); | 28 | extern int xfs_fs_log_dummy(struct xfs_mount *mp); |
29 | 29 | ||
30 | #endif /* __XFS_FSOPS_H__ */ | 30 | #endif /* __XFS_FSOPS_H__ */ |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 0626a32c3447..84ebeec16642 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -1055,28 +1055,23 @@ xfs_difree( | |||
1055 | */ | 1055 | */ |
1056 | agno = XFS_INO_TO_AGNO(mp, inode); | 1056 | agno = XFS_INO_TO_AGNO(mp, inode); |
1057 | if (agno >= mp->m_sb.sb_agcount) { | 1057 | if (agno >= mp->m_sb.sb_agcount) { |
1058 | cmn_err(CE_WARN, | 1058 | xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", |
1059 | "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", | 1059 | __func__, agno, mp->m_sb.sb_agcount); |
1060 | agno, mp->m_sb.sb_agcount, mp->m_fsname); | ||
1061 | ASSERT(0); | 1060 | ASSERT(0); |
1062 | return XFS_ERROR(EINVAL); | 1061 | return XFS_ERROR(EINVAL); |
1063 | } | 1062 | } |
1064 | agino = XFS_INO_TO_AGINO(mp, inode); | 1063 | agino = XFS_INO_TO_AGINO(mp, inode); |
1065 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { | 1064 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { |
1066 | cmn_err(CE_WARN, | 1065 | xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", |
1067 | "xfs_difree: inode != XFS_AGINO_TO_INO() " | 1066 | __func__, (unsigned long long)inode, |
1068 | "(%llu != %llu) on %s. Returning EINVAL.", | 1067 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); |
1069 | (unsigned long long)inode, | ||
1070 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino), | ||
1071 | mp->m_fsname); | ||
1072 | ASSERT(0); | 1068 | ASSERT(0); |
1073 | return XFS_ERROR(EINVAL); | 1069 | return XFS_ERROR(EINVAL); |
1074 | } | 1070 | } |
1075 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); | 1071 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); |
1076 | if (agbno >= mp->m_sb.sb_agblocks) { | 1072 | if (agbno >= mp->m_sb.sb_agblocks) { |
1077 | cmn_err(CE_WARN, | 1073 | xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", |
1078 | "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", | 1074 | __func__, agbno, mp->m_sb.sb_agblocks); |
1079 | agbno, mp->m_sb.sb_agblocks, mp->m_fsname); | ||
1080 | ASSERT(0); | 1075 | ASSERT(0); |
1081 | return XFS_ERROR(EINVAL); | 1076 | return XFS_ERROR(EINVAL); |
1082 | } | 1077 | } |
@@ -1085,9 +1080,8 @@ xfs_difree( | |||
1085 | */ | 1080 | */ |
1086 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | 1081 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); |
1087 | if (error) { | 1082 | if (error) { |
1088 | cmn_err(CE_WARN, | 1083 | xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", |
1089 | "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", | 1084 | __func__, error); |
1090 | error, mp->m_fsname); | ||
1091 | return error; | 1085 | return error; |
1092 | } | 1086 | } |
1093 | agi = XFS_BUF_TO_AGI(agbp); | 1087 | agi = XFS_BUF_TO_AGI(agbp); |
@@ -1106,17 +1100,15 @@ xfs_difree( | |||
1106 | * Look for the entry describing this inode. | 1100 | * Look for the entry describing this inode. |
1107 | */ | 1101 | */ |
1108 | if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { | 1102 | if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { |
1109 | cmn_err(CE_WARN, | 1103 | xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.", |
1110 | "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.", | 1104 | __func__, error); |
1111 | error, mp->m_fsname); | ||
1112 | goto error0; | 1105 | goto error0; |
1113 | } | 1106 | } |
1114 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1107 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1115 | error = xfs_inobt_get_rec(cur, &rec, &i); | 1108 | error = xfs_inobt_get_rec(cur, &rec, &i); |
1116 | if (error) { | 1109 | if (error) { |
1117 | cmn_err(CE_WARN, | 1110 | xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", |
1118 | "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", | 1111 | __func__, error); |
1119 | error, mp->m_fsname); | ||
1120 | goto error0; | 1112 | goto error0; |
1121 | } | 1113 | } |
1122 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1114 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
@@ -1157,8 +1149,8 @@ xfs_difree( | |||
1157 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); | 1149 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); |
1158 | 1150 | ||
1159 | if ((error = xfs_btree_delete(cur, &i))) { | 1151 | if ((error = xfs_btree_delete(cur, &i))) { |
1160 | cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", | 1152 | xfs_warn(mp, "%s: xfs_btree_delete returned error %d.", |
1161 | error, mp->m_fsname); | 1153 | __func__, error); |
1162 | goto error0; | 1154 | goto error0; |
1163 | } | 1155 | } |
1164 | 1156 | ||
@@ -1170,9 +1162,8 @@ xfs_difree( | |||
1170 | 1162 | ||
1171 | error = xfs_inobt_update(cur, &rec); | 1163 | error = xfs_inobt_update(cur, &rec); |
1172 | if (error) { | 1164 | if (error) { |
1173 | cmn_err(CE_WARN, | 1165 | xfs_warn(mp, "%s: xfs_inobt_update returned error %d.", |
1174 | "xfs_difree: xfs_inobt_update returned an error %d on %s.", | 1166 | __func__, error); |
1175 | error, mp->m_fsname); | ||
1176 | goto error0; | 1167 | goto error0; |
1177 | } | 1168 | } |
1178 | 1169 | ||
@@ -1218,10 +1209,9 @@ xfs_imap_lookup( | |||
1218 | 1209 | ||
1219 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | 1210 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); |
1220 | if (error) { | 1211 | if (error) { |
1221 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1212 | xfs_alert(mp, |
1222 | "xfs_ialloc_read_agi() returned " | 1213 | "%s: xfs_ialloc_read_agi() returned error %d, agno %d", |
1223 | "error %d, agno %d", | 1214 | __func__, error, agno); |
1224 | error, agno); | ||
1225 | return error; | 1215 | return error; |
1226 | } | 1216 | } |
1227 | 1217 | ||
@@ -1299,24 +1289,21 @@ xfs_imap( | |||
1299 | if (flags & XFS_IGET_UNTRUSTED) | 1289 | if (flags & XFS_IGET_UNTRUSTED) |
1300 | return XFS_ERROR(EINVAL); | 1290 | return XFS_ERROR(EINVAL); |
1301 | if (agno >= mp->m_sb.sb_agcount) { | 1291 | if (agno >= mp->m_sb.sb_agcount) { |
1302 | xfs_fs_cmn_err(CE_ALERT, mp, | 1292 | xfs_alert(mp, |
1303 | "xfs_imap: agno (%d) >= " | 1293 | "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", |
1304 | "mp->m_sb.sb_agcount (%d)", | 1294 | __func__, agno, mp->m_sb.sb_agcount); |
1305 | agno, mp->m_sb.sb_agcount); | ||
1306 | } | 1295 | } |
1307 | if (agbno >= mp->m_sb.sb_agblocks) { | 1296 | if (agbno >= mp->m_sb.sb_agblocks) { |
1308 | xfs_fs_cmn_err(CE_ALERT, mp, | 1297 | xfs_alert(mp, |
1309 | "xfs_imap: agbno (0x%llx) >= " | 1298 | "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", |
1310 | "mp->m_sb.sb_agblocks (0x%lx)", | 1299 | __func__, (unsigned long long)agbno, |
1311 | (unsigned long long) agbno, | 1300 | (unsigned long)mp->m_sb.sb_agblocks); |
1312 | (unsigned long) mp->m_sb.sb_agblocks); | ||
1313 | } | 1301 | } |
1314 | if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { | 1302 | if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { |
1315 | xfs_fs_cmn_err(CE_ALERT, mp, | 1303 | xfs_alert(mp, |
1316 | "xfs_imap: ino (0x%llx) != " | 1304 | "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", |
1317 | "XFS_AGINO_TO_INO(mp, agno, agino) " | 1305 | __func__, ino, |
1318 | "(0x%llx)", | 1306 | XFS_AGINO_TO_INO(mp, agno, agino)); |
1319 | ino, XFS_AGINO_TO_INO(mp, agno, agino)); | ||
1320 | } | 1307 | } |
1321 | xfs_stack_trace(); | 1308 | xfs_stack_trace(); |
1322 | #endif /* DEBUG */ | 1309 | #endif /* DEBUG */ |
@@ -1388,10 +1375,9 @@ out_map: | |||
1388 | */ | 1375 | */ |
1389 | if ((imap->im_blkno + imap->im_len) > | 1376 | if ((imap->im_blkno + imap->im_len) > |
1390 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | 1377 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { |
1391 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1378 | xfs_alert(mp, |
1392 | "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " | 1379 | "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", |
1393 | " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", | 1380 | __func__, (unsigned long long) imap->im_blkno, |
1394 | (unsigned long long) imap->im_blkno, | ||
1395 | (unsigned long long) imap->im_len, | 1381 | (unsigned long long) imap->im_len, |
1396 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | 1382 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); |
1397 | return XFS_ERROR(EINVAL); | 1383 | return XFS_ERROR(EINVAL); |
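A pattern worth noting across these conversions: the function name is no longer spelled out in each format string but passed as __func__, and the trailing "on %s" disappears because xfs_warn(mp, ...) prefixes the filesystem name itself. A toy macro showing the __func__ half (the mp prefixing is internal to the kernel helpers; this macro and its fsname prefix are purely illustrative):

#include <stdio.h>

/* Toy counterpart of xfs_warn(mp, ...); ## is the GNU/kernel varargs idiom. */
#define my_warn(fmt, ...) \
	fprintf(stderr, "XFS (fsname): " fmt "\n", ##__VA_ARGS__)

static int difree_stub(void)
{
	int error = 22;	/* pretend xfs_inobt_lookup() failed with EINVAL */

	my_warn("%s: xfs_inobt_lookup() returned error %d.", __func__, error);
	return error;
}

int main(void)
{
	return difree_stub() ? 1 : 0;
}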
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0cdd26932d8e..cb9b6d1469f7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -43,6 +43,17 @@ | |||
43 | 43 | ||
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Define xfs inode iolock lockdep classes. We need to ensure that all active | ||
47 | * inodes are considered the same for lockdep purposes, including inodes that | ||
48 | * are recycled through the XFS_IRECLAIMABLE state. This is the only way to | ||
49 | * guarantee the locks are considered the same when there are multiple lock | ||
50 | * initialisation sites. Also, define a reclaimable inode class so it is | ||
51 | * obvious in lockdep reports which class the report is against. | ||
52 | */ | ||
53 | static struct lock_class_key xfs_iolock_active; | ||
54 | struct lock_class_key xfs_iolock_reclaimable; | ||
55 | |||
56 | /* | ||
46 | * Allocate and initialise an xfs_inode. | 57 | * Allocate and initialise an xfs_inode. |
47 | */ | 58 | */ |
48 | STATIC struct xfs_inode * | 59 | STATIC struct xfs_inode * |
@@ -69,8 +80,11 @@ xfs_inode_alloc( | |||
69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 80 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
70 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 81 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
71 | ASSERT(completion_done(&ip->i_flush)); | 82 | ASSERT(completion_done(&ip->i_flush)); |
83 | ASSERT(ip->i_ino == 0); | ||
72 | 84 | ||
73 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 85 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
86 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
87 | &xfs_iolock_active, "xfs_iolock_active"); | ||
74 | 88 | ||
75 | /* initialise the xfs inode */ | 89 | /* initialise the xfs inode */ |
76 | ip->i_ino = ino; | 90 | ip->i_ino = ino; |
@@ -85,12 +99,20 @@ xfs_inode_alloc( | |||
85 | ip->i_size = 0; | 99 | ip->i_size = 0; |
86 | ip->i_new_size = 0; | 100 | ip->i_new_size = 0; |
87 | 101 | ||
88 | /* prevent anyone from using this yet */ | ||
89 | VFS_I(ip)->i_state = I_NEW; | ||
90 | |||
91 | return ip; | 102 | return ip; |
92 | } | 103 | } |
93 | 104 | ||
105 | STATIC void | ||
106 | xfs_inode_free_callback( | ||
107 | struct rcu_head *head) | ||
108 | { | ||
109 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
110 | struct xfs_inode *ip = XFS_I(inode); | ||
111 | |||
112 | INIT_LIST_HEAD(&inode->i_dentry); | ||
113 | kmem_zone_free(xfs_inode_zone, ip); | ||
114 | } | ||
115 | |||
94 | void | 116 | void |
95 | xfs_inode_free( | 117 | xfs_inode_free( |
96 | struct xfs_inode *ip) | 118 | struct xfs_inode *ip) |
@@ -134,7 +156,18 @@ xfs_inode_free( | |||
134 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 156 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
135 | ASSERT(completion_done(&ip->i_flush)); | 157 | ASSERT(completion_done(&ip->i_flush)); |
136 | 158 | ||
137 | kmem_zone_free(xfs_inode_zone, ip); | 159 | /* |
160 | * Because we use RCU freeing we need to ensure the inode always | ||
161 | * appears to be reclaimed with an invalid inode number when in the | ||
162 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
163 | * races. | ||
164 | */ | ||
165 | spin_lock(&ip->i_flags_lock); | ||
166 | ip->i_flags = XFS_IRECLAIM; | ||
167 | ip->i_ino = 0; | ||
168 | spin_unlock(&ip->i_flags_lock); | ||
169 | |||
170 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | ||
138 | } | 171 | } |
139 | 172 | ||
140 | /* | 173 | /* |
@@ -144,14 +177,29 @@ static int | |||
144 | xfs_iget_cache_hit( | 177 | xfs_iget_cache_hit( |
145 | struct xfs_perag *pag, | 178 | struct xfs_perag *pag, |
146 | struct xfs_inode *ip, | 179 | struct xfs_inode *ip, |
180 | xfs_ino_t ino, | ||
147 | int flags, | 181 | int flags, |
148 | int lock_flags) __releases(pag->pag_ici_lock) | 182 | int lock_flags) __releases(RCU) |
149 | { | 183 | { |
150 | struct inode *inode = VFS_I(ip); | 184 | struct inode *inode = VFS_I(ip); |
151 | struct xfs_mount *mp = ip->i_mount; | 185 | struct xfs_mount *mp = ip->i_mount; |
152 | int error; | 186 | int error; |
153 | 187 | ||
188 | /* | ||
189 | * check for re-use of an inode within an RCU grace period due to the | ||
190 | * radix tree nodes not being updated yet. We monitor for this by | ||
191 | * setting the inode number to zero before freeing the inode structure. | ||
192 | * If the inode has been reallocated and set up, then the inode number | ||
193 | * will not match, so check for that, too. | ||
194 | */ | ||
154 | spin_lock(&ip->i_flags_lock); | 195 | spin_lock(&ip->i_flags_lock); |
196 | if (ip->i_ino != ino) { | ||
197 | trace_xfs_iget_skip(ip); | ||
198 | XFS_STATS_INC(xs_ig_frecycle); | ||
199 | error = EAGAIN; | ||
200 | goto out_error; | ||
201 | } | ||
202 | |||
155 | 203 | ||
156 | /* | 204 | /* |
157 | * If we are racing with another cache hit that is currently | 205 | * If we are racing with another cache hit that is currently |
@@ -194,7 +242,7 @@ xfs_iget_cache_hit( | |||
194 | ip->i_flags |= XFS_IRECLAIM; | 242 | ip->i_flags |= XFS_IRECLAIM; |
195 | 243 | ||
196 | spin_unlock(&ip->i_flags_lock); | 244 | spin_unlock(&ip->i_flags_lock); |
197 | read_unlock(&pag->pag_ici_lock); | 245 | rcu_read_unlock(); |
198 | 246 | ||
199 | error = -inode_init_always(mp->m_super, inode); | 247 | error = -inode_init_always(mp->m_super, inode); |
200 | if (error) { | 248 | if (error) { |
@@ -202,7 +250,7 @@ xfs_iget_cache_hit( | |||
202 | * Re-initializing the inode failed, and we are in deep | 250 | * Re-initializing the inode failed, and we are in deep |
203 | * trouble. Try to re-add it to the reclaim list. | 251 | * trouble. Try to re-add it to the reclaim list. |
204 | */ | 252 | */ |
205 | read_lock(&pag->pag_ici_lock); | 253 | rcu_read_lock(); |
206 | spin_lock(&ip->i_flags_lock); | 254 | spin_lock(&ip->i_flags_lock); |
207 | 255 | ||
208 | ip->i_flags &= ~XFS_INEW; | 256 | ip->i_flags &= ~XFS_INEW; |
@@ -212,14 +260,20 @@ xfs_iget_cache_hit( | |||
212 | goto out_error; | 260 | goto out_error; |
213 | } | 261 | } |
214 | 262 | ||
215 | write_lock(&pag->pag_ici_lock); | 263 | spin_lock(&pag->pag_ici_lock); |
216 | spin_lock(&ip->i_flags_lock); | 264 | spin_lock(&ip->i_flags_lock); |
217 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); | 265 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); |
218 | ip->i_flags |= XFS_INEW; | 266 | ip->i_flags |= XFS_INEW; |
219 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 267 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
220 | inode->i_state = I_NEW; | 268 | inode->i_state = I_NEW; |
269 | |||
270 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
271 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
272 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
273 | &xfs_iolock_active, "xfs_iolock_active"); | ||
274 | |||
221 | spin_unlock(&ip->i_flags_lock); | 275 | spin_unlock(&ip->i_flags_lock); |
222 | write_unlock(&pag->pag_ici_lock); | 276 | spin_unlock(&pag->pag_ici_lock); |
223 | } else { | 277 | } else { |
224 | /* If the VFS inode is being torn down, pause and try again. */ | 278 | /* If the VFS inode is being torn down, pause and try again. */ |
225 | if (!igrab(inode)) { | 279 | if (!igrab(inode)) { |
@@ -230,7 +284,7 @@ xfs_iget_cache_hit( | |||
230 | 284 | ||
231 | /* We've got a live one. */ | 285 | /* We've got a live one. */ |
232 | spin_unlock(&ip->i_flags_lock); | 286 | spin_unlock(&ip->i_flags_lock); |
233 | read_unlock(&pag->pag_ici_lock); | 287 | rcu_read_unlock(); |
234 | trace_xfs_iget_hit(ip); | 288 | trace_xfs_iget_hit(ip); |
235 | } | 289 | } |
236 | 290 | ||
@@ -244,7 +298,7 @@ xfs_iget_cache_hit( | |||
244 | 298 | ||
245 | out_error: | 299 | out_error: |
246 | spin_unlock(&ip->i_flags_lock); | 300 | spin_unlock(&ip->i_flags_lock); |
247 | read_unlock(&pag->pag_ici_lock); | 301 | rcu_read_unlock(); |
248 | return error; | 302 | return error; |
249 | } | 303 | } |
250 | 304 | ||
@@ -297,7 +351,7 @@ xfs_iget_cache_miss( | |||
297 | BUG(); | 351 | BUG(); |
298 | } | 352 | } |
299 | 353 | ||
300 | write_lock(&pag->pag_ici_lock); | 354 | spin_lock(&pag->pag_ici_lock); |
301 | 355 | ||
302 | /* insert the new inode */ | 356 | /* insert the new inode */ |
303 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 357 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
@@ -312,14 +366,14 @@ xfs_iget_cache_miss( | |||
312 | ip->i_udquot = ip->i_gdquot = NULL; | 366 | ip->i_udquot = ip->i_gdquot = NULL; |
313 | xfs_iflags_set(ip, XFS_INEW); | 367 | xfs_iflags_set(ip, XFS_INEW); |
314 | 368 | ||
315 | write_unlock(&pag->pag_ici_lock); | 369 | spin_unlock(&pag->pag_ici_lock); |
316 | radix_tree_preload_end(); | 370 | radix_tree_preload_end(); |
317 | 371 | ||
318 | *ipp = ip; | 372 | *ipp = ip; |
319 | return 0; | 373 | return 0; |
320 | 374 | ||
321 | out_preload_end: | 375 | out_preload_end: |
322 | write_unlock(&pag->pag_ici_lock); | 376 | spin_unlock(&pag->pag_ici_lock); |
323 | radix_tree_preload_end(); | 377 | radix_tree_preload_end(); |
324 | if (lock_flags) | 378 | if (lock_flags) |
325 | xfs_iunlock(ip, lock_flags); | 379 | xfs_iunlock(ip, lock_flags); |
@@ -366,7 +420,7 @@ xfs_iget( | |||
366 | xfs_agino_t agino; | 420 | xfs_agino_t agino; |
367 | 421 | ||
368 | /* reject inode numbers outside existing AGs */ | 422 | /* reject inode numbers outside existing AGs */ |
369 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | 423 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
370 | return EINVAL; | 424 | return EINVAL; |
371 | 425 | ||
372 | /* get the perag structure and ensure that it's inode capable */ | 426 | /* get the perag structure and ensure that it's inode capable */ |
@@ -375,15 +429,15 @@ xfs_iget( | |||
375 | 429 | ||
376 | again: | 430 | again: |
377 | error = 0; | 431 | error = 0; |
378 | read_lock(&pag->pag_ici_lock); | 432 | rcu_read_lock(); |
379 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | 433 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); |
380 | 434 | ||
381 | if (ip) { | 435 | if (ip) { |
382 | error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); | 436 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); |
383 | if (error) | 437 | if (error) |
384 | goto out_error_or_again; | 438 | goto out_error_or_again; |
385 | } else { | 439 | } else { |
386 | read_unlock(&pag->pag_ici_lock); | 440 | rcu_read_unlock(); |
387 | XFS_STATS_INC(xs_ig_missed); | 441 | XFS_STATS_INC(xs_ig_missed); |
388 | 442 | ||
389 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | 443 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, |
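The heart of the RCU conversion in this file is the revalidation step: a lookup may now return an inode that was freed, and possibly reallocated, after the radix-tree read, so i_ino is rechecked under i_flags_lock, with xfs_inode_free() zeroing i_ino as the "dead" marker first. The model below captures only that recheck protocol; a plain mutex stands in for i_flags_lock and there is no real RCU here:

#include <pthread.h>
#include <stdio.h>

struct inode {
	pthread_mutex_t flags_lock;	/* stands in for ip->i_flags_lock */
	unsigned long long ino;		/* 0 means freed/being recycled */
};

/* Called at free time, before the memory can be reused. */
static void inode_mark_dead(struct inode *ip)
{
	pthread_mutex_lock(&ip->flags_lock);
	ip->ino = 0;			/* lookups will now reject this inode */
	pthread_mutex_unlock(&ip->flags_lock);
}

/* Called after a (conceptually RCU-protected) cache lookup. */
static int inode_revalidate(struct inode *ip, unsigned long long want)
{
	int ok;

	pthread_mutex_lock(&ip->flags_lock);
	ok = (ip->ino == want);		/* freed or reallocated -> retry */
	pthread_mutex_unlock(&ip->flags_lock);
	return ok;
}

int main(void)
{
	struct inode ip = { PTHREAD_MUTEX_INITIALIZER, 42 };

	printf("valid: %d\n", inode_revalidate(&ip, 42));
	inode_mark_dead(&ip);
	printf("after free: %d\n", inode_revalidate(&ip, 42));
	return 0;
}

A zero or mismatching inode number corresponds to the EAGAIN path above, which restarts the whole lookup.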
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 108c7a085f94..a37480a6e023 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -110,8 +110,8 @@ xfs_inobp_check( | |||
110 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 110 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, |
111 | i * mp->m_sb.sb_inodesize); | 111 | i * mp->m_sb.sb_inodesize); |
112 | if (!dip->di_next_unlinked) { | 112 | if (!dip->di_next_unlinked) { |
113 | xfs_fs_cmn_err(CE_ALERT, mp, | 113 | xfs_alert(mp, |
114 | "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", | 114 | "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", |
115 | bp); | 115 | bp); |
116 | ASSERT(dip->di_next_unlinked); | 116 | ASSERT(dip->di_next_unlinked); |
117 | } | 117 | } |
@@ -142,10 +142,9 @@ xfs_imap_to_bp( | |||
142 | (int)imap->im_len, buf_flags, &bp); | 142 | (int)imap->im_len, buf_flags, &bp); |
143 | if (error) { | 143 | if (error) { |
144 | if (error != EAGAIN) { | 144 | if (error != EAGAIN) { |
145 | cmn_err(CE_WARN, | 145 | xfs_warn(mp, |
146 | "xfs_imap_to_bp: xfs_trans_read_buf()returned " | 146 | "%s: xfs_trans_read_buf() returned error %d.", |
147 | "an error %d on %s. Returning error.", | 147 | __func__, error); |
148 | error, mp->m_fsname); | ||
149 | } else { | 148 | } else { |
150 | ASSERT(buf_flags & XBF_TRYLOCK); | 149 | ASSERT(buf_flags & XBF_TRYLOCK); |
151 | } | 150 | } |
@@ -180,12 +179,11 @@ xfs_imap_to_bp( | |||
180 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", | 179 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", |
181 | XFS_ERRLEVEL_HIGH, mp, dip); | 180 | XFS_ERRLEVEL_HIGH, mp, dip); |
182 | #ifdef DEBUG | 181 | #ifdef DEBUG |
183 | cmn_err(CE_PANIC, | 182 | xfs_emerg(mp, |
184 | "Device %s - bad inode magic/vsn " | 183 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", |
185 | "daddr %lld #%d (magic=%x)", | ||
186 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
187 | (unsigned long long)imap->im_blkno, i, | 184 | (unsigned long long)imap->im_blkno, i, |
188 | be16_to_cpu(dip->di_magic)); | 185 | be16_to_cpu(dip->di_magic)); |
186 | ASSERT(0); | ||
189 | #endif | 187 | #endif |
190 | xfs_trans_brelse(tp, bp); | 188 | xfs_trans_brelse(tp, bp); |
191 | return XFS_ERROR(EFSCORRUPTED); | 189 | return XFS_ERROR(EFSCORRUPTED); |
@@ -317,7 +315,7 @@ xfs_iformat( | |||
317 | if (unlikely(be32_to_cpu(dip->di_nextents) + | 315 | if (unlikely(be32_to_cpu(dip->di_nextents) + |
318 | be16_to_cpu(dip->di_anextents) > | 316 | be16_to_cpu(dip->di_anextents) > |
319 | be64_to_cpu(dip->di_nblocks))) { | 317 | be64_to_cpu(dip->di_nblocks))) { |
320 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 318 | xfs_warn(ip->i_mount, |
321 | "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", | 319 | "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", |
322 | (unsigned long long)ip->i_ino, | 320 | (unsigned long long)ip->i_ino, |
323 | (int)(be32_to_cpu(dip->di_nextents) + | 321 | (int)(be32_to_cpu(dip->di_nextents) + |
@@ -330,8 +328,7 @@ xfs_iformat( | |||
330 | } | 328 | } |
331 | 329 | ||
332 | if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { | 330 | if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { |
333 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 331 | xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", |
334 | "corrupt dinode %Lu, forkoff = 0x%x.", | ||
335 | (unsigned long long)ip->i_ino, | 332 | (unsigned long long)ip->i_ino, |
336 | dip->di_forkoff); | 333 | dip->di_forkoff); |
337 | XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, | 334 | XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, |
@@ -341,7 +338,7 @@ xfs_iformat( | |||
341 | 338 | ||
342 | if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && | 339 | if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && |
343 | !ip->i_mount->m_rtdev_targp)) { | 340 | !ip->i_mount->m_rtdev_targp)) { |
344 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 341 | xfs_warn(ip->i_mount, |
345 | "corrupt dinode %Lu, has realtime flag set.", | 342 | "corrupt dinode %Lu, has realtime flag set.", |
346 | ip->i_ino); | 343 | ip->i_ino); |
347 | XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", | 344 | XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", |
@@ -373,9 +370,8 @@ xfs_iformat( | |||
373 | * no local regular files yet | 370 | * no local regular files yet |
374 | */ | 371 | */ |
375 | if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { | 372 | if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { |
376 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 373 | xfs_warn(ip->i_mount, |
377 | "corrupt inode %Lu " | 374 | "corrupt inode %Lu (local format for regular file).", |
378 | "(local format for regular file).", | ||
379 | (unsigned long long) ip->i_ino); | 375 | (unsigned long long) ip->i_ino); |
380 | XFS_CORRUPTION_ERROR("xfs_iformat(4)", | 376 | XFS_CORRUPTION_ERROR("xfs_iformat(4)", |
381 | XFS_ERRLEVEL_LOW, | 377 | XFS_ERRLEVEL_LOW, |
@@ -385,9 +381,8 @@ xfs_iformat( | |||
385 | 381 | ||
386 | di_size = be64_to_cpu(dip->di_size); | 382 | di_size = be64_to_cpu(dip->di_size); |
387 | if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { | 383 | if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { |
388 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 384 | xfs_warn(ip->i_mount, |
389 | "corrupt inode %Lu " | 385 | "corrupt inode %Lu (bad size %Ld for local inode).", |
390 | "(bad size %Ld for local inode).", | ||
391 | (unsigned long long) ip->i_ino, | 386 | (unsigned long long) ip->i_ino, |
392 | (long long) di_size); | 387 | (long long) di_size); |
393 | XFS_CORRUPTION_ERROR("xfs_iformat(5)", | 388 | XFS_CORRUPTION_ERROR("xfs_iformat(5)", |
@@ -431,9 +426,8 @@ xfs_iformat( | |||
431 | size = be16_to_cpu(atp->hdr.totsize); | 426 | size = be16_to_cpu(atp->hdr.totsize); |
432 | 427 | ||
433 | if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { | 428 | if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { |
434 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 429 | xfs_warn(ip->i_mount, |
435 | "corrupt inode %Lu " | 430 | "corrupt inode %Lu (bad attr fork size %Ld).", |
436 | "(bad attr fork size %Ld).", | ||
437 | (unsigned long long) ip->i_ino, | 431 | (unsigned long long) ip->i_ino, |
438 | (long long) size); | 432 | (long long) size); |
439 | XFS_CORRUPTION_ERROR("xfs_iformat(8)", | 433 | XFS_CORRUPTION_ERROR("xfs_iformat(8)", |
@@ -488,9 +482,8 @@ xfs_iformat_local( | |||
488 | * kmem_alloc() or memcpy() below. | 482 | * kmem_alloc() or memcpy() below. |
489 | */ | 483 | */ |
490 | if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | 484 | if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { |
491 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 485 | xfs_warn(ip->i_mount, |
492 | "corrupt inode %Lu " | 486 | "corrupt inode %Lu (bad size %d for local fork, size = %d).", |
493 | "(bad size %d for local fork, size = %d).", | ||
494 | (unsigned long long) ip->i_ino, size, | 487 | (unsigned long long) ip->i_ino, size, |
495 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); | 488 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); |
496 | XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, | 489 | XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, |
@@ -547,8 +540,7 @@ xfs_iformat_extents( | |||
547 | * kmem_alloc() or memcpy() below. | 540 | * kmem_alloc() or memcpy() below. |
548 | */ | 541 | */ |
549 | if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | 542 | if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { |
550 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 543 | xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", |
551 | "corrupt inode %Lu ((a)extents = %d).", | ||
552 | (unsigned long long) ip->i_ino, nex); | 544 | (unsigned long long) ip->i_ino, nex); |
553 | XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, | 545 | XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, |
554 | ip->i_mount, dip); | 546 | ip->i_mount, dip); |
@@ -623,11 +615,10 @@ xfs_iformat_btree( | |||
623 | || XFS_BMDR_SPACE_CALC(nrecs) > | 615 | || XFS_BMDR_SPACE_CALC(nrecs) > |
624 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) | 616 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) |
625 | || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | 617 | || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { |
626 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 618 | xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", |
627 | "corrupt inode %Lu (btree).", | ||
628 | (unsigned long long) ip->i_ino); | 619 | (unsigned long long) ip->i_ino); |
629 | XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, | 620 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, |
630 | ip->i_mount); | 621 | ip->i_mount, dip); |
631 | return XFS_ERROR(EFSCORRUPTED); | 622 | return XFS_ERROR(EFSCORRUPTED); |
632 | } | 623 | } |
633 | 624 | ||
@@ -813,11 +804,9 @@ xfs_iread( | |||
813 | */ | 804 | */ |
814 | if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { | 805 | if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { |
815 | #ifdef DEBUG | 806 | #ifdef DEBUG |
816 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " | 807 | xfs_alert(mp, |
817 | "dip->di_magic (0x%x) != " | 808 | "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", |
818 | "XFS_DINODE_MAGIC (0x%x)", | 809 | __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); |
819 | be16_to_cpu(dip->di_magic), | ||
820 | XFS_DINODE_MAGIC); | ||
821 | #endif /* DEBUG */ | 810 | #endif /* DEBUG */ |
822 | error = XFS_ERROR(EINVAL); | 811 | error = XFS_ERROR(EINVAL); |
823 | goto out_brelse; | 812 | goto out_brelse; |
@@ -835,9 +824,8 @@ xfs_iread( | |||
835 | error = xfs_iformat(ip, dip); | 824 | error = xfs_iformat(ip, dip); |
836 | if (error) { | 825 | if (error) { |
837 | #ifdef DEBUG | 826 | #ifdef DEBUG |
838 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " | 827 | xfs_alert(mp, "%s: xfs_iformat() returned error %d", |
839 | "xfs_iformat() returned error %d", | 828 | __func__, error); |
840 | error); | ||
841 | #endif /* DEBUG */ | 829 | #endif /* DEBUG */ |
842 | goto out_brelse; | 830 | goto out_brelse; |
843 | } | 831 | } |
@@ -887,7 +875,7 @@ xfs_iread( | |||
887 | * around for a while. This helps to keep recently accessed | 875 | * around for a while. This helps to keep recently accessed |
888 | * meta-data in-core longer. | 876 | * meta-data in-core longer. |
889 | */ | 877 | */ |
890 | XFS_BUF_SET_REF(bp, XFS_INO_REF); | 878 | xfs_buf_set_ref(bp, XFS_INO_REF); |
891 | 879 | ||
892 | /* | 880 | /* |
893 | * Use xfs_trans_brelse() to release the buffer containing the | 881 | * Use xfs_trans_brelse() to release the buffer containing the |
@@ -1016,8 +1004,8 @@ xfs_ialloc( | |||
1016 | * This is because we're setting fields here we need | 1004 | * This is because we're setting fields here we need |
1017 | * to prevent others from looking at until we're done. | 1005 | * to prevent others from looking at until we're done. |
1018 | */ | 1006 | */ |
1019 | error = xfs_trans_iget(tp->t_mountp, tp, ino, | 1007 | error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, |
1020 | XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); | 1008 | XFS_ILOCK_EXCL, &ip); |
1021 | if (error) | 1009 | if (error) |
1022 | return error; | 1010 | return error; |
1023 | ASSERT(ip != NULL); | 1011 | ASSERT(ip != NULL); |
@@ -1166,6 +1154,7 @@ xfs_ialloc( | |||
1166 | /* | 1154 | /* |
1167 | * Log the new values stuffed into the inode. | 1155 | * Log the new values stuffed into the inode. |
1168 | */ | 1156 | */ |
1157 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); | ||
1169 | xfs_trans_log_inode(tp, ip, flags); | 1158 | xfs_trans_log_inode(tp, ip, flags); |
1170 | 1159 | ||
1171 | /* now that we have an i_mode we can setup inode ops and unlock */ | 1160 | /* now that we have an i_mode we can setup inode ops and unlock */ |
@@ -1820,9 +1809,8 @@ xfs_iunlink_remove( | |||
1820 | */ | 1809 | */ |
1821 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1810 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); |
1822 | if (error) { | 1811 | if (error) { |
1823 | cmn_err(CE_WARN, | 1812 | xfs_warn(mp, "%s: xfs_itobp() returned error %d.", |
1824 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 1813 | __func__, error); |
1825 | error, mp->m_fsname); | ||
1826 | return error; | 1814 | return error; |
1827 | } | 1815 | } |
1828 | next_agino = be32_to_cpu(dip->di_next_unlinked); | 1816 | next_agino = be32_to_cpu(dip->di_next_unlinked); |
@@ -1867,9 +1855,9 @@ xfs_iunlink_remove( | |||
1867 | error = xfs_inotobp(mp, tp, next_ino, &last_dip, | 1855 | error = xfs_inotobp(mp, tp, next_ino, &last_dip, |
1868 | &last_ibp, &last_offset, 0); | 1856 | &last_ibp, &last_offset, 0); |
1869 | if (error) { | 1857 | if (error) { |
1870 | cmn_err(CE_WARN, | 1858 | xfs_warn(mp, |
1871 | "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", | 1859 | "%s: xfs_inotobp() returned error %d.", |
1872 | error, mp->m_fsname); | 1860 | __func__, error); |
1873 | return error; | 1861 | return error; |
1874 | } | 1862 | } |
1875 | next_agino = be32_to_cpu(last_dip->di_next_unlinked); | 1863 | next_agino = be32_to_cpu(last_dip->di_next_unlinked); |
@@ -1882,9 +1870,8 @@ xfs_iunlink_remove( | |||
1882 | */ | 1870 | */ |
1883 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1871 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); |
1884 | if (error) { | 1872 | if (error) { |
1885 | cmn_err(CE_WARN, | 1873 | xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", |
1886 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 1874 | __func__, error); |
1887 | error, mp->m_fsname); | ||
1888 | return error; | 1875 | return error; |
1889 | } | 1876 | } |
1890 | next_agino = be32_to_cpu(dip->di_next_unlinked); | 1877 | next_agino = be32_to_cpu(dip->di_next_unlinked); |
@@ -2000,17 +1987,33 @@ xfs_ifree_cluster( | |||
2000 | */ | 1987 | */ |
2001 | for (i = 0; i < ninodes; i++) { | 1988 | for (i = 0; i < ninodes; i++) { |
2002 | retry: | 1989 | retry: |
2003 | read_lock(&pag->pag_ici_lock); | 1990 | rcu_read_lock(); |
2004 | ip = radix_tree_lookup(&pag->pag_ici_root, | 1991 | ip = radix_tree_lookup(&pag->pag_ici_root, |
2005 | XFS_INO_TO_AGINO(mp, (inum + i))); | 1992 | XFS_INO_TO_AGINO(mp, (inum + i))); |
2006 | 1993 | ||
2007 | /* Inode not in memory or stale, nothing to do */ | 1994 | /* Inode not in memory, nothing to do */ |
2008 | if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { | 1995 | if (!ip) { |
2009 | read_unlock(&pag->pag_ici_lock); | 1996 | rcu_read_unlock(); |
2010 | continue; | 1997 | continue; |
2011 | } | 1998 | } |
2012 | 1999 | ||
2013 | /* | 2000 | /* |
2001 | * because this is an RCU protected lookup, we could | ||
2002 | * find a recently freed or even reallocated inode | ||
2003 | * during the lookup. We need to check under the | ||
2004 | * i_flags_lock for a valid inode here. Skip it if it | ||
2005 | * is not valid, the wrong inode or stale. | ||
2006 | */ | ||
2007 | spin_lock(&ip->i_flags_lock); | ||
2008 | if (ip->i_ino != inum + i || | ||
2009 | __xfs_iflags_test(ip, XFS_ISTALE)) { | ||
2010 | spin_unlock(&ip->i_flags_lock); | ||
2011 | rcu_read_unlock(); | ||
2012 | continue; | ||
2013 | } | ||
2014 | spin_unlock(&ip->i_flags_lock); | ||
2015 | |||
2016 | /* | ||
2014 | * Don't try to lock/unlock the current inode, but we | 2017 | * Don't try to lock/unlock the current inode, but we |
2015 | * _cannot_ skip the other inodes that we did not find | 2018 | * _cannot_ skip the other inodes that we did not find |
2016 | * in the list attached to the buffer and are not | 2019 | * in the list attached to the buffer and are not |
@@ -2019,11 +2022,11 @@ retry: | |||
2019 | */ | 2022 | */ |
2020 | if (ip != free_ip && | 2023 | if (ip != free_ip && |
2021 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | 2024 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
2022 | read_unlock(&pag->pag_ici_lock); | 2025 | rcu_read_unlock(); |
2023 | delay(1); | 2026 | delay(1); |
2024 | goto retry; | 2027 | goto retry; |
2025 | } | 2028 | } |
2026 | read_unlock(&pag->pag_ici_lock); | 2029 | rcu_read_unlock(); |
2027 | 2030 | ||
2028 | xfs_iflock(ip); | 2031 | xfs_iflock(ip); |
2029 | xfs_iflags_set(ip, XFS_ISTALE); | 2032 | xfs_iflags_set(ip, XFS_ISTALE); |
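The retry loop above also illustrates a constraint the RCU conversion imposes: code may not sleep inside an RCU read section, so the inode lock is taken with a nowait trylock and, on contention, the whole lookup is restarted after dropping out of RCU. A user-space model of that shape (pthread trylock in place of xfs_ilock_nowait(); the simulated contention exists only to exercise the retry branch):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;
static int simulate_contention = 1;	/* pretend the first trylock loses */

static int ilock_nowait(void)
{
	if (simulate_contention) {
		simulate_contention = 0;
		return 0;	/* somebody else holds the inode lock */
	}
	return pthread_mutex_trylock(&ilock) == 0;
}

/*
 * Model of the loop in xfs_ifree_cluster(): no sleeping inside the
 * (conceptual) RCU read section, so on contention we drop out of it,
 * back off, and restart the lookup from scratch.
 */
static void mark_inode_stale(void)
{
retry:
	/* rcu_read_lock(); radix-tree lookup + i_ino recheck happen here */
	if (!ilock_nowait()) {
		/* rcu_read_unlock(); */
		usleep(1000);	/* delay(1) in the kernel code */
		printf("contended, retrying lookup\n");
		goto retry;
	}
	/* rcu_read_unlock(); the inode lock now keeps the inode alive */
	printf("locked, setting XFS_ISTALE\n");
	pthread_mutex_unlock(&ilock);
}

int main(void)
{
	mark_inode_stale();
	return 0;
}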
@@ -2629,7 +2632,7 @@ xfs_iflush_cluster( | |||
2629 | 2632 | ||
2630 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 2633 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
2631 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | 2634 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; |
2632 | read_lock(&pag->pag_ici_lock); | 2635 | rcu_read_lock(); |
2633 | /* really need a gang lookup range call here */ | 2636 | /* really need a gang lookup range call here */ |
2634 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | 2637 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, |
2635 | first_index, inodes_per_cluster); | 2638 | first_index, inodes_per_cluster); |
@@ -2640,9 +2643,21 @@ xfs_iflush_cluster( | |||
2640 | iq = ilist[i]; | 2643 | iq = ilist[i]; |
2641 | if (iq == ip) | 2644 | if (iq == ip) |
2642 | continue; | 2645 | continue; |
2643 | /* if the inode lies outside this cluster, we're done. */ | 2646 | |
2644 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | 2647 | /* |
2645 | break; | 2648 | * because this is an RCU protected lookup, we could find a |
2649 | * recently freed or even reallocated inode during the lookup. | ||
2650 | * We need to check under the i_flags_lock for a valid inode | ||
2651 | * here. Skip it if it is not valid or the wrong inode. | ||
2652 | */ | ||
2653 | spin_lock(&ip->i_flags_lock); | ||
2654 | if (!ip->i_ino || | ||
2655 | (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { | ||
2656 | spin_unlock(&ip->i_flags_lock); | ||
2657 | continue; | ||
2658 | } | ||
2659 | spin_unlock(&ip->i_flags_lock); | ||
2660 | |||
2646 | /* | 2661 | /* |
2647 | * Do an un-protected check to see if the inode is dirty and | 2662 | * Do an un-protected check to see if the inode is dirty and |
2648 | * is a candidate for flushing. These checks will be repeated | 2663 | * is a candidate for flushing. These checks will be repeated |
@@ -2692,7 +2707,7 @@ xfs_iflush_cluster( | |||
2692 | } | 2707 | } |
2693 | 2708 | ||
2694 | out_free: | 2709 | out_free: |
2695 | read_unlock(&pag->pag_ici_lock); | 2710 | rcu_read_unlock(); |
2696 | kmem_free(ilist); | 2711 | kmem_free(ilist); |
2697 | out_put: | 2712 | out_put: |
2698 | xfs_perag_put(pag); | 2713 | xfs_perag_put(pag); |
@@ -2704,7 +2719,7 @@ cluster_corrupt_out: | |||
2704 | * Corruption detected in the clustering loop. Invalidate the | 2719 | * Corruption detected in the clustering loop. Invalidate the |
2705 | * inode buffer and shut down the filesystem. | 2720 | * inode buffer and shut down the filesystem. |
2706 | */ | 2721 | */ |
2707 | read_unlock(&pag->pag_ici_lock); | 2722 | rcu_read_unlock(); |
2708 | /* | 2723 | /* |
2709 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 2724 | * Clean up the buffer. If it was B_DELWRI, just release it -- |
2710 | * brelse can handle it with no problems. If not, shut down the | 2725 | * brelse can handle it with no problems. If not, shut down the |
@@ -2774,7 +2789,7 @@ xfs_iflush( | |||
2774 | 2789 | ||
2775 | /* | 2790 | /* |
2776 | * We can't flush the inode until it is unpinned, so wait for it if we | 2791 | * We can't flush the inode until it is unpinned, so wait for it if we |
2777 | * are allowed to block. We know noone new can pin it, because we are | 2792 | * are allowed to block. We know no one new can pin it, because we are |
2778 | * holding the inode lock shared and you need to hold it exclusively to | 2793 | * holding the inode lock shared and you need to hold it exclusively to |
2779 | * pin the inode. | 2794 | * pin the inode. |
2780 | * | 2795 | * |
@@ -2820,7 +2835,7 @@ xfs_iflush( | |||
2820 | * Get the buffer containing the on-disk inode. | 2835 | * Get the buffer containing the on-disk inode. |
2821 | */ | 2836 | */ |
2822 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, | 2837 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, |
2823 | (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); | 2838 | (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK); |
2824 | if (error || !bp) { | 2839 | if (error || !bp) { |
2825 | xfs_ifunlock(ip); | 2840 | xfs_ifunlock(ip); |
2826 | return error; | 2841 | return error; |
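This xfs_itobp() change and the xfs_inode_item_push() change further down are two halves of one mechanism: the AIL pusher requests a non-blocking flush and the buffer lookup honours it, so a contended inode buffer is skipped instead of stalling the push. In outline:

	/* AIL push (xfs_inode_item_push below): never block on the buffer */
	(void) xfs_iflush(ip, SYNC_TRYLOCK);

	/* inside xfs_iflush -> xfs_itobp: choose the buffer lock mode */
	error = xfs_itobp(mp, NULL, ip, &dip, &bp,
			  (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
	if (error || !bp) {		/* NULL bp with no error: trylock miss */
		xfs_ifunlock(ip);	/* drop the flush lock; retried later */
		return error;
	}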
@@ -2911,16 +2926,16 @@ xfs_iflush_int( | |||
2911 | 2926 | ||
2912 | if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, | 2927 | if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, |
2913 | mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { | 2928 | mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { |
2914 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2929 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2915 | "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", | 2930 | "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", |
2916 | ip->i_ino, be16_to_cpu(dip->di_magic), dip); | 2931 | __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); |
2917 | goto corrupt_out; | 2932 | goto corrupt_out; |
2918 | } | 2933 | } |
2919 | if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, | 2934 | if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, |
2920 | mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { | 2935 | mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { |
2921 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2936 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2922 | "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", | 2937 | "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x", |
2923 | ip->i_ino, ip, ip->i_d.di_magic); | 2938 | __func__, ip->i_ino, ip, ip->i_d.di_magic); |
2924 | goto corrupt_out; | 2939 | goto corrupt_out; |
2925 | } | 2940 | } |
2926 | if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { | 2941 | if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { |
@@ -2928,9 +2943,9 @@ xfs_iflush_int( | |||
2928 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && | 2943 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && |
2929 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), | 2944 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), |
2930 | mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { | 2945 | mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { |
2931 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2946 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2932 | "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", | 2947 | "%s: Bad regular inode %Lu, ptr 0x%p", |
2933 | ip->i_ino, ip); | 2948 | __func__, ip->i_ino, ip); |
2934 | goto corrupt_out; | 2949 | goto corrupt_out; |
2935 | } | 2950 | } |
2936 | } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { | 2951 | } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { |
@@ -2939,28 +2954,28 @@ xfs_iflush_int( | |||
2939 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && | 2954 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && |
2940 | (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), | 2955 | (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), |
2941 | mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { | 2956 | mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { |
2942 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2957 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2943 | "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", | 2958 | "%s: Bad directory inode %Lu, ptr 0x%p", |
2944 | ip->i_ino, ip); | 2959 | __func__, ip->i_ino, ip); |
2945 | goto corrupt_out; | 2960 | goto corrupt_out; |
2946 | } | 2961 | } |
2947 | } | 2962 | } |
2948 | if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > | 2963 | if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > |
2949 | ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, | 2964 | ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, |
2950 | XFS_RANDOM_IFLUSH_5)) { | 2965 | XFS_RANDOM_IFLUSH_5)) { |
2951 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2966 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2952 | "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", | 2967 | "%s: detected corrupt incore inode %Lu, " |
2953 | ip->i_ino, | 2968 | "total extents = %d, nblocks = %Ld, ptr 0x%p", |
2969 | __func__, ip->i_ino, | ||
2954 | ip->i_d.di_nextents + ip->i_d.di_anextents, | 2970 | ip->i_d.di_nextents + ip->i_d.di_anextents, |
2955 | ip->i_d.di_nblocks, | 2971 | ip->i_d.di_nblocks, ip); |
2956 | ip); | ||
2957 | goto corrupt_out; | 2972 | goto corrupt_out; |
2958 | } | 2973 | } |
2959 | if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, | 2974 | if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, |
2960 | mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { | 2975 | mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { |
2961 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2976 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2962 | "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", | 2977 | "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", |
2963 | ip->i_ino, ip->i_d.di_forkoff, ip); | 2978 | __func__, ip->i_ino, ip->i_d.di_forkoff, ip); |
2964 | goto corrupt_out; | 2979 | goto corrupt_out; |
2965 | } | 2980 | } |
2966 | /* | 2981 | /* |
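The conversions in this file replace the old xfs_cmn_err(tag, CE_ALERT, mp, ...) calls with xfs_alert_tag(mp, tag, ...) and pass __func__ instead of hard-coding "xfs_iflush:" into each string. The helper itself is defined in linux-2.6/xfs_message.c elsewhere in this merge; the body below is a from-memory sketch of its shape, not the verbatim implementation:

	void
	xfs_alert_tag(
		const struct xfs_mount	*mp,
		int			panic_tag,
		const char		*fmt, ...)
	{
		struct va_format	vaf;
		va_list			args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_ALERT "XFS (%s): %pV\n",
		       mp ? mp->m_fsname : "?", &vaf);
		va_end(args);

		/* a panic_tag bit set in xfs_panic_mask escalates to BUG() */
		if (panic_tag && (xfs_panic_mask & panic_tag))
			BUG();
	}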
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fb2ca2e4cdc9..ff4e2a30227d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -111,7 +111,7 @@ struct xfs_imap { | |||
111 | * Generally, we do not want to hold the i_rlock while holding the | 111 | * Generally, we do not want to hold the i_rlock while holding the |
112 | * i_ilock. Hierarchy is i_iolock followed by i_rlock. | 112 | * i_ilock. Hierarchy is i_iolock followed by i_rlock. |
113 | * | 113 | * |
114 | * xfs_iptr_t contains all the inode fields upto and including the | 114 | * xfs_iptr_t contains all the inode fields up to and including the |
115 | * i_mnext and i_mprev fields, it is used as a marker in the inode | 115 | * i_mnext and i_mprev fields, it is used as a marker in the inode |
116 | * chain off the mount structure by xfs_sync calls. | 116 | * chain off the mount structure by xfs_sync calls. |
117 | */ | 117 | */ |
@@ -336,7 +336,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
336 | 336 | ||
337 | /* | 337 | /* |
338 | * Project quota id helpers (previously projid was 16bit only | 338 | * Project quota id helpers (previously projid was 16bit only |
339 | * and using two 16bit values to hold new 32bit projid was choosen | 339 | * and using two 16bit values to hold new 32bit projid was chosen |
340 | * to retain compatibility with "old" filesystems). | 340 | * to retain compatibility with "old" filesystems). |
341 | */ | 341 | */ |
342 | static inline prid_t | 342 | static inline prid_t |
@@ -376,12 +376,13 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
376 | /* | 376 | /* |
377 | * In-core inode flags. | 377 | * In-core inode flags. |
378 | */ | 378 | */ |
379 | #define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ | 379 | #define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ |
380 | #define XFS_ISTALE 0x0002 /* inode has been staled */ | 380 | #define XFS_ISTALE 0x0002 /* inode has been staled */ |
381 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ | 381 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ |
382 | #define XFS_INEW 0x0008 /* inode has just been allocated */ | 382 | #define XFS_INEW 0x0008 /* inode has just been allocated */ |
383 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ | 383 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ |
384 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ | 384 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ |
385 | #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ | ||
385 | 386 | ||
386 | /* | 387 | /* |
387 | * Flags for inode locking. | 388 | * Flags for inode locking. |
@@ -408,28 +409,35 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
408 | /* | 409 | /* |
409 | * Flags for lockdep annotations. | 410 | * Flags for lockdep annotations. |
410 | * | 411 | * |
411 | * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes | 412 | * XFS_LOCK_PARENT - for directory operations that require locking a |
412 | * (ie directory operations that require locking a directory inode and | 413 | * parent directory inode and a child entry inode. The parent gets locked |
413 | * an entry inode). The first inode gets locked with this flag so it | 414 | * with this flag so it gets a lockdep subclass of 1 and the child entry |
414 | * gets a lockdep subclass of 1 and the second lock will have a lockdep | 415 | * lock will have a lockdep subclass of 0. |
415 | * subclass of 0. | 416 | * |
417 | * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary | ||
418 | * inodes do not participate in the normal lock order, and thus have their | ||
419 | * own subclasses. | ||
416 | * | 420 | * |
417 | * XFS_LOCK_INUMORDER - for locking several inodes at the same time | 421 | * XFS_LOCK_INUMORDER - for locking several inodes at the same time |
418 | * with xfs_lock_inodes(). This flag is used as the starting subclass | 422 | * with xfs_lock_inodes(). This flag is used as the starting subclass |
419 | * and each subsequent lock acquired will increment the subclass by one. | 423 | * and each subsequent lock acquired will increment the subclass by one. |
420 | * So the first lock acquired will have a lockdep subclass of 2, the | 424 | * So the first lock acquired will have a lockdep subclass of 4, the |
421 | * second lock will have a lockdep subclass of 3, and so on. It is | 425 | * second lock will have a lockdep subclass of 5, and so on. It is |
422 | * the responsibility of the class builder to shift this to the correct | 426 | * the responsibility of the class builder to shift this to the correct |
423 | * portion of the lock_mode lockdep mask. | 427 | * portion of the lock_mode lockdep mask. |
424 | */ | 428 | */ |
425 | #define XFS_LOCK_PARENT 1 | 429 | #define XFS_LOCK_PARENT 1 |
426 | #define XFS_LOCK_INUMORDER 2 | 430 | #define XFS_LOCK_RTBITMAP 2 |
431 | #define XFS_LOCK_RTSUM 3 | ||
432 | #define XFS_LOCK_INUMORDER 4 | ||
427 | 433 | ||
428 | #define XFS_IOLOCK_SHIFT 16 | 434 | #define XFS_IOLOCK_SHIFT 16 |
429 | #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) | 435 | #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) |
430 | 436 | ||
431 | #define XFS_ILOCK_SHIFT 24 | 437 | #define XFS_ILOCK_SHIFT 24 |
432 | #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) | 438 | #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) |
439 | #define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) | ||
440 | #define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) | ||
433 | 441 | ||
434 | #define XFS_IOLOCK_DEP_MASK 0x00ff0000 | 442 | #define XFS_IOLOCK_DEP_MASK 0x00ff0000 |
435 | #define XFS_ILOCK_DEP_MASK 0xff000000 | 443 | #define XFS_ILOCK_DEP_MASK 0xff000000 |
@@ -438,6 +446,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
438 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) | 446 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) |
439 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) | 447 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) |
440 | 448 | ||
449 | extern struct lock_class_key xfs_iolock_reclaimable; | ||
450 | |||
441 | /* | 451 | /* |
442 | * Flags for xfs_itruncate_start(). | 452 | * Flags for xfs_itruncate_start(). |
443 | */ | 453 | */ |
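Worked example of the subclass encoding above; every value follows from the macros already shown, nothing new is defined:

	uint lock_flags, subclass;

	/* parent directory ilock: subclass 1 carried in bits 24-31 */
	lock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_PARENT;
			/* XFS_ILOCK_PARENT == 1 << 24 == 0x01000000 */

	subclass = XFS_ILOCK_DEP(lock_flags);
			/* (lock_flags & 0xff000000) >> 24 == 1 */

	/* realtime inodes sit outside the normal order:
	 *   XFS_ILOCK_RTBITMAP == 2 << 24, XFS_ILOCK_RTSUM == 3 << 24
	 * and xfs_lock_inodes() hands out XFS_LOCK_INUMORDER + i, so the
	 * first inode locked gets subclass 4, the second 5, and so on. */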
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7c8d30c453c3..576fdfe81d60 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -198,6 +198,41 @@ xfs_inode_item_size( | |||
198 | } | 198 | } |
199 | 199 | ||
200 | /* | 200 | /* |
201 | * xfs_inode_item_format_extents - convert in-core extents to on-disk form | ||
202 | * | ||
203 | * For either the data or attr fork in extent format, we need to endian convert | ||
204 | * the in-core extent as we place them into the on-disk inode. In this case, we | ||
205 | * need to do this conversion before we write the extents into the log. Because | ||
206 | * we don't have the disk inode to write into here, we allocate a buffer and | ||
207 | * format the extents into it via xfs_iextents_copy(). We free the buffer in | ||
208 | * the unlock routine after the copy for the log has been made. | ||
209 | * | ||
210 | * In the case of the data fork, the in-core and on-disk fork sizes can be | ||
211 | * different due to delayed allocation extents. We only log on-disk extents | ||
212 | * here, so always use the physical fork size to determine the size of the | ||
213 | * buffer we need to allocate. | ||
214 | */ | ||
215 | STATIC void | ||
216 | xfs_inode_item_format_extents( | ||
217 | struct xfs_inode *ip, | ||
218 | struct xfs_log_iovec *vecp, | ||
219 | int whichfork, | ||
220 | int type) | ||
221 | { | ||
222 | xfs_bmbt_rec_t *ext_buffer; | ||
223 | |||
224 | ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); | ||
225 | if (whichfork == XFS_DATA_FORK) | ||
226 | ip->i_itemp->ili_extents_buf = ext_buffer; | ||
227 | else | ||
228 | ip->i_itemp->ili_aextents_buf = ext_buffer; | ||
229 | |||
230 | vecp->i_addr = ext_buffer; | ||
231 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); | ||
232 | vecp->i_type = type; | ||
233 | } | ||
234 | |||
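Because delayed allocation extents make the in-core data fork size (if_bytes) differ from what is actually logged, the helper sizes its buffer from XFS_IFORK_SIZE(), the physical fork size, rather than from if_bytes. Both format paths later in this patch then collapse onto one call each:

	/* data fork extents (delalloc entries are skipped by the copy) */
	xfs_inode_item_format_extents(ip, vecp, XFS_DATA_FORK,
				      XLOG_REG_TYPE_IEXT);

	/* attribute fork extents */
	xfs_inode_item_format_extents(ip, vecp, XFS_ATTR_FORK,
				      XLOG_REG_TYPE_IATTR_EXT);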
235 | /* | ||
201 | * This is called to fill in the vector of log iovecs for the | 236 | * This is called to fill in the vector of log iovecs for the |
202 | * given inode log item. It fills the first item with an inode | 237 | * given inode log item. It fills the first item with an inode |
203 | * log format structure, the second with the on-disk inode structure, | 238 | * log format structure, the second with the on-disk inode structure, |
@@ -213,7 +248,6 @@ xfs_inode_item_format( | |||
213 | struct xfs_inode *ip = iip->ili_inode; | 248 | struct xfs_inode *ip = iip->ili_inode; |
214 | uint nvecs; | 249 | uint nvecs; |
215 | size_t data_bytes; | 250 | size_t data_bytes; |
216 | xfs_bmbt_rec_t *ext_buffer; | ||
217 | xfs_mount_t *mp; | 251 | xfs_mount_t *mp; |
218 | 252 | ||
219 | vecp->i_addr = &iip->ili_format; | 253 | vecp->i_addr = &iip->ili_format; |
@@ -320,22 +354,8 @@ xfs_inode_item_format( | |||
320 | } else | 354 | } else |
321 | #endif | 355 | #endif |
322 | { | 356 | { |
323 | /* | 357 | xfs_inode_item_format_extents(ip, vecp, |
324 | * There are delayed allocation extents | 358 | XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); |
325 | * in the inode, or we need to convert | ||
326 | * the extents to on disk format. | ||
327 | * Use xfs_iextents_copy() | ||
328 | * to copy only the real extents into | ||
329 | * a separate buffer. We'll free the | ||
330 | * buffer in the unlock routine. | ||
331 | */ | ||
332 | ext_buffer = kmem_alloc(ip->i_df.if_bytes, | ||
333 | KM_SLEEP); | ||
334 | iip->ili_extents_buf = ext_buffer; | ||
335 | vecp->i_addr = ext_buffer; | ||
336 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | ||
337 | XFS_DATA_FORK); | ||
338 | vecp->i_type = XLOG_REG_TYPE_IEXT; | ||
339 | } | 359 | } |
340 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); | 360 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); |
341 | iip->ili_format.ilf_dsize = vecp->i_len; | 361 | iip->ili_format.ilf_dsize = vecp->i_len; |
@@ -445,19 +465,12 @@ xfs_inode_item_format( | |||
445 | */ | 465 | */ |
446 | vecp->i_addr = ip->i_afp->if_u1.if_extents; | 466 | vecp->i_addr = ip->i_afp->if_u1.if_extents; |
447 | vecp->i_len = ip->i_afp->if_bytes; | 467 | vecp->i_len = ip->i_afp->if_bytes; |
468 | vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; | ||
448 | #else | 469 | #else |
449 | ASSERT(iip->ili_aextents_buf == NULL); | 470 | ASSERT(iip->ili_aextents_buf == NULL); |
450 | /* | 471 | xfs_inode_item_format_extents(ip, vecp, |
451 | * Need to endian flip before logging | 472 | XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); |
452 | */ | ||
453 | ext_buffer = kmem_alloc(ip->i_afp->if_bytes, | ||
454 | KM_SLEEP); | ||
455 | iip->ili_aextents_buf = ext_buffer; | ||
456 | vecp->i_addr = ext_buffer; | ||
457 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | ||
458 | XFS_ATTR_FORK); | ||
459 | #endif | 473 | #endif |
460 | vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; | ||
461 | iip->ili_format.ilf_asize = vecp->i_len; | 474 | iip->ili_format.ilf_asize = vecp->i_len; |
462 | vecp++; | 475 | vecp++; |
463 | nvecs++; | 476 | nvecs++; |
@@ -760,11 +773,11 @@ xfs_inode_item_push( | |||
760 | * Push the inode to it's backing buffer. This will not remove the | 773 | * Push the inode to it's backing buffer. This will not remove the |
761 | * inode from the AIL - a further push will be required to trigger a | 774 | * inode from the AIL - a further push will be required to trigger a |
762 | * buffer push. However, this allows all the dirty inodes to be pushed | 775 | * buffer push. However, this allows all the dirty inodes to be pushed |
763 | * to the buffer before it is pushed to disk. THe buffer IO completion | 776 | * to the buffer before it is pushed to disk. The buffer IO completion |
764 | * will pull th einode from the AIL, mark it clean and unlock the flush | 777 | * will pull the inode from the AIL, mark it clean and unlock the flush |
765 | * lock. | 778 | * lock. |
766 | */ | 779 | */ |
767 | (void) xfs_iflush(ip, 0); | 780 | (void) xfs_iflush(ip, SYNC_TRYLOCK); |
768 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 781 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
769 | } | 782 | } |
770 | 783 | ||
@@ -842,15 +855,64 @@ xfs_inode_item_destroy( | |||
842 | * flushed to disk. It is responsible for removing the inode item | 855 | * flushed to disk. It is responsible for removing the inode item |
843 | * from the AIL if it has not been re-logged, and unlocking the inode's | 856 | * from the AIL if it has not been re-logged, and unlocking the inode's |
844 | * flush lock. | 857 | * flush lock. |
858 | * | ||
859 | * To reduce AIL lock traffic as much as possible, we scan the buffer log item | ||
860 | * list for other inodes that will run this function. We remove them from the | ||
861 | * buffer list so we can process all the inode IO completions in one AIL lock | ||
862 | * traversal. | ||
845 | */ | 863 | */ |
846 | void | 864 | void |
847 | xfs_iflush_done( | 865 | xfs_iflush_done( |
848 | struct xfs_buf *bp, | 866 | struct xfs_buf *bp, |
849 | struct xfs_log_item *lip) | 867 | struct xfs_log_item *lip) |
850 | { | 868 | { |
851 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 869 | struct xfs_inode_log_item *iip; |
852 | xfs_inode_t *ip = iip->ili_inode; | 870 | struct xfs_log_item *blip; |
871 | struct xfs_log_item *next; | ||
872 | struct xfs_log_item *prev; | ||
853 | struct xfs_ail *ailp = lip->li_ailp; | 873 | struct xfs_ail *ailp = lip->li_ailp; |
874 | int need_ail = 0; | ||
875 | |||
876 | /* | ||
877 | * Scan the buffer IO completions for other inodes being completed and | ||
878 | * attach them to the current inode log item. | ||
879 | */ | ||
880 | blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | ||
881 | prev = NULL; | ||
882 | while (blip != NULL) { | ||
883 | if (blip->li_cb != xfs_iflush_done) { | ||
884 | prev = blip; | ||
885 | blip = blip->li_bio_list; | ||
886 | continue; | ||
887 | } | ||
888 | |||
889 | /* remove from list */ | ||
890 | next = blip->li_bio_list; | ||
891 | if (!prev) { | ||
892 | XFS_BUF_SET_FSPRIVATE(bp, next); | ||
893 | } else { | ||
894 | prev->li_bio_list = next; | ||
895 | } | ||
896 | |||
897 | /* add to current list */ | ||
898 | blip->li_bio_list = lip->li_bio_list; | ||
899 | lip->li_bio_list = blip; | ||
900 | |||
901 | /* | ||
902 | * while we have the item, do the unlocked check for needing | ||
903 | * the AIL lock. | ||
904 | */ | ||
905 | iip = INODE_ITEM(blip); | ||
906 | if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) | ||
907 | need_ail++; | ||
908 | |||
909 | blip = next; | ||
910 | } | ||
911 | |||
912 | /* make sure we capture the state of the initial inode. */ | ||
913 | iip = INODE_ITEM(lip); | ||
914 | if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) | ||
915 | need_ail++; | ||
854 | 916 | ||
855 | /* | 917 | /* |
856 | * We only want to pull the item from the AIL if it is | 918 | * We only want to pull the item from the AIL if it is |
@@ -861,28 +923,37 @@ xfs_iflush_done( | |||
861 | * the lock since it's cheaper, and then we recheck while | 923 | * the lock since it's cheaper, and then we recheck while |
862 | * holding the lock before removing the inode from the AIL. | 924 | * holding the lock before removing the inode from the AIL. |
863 | */ | 925 | */ |
864 | if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) { | 926 | if (need_ail) { |
927 | struct xfs_log_item *log_items[need_ail]; | ||
928 | int i = 0; | ||
865 | spin_lock(&ailp->xa_lock); | 929 | spin_lock(&ailp->xa_lock); |
866 | if (lip->li_lsn == iip->ili_flush_lsn) { | 930 | for (blip = lip; blip; blip = blip->li_bio_list) { |
867 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 931 | iip = INODE_ITEM(blip); |
868 | xfs_trans_ail_delete(ailp, lip); | 932 | if (iip->ili_logged && |
869 | } else { | 933 | blip->li_lsn == iip->ili_flush_lsn) { |
870 | spin_unlock(&ailp->xa_lock); | 934 | log_items[i++] = blip; |
935 | } | ||
936 | ASSERT(i <= need_ail); | ||
871 | } | 937 | } |
938 | /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ | ||
939 | xfs_trans_ail_delete_bulk(ailp, log_items, i); | ||
872 | } | 940 | } |
873 | 941 | ||
874 | iip->ili_logged = 0; | ||
875 | 942 | ||
876 | /* | 943 | /* |
877 | * Clear the ili_last_fields bits now that we know that the | 944 | * clean up and unlock the flush lock now we are done. We can clear the |
878 | * data corresponding to them is safely on disk. | 945 | * ili_last_fields bits now that we know that the data corresponding to |
946 | * them is safely on disk. | ||
879 | */ | 947 | */ |
880 | iip->ili_last_fields = 0; | 948 | for (blip = lip; blip; blip = next) { |
949 | next = blip->li_bio_list; | ||
950 | blip->li_bio_list = NULL; | ||
881 | 951 | ||
882 | /* | 952 | iip = INODE_ITEM(blip); |
883 | * Release the inode's flush lock since we're done with it. | 953 | iip->ili_logged = 0; |
884 | */ | 954 | iip->ili_last_fields = 0; |
885 | xfs_ifunlock(ip); | 955 | xfs_ifunlock(iip->ili_inode); |
956 | } | ||
886 | } | 957 | } |
887 | 958 | ||
888 | /* | 959 | /* |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 20576146369f..091d82b94c4d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -47,127 +47,8 @@ | |||
47 | 47 | ||
48 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ | 48 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ |
49 | << mp->m_writeio_log) | 49 | << mp->m_writeio_log) |
50 | #define XFS_STRAT_WRITE_IMAPS 2 | ||
51 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP | 50 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP |
52 | 51 | ||
53 | STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, | ||
54 | int, struct xfs_bmbt_irec *, int *); | ||
55 | STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int, | ||
56 | struct xfs_bmbt_irec *, int *); | ||
57 | STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
58 | struct xfs_bmbt_irec *, int *); | ||
59 | |||
60 | int | ||
61 | xfs_iomap( | ||
62 | struct xfs_inode *ip, | ||
63 | xfs_off_t offset, | ||
64 | ssize_t count, | ||
65 | int flags, | ||
66 | struct xfs_bmbt_irec *imap, | ||
67 | int *nimaps, | ||
68 | int *new) | ||
69 | { | ||
70 | struct xfs_mount *mp = ip->i_mount; | ||
71 | xfs_fileoff_t offset_fsb, end_fsb; | ||
72 | int error = 0; | ||
73 | int lockmode = 0; | ||
74 | int bmapi_flags = 0; | ||
75 | |||
76 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | ||
77 | |||
78 | *new = 0; | ||
79 | |||
80 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
81 | return XFS_ERROR(EIO); | ||
82 | |||
83 | trace_xfs_iomap_enter(ip, offset, count, flags, NULL); | ||
84 | |||
85 | switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { | ||
86 | case BMAPI_READ: | ||
87 | lockmode = xfs_ilock_map_shared(ip); | ||
88 | bmapi_flags = XFS_BMAPI_ENTIRE; | ||
89 | break; | ||
90 | case BMAPI_WRITE: | ||
91 | lockmode = XFS_ILOCK_EXCL; | ||
92 | if (flags & BMAPI_IGNSTATE) | ||
93 | bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; | ||
94 | xfs_ilock(ip, lockmode); | ||
95 | break; | ||
96 | case BMAPI_ALLOCATE: | ||
97 | lockmode = XFS_ILOCK_SHARED; | ||
98 | bmapi_flags = XFS_BMAPI_ENTIRE; | ||
99 | |||
100 | /* Attempt non-blocking lock */ | ||
101 | if (flags & BMAPI_TRYLOCK) { | ||
102 | if (!xfs_ilock_nowait(ip, lockmode)) | ||
103 | return XFS_ERROR(EAGAIN); | ||
104 | } else { | ||
105 | xfs_ilock(ip, lockmode); | ||
106 | } | ||
107 | break; | ||
108 | default: | ||
109 | BUG(); | ||
110 | } | ||
111 | |||
112 | ASSERT(offset <= mp->m_maxioffset); | ||
113 | if ((xfs_fsize_t)offset + count > mp->m_maxioffset) | ||
114 | count = mp->m_maxioffset - offset; | ||
115 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | ||
116 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
117 | |||
118 | error = xfs_bmapi(NULL, ip, offset_fsb, | ||
119 | (xfs_filblks_t)(end_fsb - offset_fsb), | ||
120 | bmapi_flags, NULL, 0, imap, | ||
121 | nimaps, NULL); | ||
122 | |||
123 | if (error) | ||
124 | goto out; | ||
125 | |||
126 | switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { | ||
127 | case BMAPI_WRITE: | ||
128 | /* If we found an extent, return it */ | ||
129 | if (*nimaps && | ||
130 | (imap->br_startblock != HOLESTARTBLOCK) && | ||
131 | (imap->br_startblock != DELAYSTARTBLOCK)) { | ||
132 | trace_xfs_iomap_found(ip, offset, count, flags, imap); | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | if (flags & BMAPI_DIRECT) { | ||
137 | error = xfs_iomap_write_direct(ip, offset, count, flags, | ||
138 | imap, nimaps); | ||
139 | } else { | ||
140 | error = xfs_iomap_write_delay(ip, offset, count, flags, | ||
141 | imap, nimaps); | ||
142 | } | ||
143 | if (!error) { | ||
144 | trace_xfs_iomap_alloc(ip, offset, count, flags, imap); | ||
145 | } | ||
146 | *new = 1; | ||
147 | break; | ||
148 | case BMAPI_ALLOCATE: | ||
149 | /* If we found an extent, return it */ | ||
150 | xfs_iunlock(ip, lockmode); | ||
151 | lockmode = 0; | ||
152 | |||
153 | if (*nimaps && !isnullstartblock(imap->br_startblock)) { | ||
154 | trace_xfs_iomap_found(ip, offset, count, flags, imap); | ||
155 | break; | ||
156 | } | ||
157 | |||
158 | error = xfs_iomap_write_allocate(ip, offset, count, | ||
159 | imap, nimaps); | ||
160 | break; | ||
161 | } | ||
162 | |||
163 | ASSERT(*nimaps <= 1); | ||
164 | |||
165 | out: | ||
166 | if (lockmode) | ||
167 | xfs_iunlock(ip, lockmode); | ||
168 | return XFS_ERROR(error); | ||
169 | } | ||
170 | |||
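With the BMAPI_* multiplexer gone, callers invoke the specific allocator directly through the new prototypes exported below in xfs_iomap.h. A hypothetical hole-filling write path, locking and mapping as the old BMAPI_WRITE case did ("direct" is an illustrative flag here, not an XFS symbol):

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	nimaps = 1;
	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
			  XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
	if (!error && (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) {
		if (direct)
			error = xfs_iomap_write_direct(ip, offset, count,
						       &imap, nimaps);
		else
			error = xfs_iomap_write_delay(ip, offset, count,
						      &imap);
	}
	xfs_iunlock(ip, XFS_ILOCK_EXCL);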
171 | STATIC int | 52 | STATIC int |
172 | xfs_iomap_eof_align_last_fsb( | 53 | xfs_iomap_eof_align_last_fsb( |
173 | xfs_mount_t *mp, | 54 | xfs_mount_t *mp, |
@@ -220,11 +101,11 @@ xfs_iomap_eof_align_last_fsb( | |||
220 | } | 101 | } |
221 | 102 | ||
222 | STATIC int | 103 | STATIC int |
223 | xfs_cmn_err_fsblock_zero( | 104 | xfs_alert_fsblock_zero( |
224 | xfs_inode_t *ip, | 105 | xfs_inode_t *ip, |
225 | xfs_bmbt_irec_t *imap) | 106 | xfs_bmbt_irec_t *imap) |
226 | { | 107 | { |
227 | xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, | 108 | xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, |
228 | "Access to block zero in inode %llu " | 109 | "Access to block zero in inode %llu " |
229 | "start_block: %llx start_off: %llx " | 110 | "start_block: %llx start_off: %llx " |
230 | "blkcnt: %llx extent-state: %x\n", | 111 | "blkcnt: %llx extent-state: %x\n", |
@@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero( | |||
236 | return EFSCORRUPTED; | 117 | return EFSCORRUPTED; |
237 | } | 118 | } |
238 | 119 | ||
239 | STATIC int | 120 | int |
240 | xfs_iomap_write_direct( | 121 | xfs_iomap_write_direct( |
241 | xfs_inode_t *ip, | 122 | xfs_inode_t *ip, |
242 | xfs_off_t offset, | 123 | xfs_off_t offset, |
243 | size_t count, | 124 | size_t count, |
244 | int flags, | ||
245 | xfs_bmbt_irec_t *imap, | 125 | xfs_bmbt_irec_t *imap, |
246 | int *nmaps) | 126 | int nmaps) |
247 | { | 127 | { |
248 | xfs_mount_t *mp = ip->i_mount; | 128 | xfs_mount_t *mp = ip->i_mount; |
249 | xfs_fileoff_t offset_fsb; | 129 | xfs_fileoff_t offset_fsb; |
@@ -279,7 +159,7 @@ xfs_iomap_write_direct( | |||
279 | if (error) | 159 | if (error) |
280 | goto error_out; | 160 | goto error_out; |
281 | } else { | 161 | } else { |
282 | if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK)) | 162 | if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) |
283 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) | 163 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) |
284 | imap->br_blockcount + | 164 | imap->br_blockcount + |
285 | imap->br_startoff); | 165 | imap->br_startoff); |
@@ -331,7 +211,7 @@ xfs_iomap_write_direct( | |||
331 | xfs_trans_ijoin(tp, ip); | 211 | xfs_trans_ijoin(tp, ip); |
332 | 212 | ||
333 | bmapi_flag = XFS_BMAPI_WRITE; | 213 | bmapi_flag = XFS_BMAPI_WRITE; |
334 | if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) | 214 | if (offset < ip->i_size || extsz) |
335 | bmapi_flag |= XFS_BMAPI_PREALLOC; | 215 | bmapi_flag |= XFS_BMAPI_PREALLOC; |
336 | 216 | ||
337 | /* | 217 | /* |
@@ -366,11 +246,10 @@ xfs_iomap_write_direct( | |||
366 | } | 246 | } |
367 | 247 | ||
368 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { | 248 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { |
369 | error = xfs_cmn_err_fsblock_zero(ip, imap); | 249 | error = xfs_alert_fsblock_zero(ip, imap); |
370 | goto error_out; | 250 | goto error_out; |
371 | } | 251 | } |
372 | 252 | ||
373 | *nmaps = 1; | ||
374 | return 0; | 253 | return 0; |
375 | 254 | ||
376 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | 255 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ |
@@ -379,7 +258,6 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | |||
379 | 258 | ||
380 | error1: /* Just cancel transaction */ | 259 | error1: /* Just cancel transaction */ |
381 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 260 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
382 | *nmaps = 0; /* nothing set-up here */ | ||
383 | 261 | ||
384 | error_out: | 262 | error_out: |
385 | return XFS_ERROR(error); | 263 | return XFS_ERROR(error); |
@@ -389,6 +267,9 @@ error_out: | |||
389 | * If the caller is doing a write at the end of the file, then extend the | 267 | * If the caller is doing a write at the end of the file, then extend the |
390 | * allocation out to the file system's write iosize. We clean up any extra | 268 | * allocation out to the file system's write iosize. We clean up any extra |
391 | * space left over when the file is closed in xfs_inactive(). | 269 | * space left over when the file is closed in xfs_inactive(). |
270 | * | ||
271 | * If we find we already have delalloc preallocation beyond EOF, don't do more | ||
272 | * preallocation as it is not needed. | ||
392 | */ | 273 | */ |
393 | STATIC int | 274 | STATIC int |
394 | xfs_iomap_eof_want_preallocate( | 275 | xfs_iomap_eof_want_preallocate( |
@@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate( | |||
396 | xfs_inode_t *ip, | 277 | xfs_inode_t *ip, |
397 | xfs_off_t offset, | 278 | xfs_off_t offset, |
398 | size_t count, | 279 | size_t count, |
399 | int ioflag, | ||
400 | xfs_bmbt_irec_t *imap, | 280 | xfs_bmbt_irec_t *imap, |
401 | int nimaps, | 281 | int nimaps, |
402 | int *prealloc) | 282 | int *prealloc) |
@@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate( | |||
405 | xfs_filblks_t count_fsb; | 285 | xfs_filblks_t count_fsb; |
406 | xfs_fsblock_t firstblock; | 286 | xfs_fsblock_t firstblock; |
407 | int n, error, imaps; | 287 | int n, error, imaps; |
288 | int found_delalloc = 0; | ||
408 | 289 | ||
409 | *prealloc = 0; | 290 | *prealloc = 0; |
410 | if ((offset + count) <= ip->i_size) | 291 | if ((offset + count) <= ip->i_size) |
@@ -429,20 +310,71 @@ xfs_iomap_eof_want_preallocate( | |||
429 | return 0; | 310 | return 0; |
430 | start_fsb += imap[n].br_blockcount; | 311 | start_fsb += imap[n].br_blockcount; |
431 | count_fsb -= imap[n].br_blockcount; | 312 | count_fsb -= imap[n].br_blockcount; |
313 | |||
314 | if (imap[n].br_startblock == DELAYSTARTBLOCK) | ||
315 | found_delalloc = 1; | ||
432 | } | 316 | } |
433 | } | 317 | } |
434 | *prealloc = 1; | 318 | if (!found_delalloc) |
319 | *prealloc = 1; | ||
435 | return 0; | 320 | return 0; |
436 | } | 321 | } |
437 | 322 | ||
438 | STATIC int | 323 | /* |
324 | * If we don't have a user specified preallocation size, dynamically increase | ||
325 | * the preallocation size as the size of the file grows. Cap the maximum size | ||
326 | * at a single extent or less if the filesystem is near full. The closer the | ||
327 | * filesystem is to full, the smaller the maximum preallocation. | ||
328 | */ | ||
329 | STATIC xfs_fsblock_t | ||
330 | xfs_iomap_prealloc_size( | ||
331 | struct xfs_mount *mp, | ||
332 | struct xfs_inode *ip) | ||
333 | { | ||
334 | xfs_fsblock_t alloc_blocks = 0; | ||
335 | |||
336 | if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { | ||
337 | int shift = 0; | ||
338 | int64_t freesp; | ||
339 | |||
340 | /* | ||
341 | * rounddown_pow_of_two() returns an undefined result | ||
342 | * if we pass in alloc_blocks = 0. Hence the "+ 1" to | ||
343 | * ensure we always pass in a non-zero value. | ||
344 | */ | ||
345 | alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1; | ||
346 | alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, | ||
347 | rounddown_pow_of_two(alloc_blocks)); | ||
348 | |||
349 | xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); | ||
350 | freesp = mp->m_sb.sb_fdblocks; | ||
351 | if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { | ||
352 | shift = 2; | ||
353 | if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) | ||
354 | shift++; | ||
355 | if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) | ||
356 | shift++; | ||
357 | if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) | ||
358 | shift++; | ||
359 | if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) | ||
360 | shift++; | ||
361 | } | ||
362 | if (shift) | ||
363 | alloc_blocks >>= shift; | ||
364 | } | ||
365 | |||
366 | if (alloc_blocks < mp->m_writeio_blocks) | ||
367 | alloc_blocks = mp->m_writeio_blocks; | ||
368 | |||
369 | return alloc_blocks; | ||
370 | } | ||
371 | |||
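The throttle halves the preallocation once per low-space threshold crossed, starting at a factor of four: free space below only the 5% mark gives shift 2; below the 5% and 4% marks gives shift 3, so a file whose size rounds down to 262144 blocks (1 GiB at 4 KiB blocks) would get 32768 blocks of speculative preallocation, never less than m_writeio_blocks. A standalone model in plain C (function and parameter names are illustrative; the thresholds stand in for mp->m_low_space[]):

	#include <stdint.h>

	static uint64_t rounddown_pow_of_two64(uint64_t x)
	{
		uint64_t r = 1;

		while (r <= x >> 1)
			r <<= 1;
		return r;
	}

	/* sizes are in filesystem blocks; low_space[0..4] = 5%..1% marks */
	uint64_t prealloc_blocks(uint64_t isize_blocks, uint64_t maxextlen,
				 uint64_t freesp, const uint64_t low_space[5],
				 uint64_t writeio_blocks)
	{
		/* "+ 1" avoids rounddown_pow_of_two(0), per the comment above */
		uint64_t alloc = rounddown_pow_of_two64(isize_blocks + 1);
		int crossed = 0;
		int i;

		if (alloc > maxextlen)
			alloc = maxextlen;

		for (i = 0; i < 5; i++)
			if (freesp < low_space[i])
				crossed++;

		/* first threshold costs a shift of 2, each further one adds 1 */
		if (crossed)
			alloc >>= crossed + 1;

		if (alloc < writeio_blocks)
			alloc = writeio_blocks;
		return alloc;
	}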
372 | int | ||
439 | xfs_iomap_write_delay( | 373 | xfs_iomap_write_delay( |
440 | xfs_inode_t *ip, | 374 | xfs_inode_t *ip, |
441 | xfs_off_t offset, | 375 | xfs_off_t offset, |
442 | size_t count, | 376 | size_t count, |
443 | int ioflag, | 377 | xfs_bmbt_irec_t *ret_imap) |
444 | xfs_bmbt_irec_t *ret_imap, | ||
445 | int *nmaps) | ||
446 | { | 378 | { |
447 | xfs_mount_t *mp = ip->i_mount; | 379 | xfs_mount_t *mp = ip->i_mount; |
448 | xfs_fileoff_t offset_fsb; | 380 | xfs_fileoff_t offset_fsb; |
@@ -469,16 +401,19 @@ xfs_iomap_write_delay( | |||
469 | extsz = xfs_get_extsz_hint(ip); | 401 | extsz = xfs_get_extsz_hint(ip); |
470 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 402 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
471 | 403 | ||
404 | |||
472 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, | 405 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, |
473 | ioflag, imap, XFS_WRITE_IMAPS, &prealloc); | 406 | imap, XFS_WRITE_IMAPS, &prealloc); |
474 | if (error) | 407 | if (error) |
475 | return error; | 408 | return error; |
476 | 409 | ||
477 | retry: | 410 | retry: |
478 | if (prealloc) { | 411 | if (prealloc) { |
412 | xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); | ||
413 | |||
479 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); | 414 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); |
480 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); | 415 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); |
481 | last_fsb = ioalign + mp->m_writeio_blocks; | 416 | last_fsb = ioalign + alloc_blocks; |
482 | } else { | 417 | } else { |
483 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); | 418 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); |
484 | } | 419 | } |
@@ -496,22 +431,31 @@ retry: | |||
496 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | | 431 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | |
497 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, | 432 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, |
498 | &nimaps, NULL); | 433 | &nimaps, NULL); |
499 | if (error && (error != ENOSPC)) | 434 | switch (error) { |
435 | case 0: | ||
436 | case ENOSPC: | ||
437 | case EDQUOT: | ||
438 | break; | ||
439 | default: | ||
500 | return XFS_ERROR(error); | 440 | return XFS_ERROR(error); |
441 | } | ||
501 | 442 | ||
502 | /* | 443 | /* |
503 | * If bmapi returned us nothing, and if we didn't get back EDQUOT, | 444 | * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For |
504 | * then we must have run out of space - flush all other inodes with | 445 | * ENOSPC, flush all other inodes with delalloc blocks to free up |
505 | * delalloc blocks and retry without EOF preallocation. | 446 | * some of the excess reserved metadata space. For both cases, retry |
447 | * without EOF preallocation. | ||
506 | */ | 448 | */ |
507 | if (nimaps == 0) { | 449 | if (nimaps == 0) { |
508 | trace_xfs_delalloc_enospc(ip, offset, count); | 450 | trace_xfs_delalloc_enospc(ip, offset, count); |
509 | if (flushed) | 451 | if (flushed) |
510 | return XFS_ERROR(ENOSPC); | 452 | return XFS_ERROR(error ? error : ENOSPC); |
511 | 453 | ||
512 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 454 | if (error == ENOSPC) { |
513 | xfs_flush_inodes(ip); | 455 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
514 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 456 | xfs_flush_inodes(ip); |
457 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
458 | } | ||
515 | 459 | ||
516 | flushed = 1; | 460 | flushed = 1; |
517 | error = 0; | 461 | error = 0; |
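The reworked failure path distinguishes the two "no mapping returned" causes: ENOSPC is retried after flushing other delalloc inodes to release reserved metadata space, while EDQUOT skips the flush (writing back data cannot raise a quota limit); both retry once without EOF preallocation. Condensed control flow; the retry tail past this hunk is reconstructed from the comment, so treat it as a sketch:

	if (nimaps == 0) {
		if (flushed)			/* already retried once */
			return XFS_ERROR(error ? error : ENOSPC);

		if (error == ENOSPC) {		/* flushing cannot cure EDQUOT */
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
			xfs_flush_inodes(ip);
			xfs_ilock(ip, XFS_ILOCK_EXCL);
		}

		flushed = 1;
		error = 0;
		prealloc = 0;			/* retry without EOF prealloc */
		goto retry;
	}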
@@ -520,11 +464,9 @@ retry: | |||
520 | } | 464 | } |
521 | 465 | ||
522 | if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) | 466 | if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) |
523 | return xfs_cmn_err_fsblock_zero(ip, &imap[0]); | 467 | return xfs_alert_fsblock_zero(ip, &imap[0]); |
524 | 468 | ||
525 | *ret_imap = imap[0]; | 469 | *ret_imap = imap[0]; |
526 | *nmaps = 1; | ||
527 | |||
528 | return 0; | 470 | return 0; |
529 | } | 471 | } |
530 | 472 | ||
@@ -538,13 +480,12 @@ retry: | |||
538 | * We no longer bother to look at the incoming map - all we have to | 480 | * We no longer bother to look at the incoming map - all we have to |
539 | * guarantee is that whatever we allocate fills the required range. | 481 | * guarantee is that whatever we allocate fills the required range. |
540 | */ | 482 | */ |
541 | STATIC int | 483 | int |
542 | xfs_iomap_write_allocate( | 484 | xfs_iomap_write_allocate( |
543 | xfs_inode_t *ip, | 485 | xfs_inode_t *ip, |
544 | xfs_off_t offset, | 486 | xfs_off_t offset, |
545 | size_t count, | 487 | size_t count, |
546 | xfs_bmbt_irec_t *imap, | 488 | xfs_bmbt_irec_t *imap) |
547 | int *retmap) | ||
548 | { | 489 | { |
549 | xfs_mount_t *mp = ip->i_mount; | 490 | xfs_mount_t *mp = ip->i_mount; |
550 | xfs_fileoff_t offset_fsb, last_block; | 491 | xfs_fileoff_t offset_fsb, last_block; |
@@ -557,8 +498,6 @@ xfs_iomap_write_allocate( | |||
557 | int error = 0; | 498 | int error = 0; |
558 | int nres; | 499 | int nres; |
559 | 500 | ||
560 | *retmap = 0; | ||
561 | |||
562 | /* | 501 | /* |
563 | * Make sure that the dquots are there. | 502 | * Make sure that the dquots are there. |
564 | */ | 503 | */ |
@@ -675,12 +614,11 @@ xfs_iomap_write_allocate( | |||
675 | * covers at least part of the caller's request | 614 | * covers at least part of the caller's request |
676 | */ | 615 | */ |
677 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) | 616 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) |
678 | return xfs_cmn_err_fsblock_zero(ip, imap); | 617 | return xfs_alert_fsblock_zero(ip, imap); |
679 | 618 | ||
680 | if ((offset_fsb >= imap->br_startoff) && | 619 | if ((offset_fsb >= imap->br_startoff) && |
681 | (offset_fsb < (imap->br_startoff + | 620 | (offset_fsb < (imap->br_startoff + |
682 | imap->br_blockcount))) { | 621 | imap->br_blockcount))) { |
683 | *retmap = 1; | ||
684 | XFS_STATS_INC(xs_xstrat_quick); | 622 | XFS_STATS_INC(xs_xstrat_quick); |
685 | return 0; | 623 | return 0; |
686 | } | 624 | } |
@@ -786,7 +724,7 @@ xfs_iomap_write_unwritten( | |||
786 | return XFS_ERROR(error); | 724 | return XFS_ERROR(error); |
787 | 725 | ||
788 | if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) | 726 | if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) |
789 | return xfs_cmn_err_fsblock_zero(ip, &imap); | 727 | return xfs_alert_fsblock_zero(ip, &imap); |
790 | 728 | ||
791 | if ((numblks_fsb = imap.br_blockcount) == 0) { | 729 | if ((numblks_fsb = imap.br_blockcount) == 0) { |
792 | /* | 730 | /* |
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 7748a430f50d..80615760959a 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -18,30 +18,15 @@ | |||
18 | #ifndef __XFS_IOMAP_H__ | 18 | #ifndef __XFS_IOMAP_H__ |
19 | #define __XFS_IOMAP_H__ | 19 | #define __XFS_IOMAP_H__ |
20 | 20 | ||
21 | /* base extent manipulation calls */ | ||
22 | #define BMAPI_READ (1 << 0) /* read extents */ | ||
23 | #define BMAPI_WRITE (1 << 1) /* create extents */ | ||
24 | #define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */ | ||
25 | |||
26 | /* modifiers */ | ||
27 | #define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */ | ||
28 | #define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */ | ||
29 | #define BMAPI_MMA (1 << 6) /* allocate for mmap write */ | ||
30 | #define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */ | ||
31 | |||
32 | #define BMAPI_FLAGS \ | ||
33 | { BMAPI_READ, "READ" }, \ | ||
34 | { BMAPI_WRITE, "WRITE" }, \ | ||
35 | { BMAPI_ALLOCATE, "ALLOCATE" }, \ | ||
36 | { BMAPI_IGNSTATE, "IGNSTATE" }, \ | ||
37 | { BMAPI_DIRECT, "DIRECT" }, \ | ||
38 | { BMAPI_TRYLOCK, "TRYLOCK" } | ||
39 | |||
40 | struct xfs_inode; | 21 | struct xfs_inode; |
41 | struct xfs_bmbt_irec; | 22 | struct xfs_bmbt_irec; |
42 | 23 | ||
43 | extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, | 24 | extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, |
44 | struct xfs_bmbt_irec *, int *, int *); | 25 | struct xfs_bmbt_irec *, int); |
26 | extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, | ||
27 | struct xfs_bmbt_irec *); | ||
28 | extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
29 | struct xfs_bmbt_irec *); | ||
45 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); | 30 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); |
46 | 31 | ||
47 | #endif /* __XFS_IOMAP_H__*/ | 32 | #endif /* __XFS_IOMAP_H__*/ |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index dc1882adaf54..751e94fe1f77 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -204,7 +204,6 @@ xfs_bulkstat( | |||
204 | xfs_agi_t *agi; /* agi header data */ | 204 | xfs_agi_t *agi; /* agi header data */ |
205 | xfs_agino_t agino; /* inode # in allocation group */ | 205 | xfs_agino_t agino; /* inode # in allocation group */ |
206 | xfs_agnumber_t agno; /* allocation group number */ | 206 | xfs_agnumber_t agno; /* allocation group number */ |
207 | xfs_daddr_t bno; /* inode cluster start daddr */ | ||
208 | int chunkidx; /* current index into inode chunk */ | 207 | int chunkidx; /* current index into inode chunk */ |
209 | int clustidx; /* current index into inode cluster */ | 208 | int clustidx; /* current index into inode cluster */ |
210 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ | 209 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ |
@@ -463,7 +462,6 @@ xfs_bulkstat( | |||
463 | mp->m_sb.sb_inopblog); | 462 | mp->m_sb.sb_inopblog); |
464 | } | 463 | } |
465 | ino = XFS_AGINO_TO_INO(mp, agno, agino); | 464 | ino = XFS_AGINO_TO_INO(mp, agno, agino); |
466 | bno = XFS_AGB_TO_DADDR(mp, agno, agbno); | ||
467 | /* | 465 | /* |
468 | * Skip if this inode is free. | 466 | * Skip if this inode is free. |
469 | */ | 467 | */ |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index cee4ab9f8a9e..b612ce4520ae 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | |||
47 | xfs_buftarg_t *log_target, | 47 | xfs_buftarg_t *log_target, |
48 | xfs_daddr_t blk_offset, | 48 | xfs_daddr_t blk_offset, |
49 | int num_bblks); | 49 | int num_bblks); |
50 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | 50 | STATIC int xlog_space_left(struct log *log, atomic64_t *head); |
51 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 51 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
52 | STATIC void xlog_dealloc_log(xlog_t *log); | 52 | STATIC void xlog_dealloc_log(xlog_t *log); |
53 | 53 | ||
@@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); | |||
70 | /* local functions to manipulate grant head */ | 70 | /* local functions to manipulate grant head */ |
71 | STATIC int xlog_grant_log_space(xlog_t *log, | 71 | STATIC int xlog_grant_log_space(xlog_t *log, |
72 | xlog_ticket_t *xtic); | 72 | xlog_ticket_t *xtic); |
73 | STATIC void xlog_grant_push_ail(xfs_mount_t *mp, | 73 | STATIC void xlog_grant_push_ail(struct log *log, |
74 | int need_bytes); | 74 | int need_bytes); |
75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, | 75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, |
76 | xlog_ticket_t *ticket); | 76 | xlog_ticket_t *ticket); |
@@ -81,98 +81,73 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, | |||
81 | 81 | ||
82 | #if defined(DEBUG) | 82 | #if defined(DEBUG) |
83 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); | 83 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); |
84 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); | 84 | STATIC void xlog_verify_grant_tail(struct log *log); |
85 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, | 85 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, |
86 | int count, boolean_t syncing); | 86 | int count, boolean_t syncing); |
87 | STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | 87 | STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, |
88 | xfs_lsn_t tail_lsn); | 88 | xfs_lsn_t tail_lsn); |
89 | #else | 89 | #else |
90 | #define xlog_verify_dest_ptr(a,b) | 90 | #define xlog_verify_dest_ptr(a,b) |
91 | #define xlog_verify_grant_head(a,b) | 91 | #define xlog_verify_grant_tail(a) |
92 | #define xlog_verify_iclog(a,b,c,d) | 92 | #define xlog_verify_iclog(a,b,c,d) |
93 | #define xlog_verify_tail_lsn(a,b,c) | 93 | #define xlog_verify_tail_lsn(a,b,c) |
94 | #endif | 94 | #endif |
95 | 95 | ||
96 | STATIC int xlog_iclogs_empty(xlog_t *log); | 96 | STATIC int xlog_iclogs_empty(xlog_t *log); |
97 | 97 | ||
98 | |||
99 | static void | 98 | static void |
100 | xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) | 99 | xlog_grant_sub_space( |
100 | struct log *log, | ||
101 | atomic64_t *head, | ||
102 | int bytes) | ||
101 | { | 103 | { |
102 | if (*qp) { | 104 | int64_t head_val = atomic64_read(head); |
103 | tic->t_next = (*qp); | 105 | int64_t new, old; |
104 | tic->t_prev = (*qp)->t_prev; | ||
105 | (*qp)->t_prev->t_next = tic; | ||
106 | (*qp)->t_prev = tic; | ||
107 | } else { | ||
108 | tic->t_prev = tic->t_next = tic; | ||
109 | *qp = tic; | ||
110 | } | ||
111 | 106 | ||
112 | tic->t_flags |= XLOG_TIC_IN_Q; | 107 | do { |
113 | } | 108 | int cycle, space; |
114 | 109 | ||
115 | static void | 110 | xlog_crack_grant_head_val(head_val, &cycle, &space); |
116 | xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) | ||
117 | { | ||
118 | if (tic == tic->t_next) { | ||
119 | *qp = NULL; | ||
120 | } else { | ||
121 | *qp = tic->t_next; | ||
122 | tic->t_next->t_prev = tic->t_prev; | ||
123 | tic->t_prev->t_next = tic->t_next; | ||
124 | } | ||
125 | 111 | ||
126 | tic->t_next = tic->t_prev = NULL; | 112 | space -= bytes; |
127 | tic->t_flags &= ~XLOG_TIC_IN_Q; | 113 | if (space < 0) { |
114 | space += log->l_logsize; | ||
115 | cycle--; | ||
116 | } | ||
117 | |||
118 | old = head_val; | ||
119 | new = xlog_assign_grant_head_val(cycle, space); | ||
120 | head_val = atomic64_cmpxchg(head, old, new); | ||
121 | } while (head_val != old); | ||
128 | } | 122 | } |
129 | 123 | ||
130 | static void | 124 | static void |
131 | xlog_grant_sub_space(struct log *log, int bytes) | 125 | xlog_grant_add_space( |
126 | struct log *log, | ||
127 | atomic64_t *head, | ||
128 | int bytes) | ||
132 | { | 129 | { |
133 | log->l_grant_write_bytes -= bytes; | 130 | int64_t head_val = atomic64_read(head); |
134 | if (log->l_grant_write_bytes < 0) { | 131 | int64_t new, old; |
135 | log->l_grant_write_bytes += log->l_logsize; | ||
136 | log->l_grant_write_cycle--; | ||
137 | } | ||
138 | |||
139 | log->l_grant_reserve_bytes -= bytes; | ||
140 | if ((log)->l_grant_reserve_bytes < 0) { | ||
141 | log->l_grant_reserve_bytes += log->l_logsize; | ||
142 | log->l_grant_reserve_cycle--; | ||
143 | } | ||
144 | 132 | ||
145 | } | 133 | do { |
134 | int tmp; | ||
135 | int cycle, space; | ||
146 | 136 | ||
147 | static void | 137 | xlog_crack_grant_head_val(head_val, &cycle, &space); |
148 | xlog_grant_add_space_write(struct log *log, int bytes) | ||
149 | { | ||
150 | int tmp = log->l_logsize - log->l_grant_write_bytes; | ||
151 | if (tmp > bytes) | ||
152 | log->l_grant_write_bytes += bytes; | ||
153 | else { | ||
154 | log->l_grant_write_cycle++; | ||
155 | log->l_grant_write_bytes = bytes - tmp; | ||
156 | } | ||
157 | } | ||
158 | 138 | ||
159 | static void | 139 | tmp = log->l_logsize - space; |
160 | xlog_grant_add_space_reserve(struct log *log, int bytes) | 140 | if (tmp > bytes) |
161 | { | 141 | space += bytes; |
162 | int tmp = log->l_logsize - log->l_grant_reserve_bytes; | 142 | else { |
163 | if (tmp > bytes) | 143 | space = bytes - tmp; |
164 | log->l_grant_reserve_bytes += bytes; | 144 | cycle++; |
165 | else { | 145 | } |
166 | log->l_grant_reserve_cycle++; | ||
167 | log->l_grant_reserve_bytes = bytes - tmp; | ||
168 | } | ||
169 | } | ||
170 | 146 | ||
171 | static inline void | 147 | old = head_val; |
172 | xlog_grant_add_space(struct log *log, int bytes) | 148 | new = xlog_assign_grant_head_val(cycle, space); |
173 | { | 149 | head_val = atomic64_cmpxchg(head, old, new); |
174 | xlog_grant_add_space_write(log, bytes); | 150 | } while (head_val != old); |
175 | xlog_grant_add_space_reserve(log, bytes); | ||
176 | } | 151 | } |
177 | 152 | ||
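Both helpers pack (cycle, space) into a single 64-bit value so a grant head can move without taking l_grant_lock; the compare-exchange loop simply retries if another CPU advanced the head in the meantime. A standalone C11 model of xlog_grant_sub_space() (the crack/assign macros, presumably from xfs_log_priv.h, are modelled here as shift/mask):

	#include <stdatomic.h>
	#include <stdint.h>

	/* model: high 32 bits = cycle, low 32 bits = byte offset in the log */
	#define GRANT_CYCLE(v)	((int)((v) >> 32))
	#define GRANT_SPACE(v)	((int)((v) & 0xffffffff))
	#define GRANT_VAL(c, s)	(((int64_t)(c) << 32) | (uint32_t)(s))

	static void grant_sub_space(_Atomic int64_t *head, int logsize, int bytes)
	{
		int64_t old = atomic_load(head);
		int64_t new;

		do {
			int cycle = GRANT_CYCLE(old);
			int space = GRANT_SPACE(old) - bytes;

			if (space < 0) {	/* wrapped back past the log start */
				space += logsize;
				cycle--;
			}
			new = GRANT_VAL(cycle, space);
			/* on failure, 'old' is reloaded with the current value */
		} while (!atomic_compare_exchange_weak(head, &old, new));
	}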
178 | static void | 153 | static void |
@@ -355,7 +330,7 @@ xfs_log_reserve( | |||
355 | 330 | ||
356 | trace_xfs_log_reserve(log, internal_ticket); | 331 | trace_xfs_log_reserve(log, internal_ticket); |
357 | 332 | ||
358 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); | 333 | xlog_grant_push_ail(log, internal_ticket->t_unit_res); |
359 | retval = xlog_regrant_write_log_space(log, internal_ticket); | 334 | retval = xlog_regrant_write_log_space(log, internal_ticket); |
360 | } else { | 335 | } else { |
361 | /* may sleep if need to allocate more tickets */ | 336 | /* may sleep if need to allocate more tickets */ |
@@ -369,7 +344,7 @@ xfs_log_reserve( | |||
369 | 344 | ||
370 | trace_xfs_log_reserve(log, internal_ticket); | 345 | trace_xfs_log_reserve(log, internal_ticket); |
371 | 346 | ||
372 | xlog_grant_push_ail(mp, | 347 | xlog_grant_push_ail(log, |
373 | (internal_ticket->t_unit_res * | 348 | (internal_ticket->t_unit_res * |
374 | internal_ticket->t_cnt)); | 349 | internal_ticket->t_cnt)); |
375 | retval = xlog_grant_log_space(log, internal_ticket); | 350 | retval = xlog_grant_log_space(log, internal_ticket); |
@@ -399,11 +374,10 @@ xfs_log_mount( | |||
399 | int error; | 374 | int error; |
400 | 375 | ||
401 | if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) | 376 | if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) |
402 | cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); | 377 | xfs_notice(mp, "Mounting Filesystem"); |
403 | else { | 378 | else { |
404 | cmn_err(CE_NOTE, | 379 | xfs_notice(mp, |
405 | "!Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", | 380 | "Mounting filesystem in no-recovery mode. Filesystem will be inconsistent."); |
406 | mp->m_fsname); | ||
407 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); | 381 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); |
408 | } | 382 | } |
409 | 383 | ||
@@ -418,7 +392,7 @@ xfs_log_mount( | |||
418 | */ | 392 | */ |
419 | error = xfs_trans_ail_init(mp); | 393 | error = xfs_trans_ail_init(mp); |
420 | if (error) { | 394 | if (error) { |
421 | cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); | 395 | xfs_warn(mp, "AIL initialisation failed: error %d", error); |
422 | goto out_free_log; | 396 | goto out_free_log; |
423 | } | 397 | } |
424 | mp->m_log->l_ailp = mp->m_ail; | 398 | mp->m_log->l_ailp = mp->m_ail; |
@@ -438,7 +412,8 @@ xfs_log_mount( | |||
438 | if (readonly) | 412 | if (readonly) |
439 | mp->m_flags |= XFS_MOUNT_RDONLY; | 413 | mp->m_flags |= XFS_MOUNT_RDONLY; |
440 | if (error) { | 414 | if (error) { |
441 | cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); | 415 | xfs_warn(mp, "log mount/recovery failed: error %d", |
416 | error); | ||
442 | goto out_destroy_ail; | 417 | goto out_destroy_ail; |
443 | } | 418 | } |
444 | } | 419 | } |
@@ -567,10 +542,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
567 | */ | 542 | */ |
568 | } | 543 | } |
569 | 544 | ||
570 | if (error) { | 545 | if (error) |
571 | xfs_fs_cmn_err(CE_ALERT, mp, | 546 | xfs_alert(mp, "%s: unmount record failed", __func__); |
572 | "xfs_log_unmount: unmount record failed"); | ||
573 | } | ||
574 | 547 | ||
575 | 548 | ||
576 | spin_lock(&log->l_icloglock); | 549 | spin_lock(&log->l_icloglock); |
@@ -584,8 +557,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
584 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || | 557 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || |
585 | iclog->ic_state == XLOG_STATE_DIRTY)) { | 558 | iclog->ic_state == XLOG_STATE_DIRTY)) { |
586 | if (!XLOG_FORCED_SHUTDOWN(log)) { | 559 | if (!XLOG_FORCED_SHUTDOWN(log)) { |
587 | sv_wait(&iclog->ic_force_wait, PMEM, | 560 | xlog_wait(&iclog->ic_force_wait, |
588 | &log->l_icloglock, s); | 561 | &log->l_icloglock); |
589 | } else { | 562 | } else { |
590 | spin_unlock(&log->l_icloglock); | 563 | spin_unlock(&log->l_icloglock); |
591 | } | 564 | } |
@@ -625,8 +598,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
625 | || iclog->ic_state == XLOG_STATE_DIRTY | 598 | || iclog->ic_state == XLOG_STATE_DIRTY |
626 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { | 599 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { |
627 | 600 | ||
628 | sv_wait(&iclog->ic_force_wait, PMEM, | 601 | xlog_wait(&iclog->ic_force_wait, |
629 | &log->l_icloglock, s); | 602 | &log->l_icloglock); |
630 | } else { | 603 | } else { |
631 | spin_unlock(&log->l_icloglock); | 604 | spin_unlock(&log->l_icloglock); |
632 | } | 605 | } |
@@ -703,55 +676,46 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
703 | { | 676 | { |
704 | xlog_ticket_t *tic; | 677 | xlog_ticket_t *tic; |
705 | xlog_t *log = mp->m_log; | 678 | xlog_t *log = mp->m_log; |
706 | int need_bytes, free_bytes, cycle, bytes; | 679 | int need_bytes, free_bytes; |
707 | 680 | ||
708 | if (XLOG_FORCED_SHUTDOWN(log)) | 681 | if (XLOG_FORCED_SHUTDOWN(log)) |
709 | return; | 682 | return; |
710 | 683 | ||
711 | if (tail_lsn == 0) { | 684 | if (tail_lsn == 0) |
712 | /* needed since sync_lsn is 64 bits */ | 685 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
713 | spin_lock(&log->l_icloglock); | ||
714 | tail_lsn = log->l_last_sync_lsn; | ||
715 | spin_unlock(&log->l_icloglock); | ||
716 | } | ||
717 | |||
718 | spin_lock(&log->l_grant_lock); | ||
719 | 686 | ||
720 | /* Also an invalid lsn. 1 implies that we aren't passing in a valid | 687 | /* tail_lsn == 1 implies that we weren't passed a valid value. */ |
721 | * tail_lsn. | 688 | if (tail_lsn != 1) |
722 | */ | 689 | atomic64_set(&log->l_tail_lsn, tail_lsn); |
723 | if (tail_lsn != 1) { | ||
724 | log->l_tail_lsn = tail_lsn; | ||
725 | } | ||
726 | 690 | ||
727 | if ((tic = log->l_write_headq)) { | 691 | if (!list_empty_careful(&log->l_writeq)) { |
728 | #ifdef DEBUG | 692 | #ifdef DEBUG |
729 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 693 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
730 | panic("Recovery problem"); | 694 | panic("Recovery problem"); |
731 | #endif | 695 | #endif |
732 | cycle = log->l_grant_write_cycle; | 696 | spin_lock(&log->l_grant_write_lock); |
733 | bytes = log->l_grant_write_bytes; | 697 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); |
734 | free_bytes = xlog_space_left(log, cycle, bytes); | 698 | list_for_each_entry(tic, &log->l_writeq, t_queue) { |
735 | do { | ||
736 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); | 699 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); |
737 | 700 | ||
738 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) | 701 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) |
739 | break; | 702 | break; |
740 | tail_lsn = 0; | 703 | tail_lsn = 0; |
741 | free_bytes -= tic->t_unit_res; | 704 | free_bytes -= tic->t_unit_res; |
742 | sv_signal(&tic->t_wait); | 705 | trace_xfs_log_regrant_write_wake_up(log, tic); |
743 | tic = tic->t_next; | 706 | wake_up(&tic->t_wait); |
744 | } while (tic != log->l_write_headq); | 707 | } |
708 | spin_unlock(&log->l_grant_write_lock); | ||
745 | } | 709 | } |
746 | if ((tic = log->l_reserve_headq)) { | 710 | |
711 | if (!list_empty_careful(&log->l_reserveq)) { | ||
747 | #ifdef DEBUG | 712 | #ifdef DEBUG |
748 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 713 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
749 | panic("Recovery problem"); | 714 | panic("Recovery problem"); |
750 | #endif | 715 | #endif |
751 | cycle = log->l_grant_reserve_cycle; | 716 | spin_lock(&log->l_grant_reserve_lock); |
752 | bytes = log->l_grant_reserve_bytes; | 717 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); |
753 | free_bytes = xlog_space_left(log, cycle, bytes); | 718 | list_for_each_entry(tic, &log->l_reserveq, t_queue) { |
754 | do { | ||
755 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) | 719 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) |
756 | need_bytes = tic->t_unit_res*tic->t_cnt; | 720 | need_bytes = tic->t_unit_res*tic->t_cnt; |
757 | else | 721 | else |
@@ -760,12 +724,12 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
760 | break; | 724 | break; |
761 | tail_lsn = 0; | 725 | tail_lsn = 0; |
762 | free_bytes -= need_bytes; | 726 | free_bytes -= need_bytes; |
763 | sv_signal(&tic->t_wait); | 727 | trace_xfs_log_grant_wake_up(log, tic); |
764 | tic = tic->t_next; | 728 | wake_up(&tic->t_wait); |
765 | } while (tic != log->l_reserve_headq); | 729 | } |
730 | spin_unlock(&log->l_grant_reserve_lock); | ||
766 | } | 731 | } |
767 | spin_unlock(&log->l_grant_lock); | 732 | } |
768 | } /* xfs_log_move_tail */ | ||
769 | 733 | ||
770 | /* | 734 | /* |
771 | * Determine if we have a transaction that has gone to disk | 735 | * Determine if we have a transaction that has gone to disk |
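
The rewritten wake-up loops in `xfs_log_move_tail()` use `list_empty_careful()` for an unlocked peek at the queue and only take the per-queue grant lock when there is work to do. A condensed, hypothetical version of that reader-side pattern (the `struct ticket` here is a stand-in for `xlog_ticket_t`):

```c
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct ticket {
	struct list_head	t_queue;
	wait_queue_head_t	t_wait;
	int			t_unit_res;
};

static void
wake_queued_tickets(struct list_head *q, spinlock_t *lock, int free_bytes)
{
	struct ticket *tic;

	if (list_empty_careful(q))	/* lock-free fast path */
		return;

	spin_lock(lock);		/* tickets move on/off only under this */
	list_for_each_entry(tic, q, t_queue) {
		if (free_bytes < tic->t_unit_res)
			break;		/* not enough space for this waiter */
		free_bytes -= tic->t_unit_res;
		wake_up(&tic->t_wait);
	}
	spin_unlock(lock);
}
```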
@@ -797,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
797 | break; | 761 | break; |
798 | case XLOG_STATE_COVER_NEED: | 762 | case XLOG_STATE_COVER_NEED: |
799 | case XLOG_STATE_COVER_NEED2: | 763 | case XLOG_STATE_COVER_NEED2: |
800 | if (!xfs_trans_ail_tail(log->l_ailp) && | 764 | if (!xfs_ail_min_lsn(log->l_ailp) && |
801 | xlog_iclogs_empty(log)) { | 765 | xlog_iclogs_empty(log)) { |
802 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) | 766 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) |
803 | log->l_covered_state = XLOG_STATE_COVER_DONE; | 767 | log->l_covered_state = XLOG_STATE_COVER_DONE; |
@@ -831,23 +795,19 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
831 | * We may be holding the log iclog lock upon entering this routine. | 795 | * We may be holding the log iclog lock upon entering this routine. |
832 | */ | 796 | */ |
833 | xfs_lsn_t | 797 | xfs_lsn_t |
834 | xlog_assign_tail_lsn(xfs_mount_t *mp) | 798 | xlog_assign_tail_lsn( |
799 | struct xfs_mount *mp) | ||
835 | { | 800 | { |
836 | xfs_lsn_t tail_lsn; | 801 | xfs_lsn_t tail_lsn; |
837 | xlog_t *log = mp->m_log; | 802 | struct log *log = mp->m_log; |
838 | 803 | ||
839 | tail_lsn = xfs_trans_ail_tail(mp->m_ail); | 804 | tail_lsn = xfs_ail_min_lsn(mp->m_ail); |
840 | spin_lock(&log->l_grant_lock); | 805 | if (!tail_lsn) |
841 | if (tail_lsn != 0) { | 806 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
842 | log->l_tail_lsn = tail_lsn; | ||
843 | } else { | ||
844 | tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; | ||
845 | } | ||
846 | spin_unlock(&log->l_grant_lock); | ||
847 | 807 | ||
808 | atomic64_set(&log->l_tail_lsn, tail_lsn); | ||
848 | return tail_lsn; | 809 | return tail_lsn; |
849 | } /* xlog_assign_tail_lsn */ | 810 | } |
850 | |||
851 | 811 | ||
852 | /* | 812 | /* |
853 | * Return the space in the log between the tail and the head. The head | 813 | * Return the space in the log between the tail and the head. The head |
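
`l_tail_lsn` and `l_last_sync_lsn` are now `atomic64_t` values rather than fields protected by `l_grant_lock`, so readers and writers use plain atomic loads and stores. An LSN packs a 32-bit cycle and a 32-bit block number, so the helpers the new code calls follow directly from the `CYCLE_LSN`/`BLOCK_LSN` macros; a sketch under that assumption:

```c
#include <linux/atomic.h>
#include <linux/types.h>

typedef __s64 xfs_lsn_t;

#define CYCLE_LSN(lsn)	((uint)((lsn) >> 32))	/* high 32 bits */
#define BLOCK_LSN(lsn)	((uint)(lsn))		/* low 32 bits */

static inline xfs_lsn_t
xlog_assign_lsn(uint cycle, uint block)
{
	return ((xfs_lsn_t)cycle << 32) | block;
}

/* Store a (cycle, block) pair as a single atomic 64-bit LSN. */
static inline void
xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
{
	atomic64_set(lsn, xlog_assign_lsn(cycle, block));
}

/* Read an atomic LSN once and split it back into cycle and block. */
static inline void
xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block)
{
	xfs_lsn_t val = atomic64_read(lsn);

	*cycle = CYCLE_LSN(val);
	*block = BLOCK_LSN(val);
}
```

Because the whole 64-bit value is read and written atomically, a reader can never see a cycle from one update paired with a block number from another.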
@@ -864,37 +824,42 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) | |||
864 | * result is that we return the size of the log as the amount of space left. | 824 | * result is that we return the size of the log as the amount of space left. |
865 | */ | 825 | */ |
866 | STATIC int | 826 | STATIC int |
867 | xlog_space_left(xlog_t *log, int cycle, int bytes) | 827 | xlog_space_left( |
868 | { | 828 | struct log *log, |
869 | int free_bytes; | 829 | atomic64_t *head) |
870 | int tail_bytes; | 830 | { |
871 | int tail_cycle; | 831 | int free_bytes; |
872 | 832 | int tail_bytes; | |
873 | tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); | 833 | int tail_cycle; |
874 | tail_cycle = CYCLE_LSN(log->l_tail_lsn); | 834 | int head_cycle; |
875 | if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { | 835 | int head_bytes; |
876 | free_bytes = log->l_logsize - (bytes - tail_bytes); | 836 | |
877 | } else if ((tail_cycle + 1) < cycle) { | 837 | xlog_crack_grant_head(head, &head_cycle, &head_bytes); |
838 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); | ||
839 | tail_bytes = BBTOB(tail_bytes); | ||
840 | if (tail_cycle == head_cycle && head_bytes >= tail_bytes) | ||
841 | free_bytes = log->l_logsize - (head_bytes - tail_bytes); | ||
842 | else if (tail_cycle + 1 < head_cycle) | ||
878 | return 0; | 843 | return 0; |
879 | } else if (tail_cycle < cycle) { | 844 | else if (tail_cycle < head_cycle) { |
880 | ASSERT(tail_cycle == (cycle - 1)); | 845 | ASSERT(tail_cycle == (head_cycle - 1)); |
881 | free_bytes = tail_bytes - bytes; | 846 | free_bytes = tail_bytes - head_bytes; |
882 | } else { | 847 | } else { |
883 | /* | 848 | /* |
884 | * The reservation head is behind the tail. | 849 | * The reservation head is behind the tail. |
885 | * In this case we just want to return the size of the | 850 | * In this case we just want to return the size of the |
886 | * log as the amount of space left. | 851 | * log as the amount of space left. |
887 | */ | 852 | */ |
888 | xfs_fs_cmn_err(CE_ALERT, log->l_mp, | 853 | xfs_alert(log->l_mp, |
889 | "xlog_space_left: head behind tail\n" | 854 | "xlog_space_left: head behind tail\n" |
890 | " tail_cycle = %d, tail_bytes = %d\n" | 855 | " tail_cycle = %d, tail_bytes = %d\n" |
891 | " GH cycle = %d, GH bytes = %d", | 856 | " GH cycle = %d, GH bytes = %d", |
892 | tail_cycle, tail_bytes, cycle, bytes); | 857 | tail_cycle, tail_bytes, head_cycle, head_bytes); |
893 | ASSERT(0); | 858 | ASSERT(0); |
894 | free_bytes = log->l_logsize; | 859 | free_bytes = log->l_logsize; |
895 | } | 860 | } |
896 | return free_bytes; | 861 | return free_bytes; |
897 | } /* xlog_space_left */ | 862 | } |
898 | 863 | ||
899 | 864 | ||
900 | /* | 865 | /* |
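
`xlog_space_left()` now takes a pointer to an `atomic64_t` grant head instead of separate cycle/bytes arguments. The grant heads use the same packing trick as the LSNs: cycle in the upper 32 bits, a byte count in the lower 32. Sketches of the crack/assign helpers the new code assumes:

```c
#include <linux/atomic.h>

/* Split a packed grant-head value into cycle and byte count. */
static inline void
xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
{
	*cycle = val >> 32;
	*space = val & 0xffffffff;
}

static inline void
xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
{
	xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
}

static inline int64_t
xlog_assign_grant_head_val(int cycle, int space)
{
	return ((int64_t)cycle << 32) | space;
}

static inline void
xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
{
	atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
}
```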
@@ -1034,7 +999,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1034 | 999 | ||
1035 | log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); | 1000 | log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); |
1036 | if (!log) { | 1001 | if (!log) { |
1037 | xlog_warn("XFS: Log allocation failed: No memory!"); | 1002 | xfs_warn(mp, "Log allocation failed: No memory!"); |
1038 | goto out; | 1003 | goto out; |
1039 | } | 1004 | } |
1040 | 1005 | ||
@@ -1047,35 +1012,39 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1047 | log->l_flags |= XLOG_ACTIVE_RECOVERY; | 1012 | log->l_flags |= XLOG_ACTIVE_RECOVERY; |
1048 | 1013 | ||
1049 | log->l_prev_block = -1; | 1014 | log->l_prev_block = -1; |
1050 | log->l_tail_lsn = xlog_assign_lsn(1, 0); | ||
1051 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ | 1015 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ |
1052 | log->l_last_sync_lsn = log->l_tail_lsn; | 1016 | xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); |
1017 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); | ||
1053 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ | 1018 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ |
1054 | log->l_grant_reserve_cycle = 1; | 1019 | xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); |
1055 | log->l_grant_write_cycle = 1; | 1020 | xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); |
1021 | INIT_LIST_HEAD(&log->l_reserveq); | ||
1022 | INIT_LIST_HEAD(&log->l_writeq); | ||
1023 | spin_lock_init(&log->l_grant_reserve_lock); | ||
1024 | spin_lock_init(&log->l_grant_write_lock); | ||
1056 | 1025 | ||
1057 | error = EFSCORRUPTED; | 1026 | error = EFSCORRUPTED; |
1058 | if (xfs_sb_version_hassector(&mp->m_sb)) { | 1027 | if (xfs_sb_version_hassector(&mp->m_sb)) { |
1059 | log2_size = mp->m_sb.sb_logsectlog; | 1028 | log2_size = mp->m_sb.sb_logsectlog; |
1060 | if (log2_size < BBSHIFT) { | 1029 | if (log2_size < BBSHIFT) { |
1061 | xlog_warn("XFS: Log sector size too small " | 1030 | xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)", |
1062 | "(0x%x < 0x%x)", log2_size, BBSHIFT); | 1031 | log2_size, BBSHIFT); |
1063 | goto out_free_log; | 1032 | goto out_free_log; |
1064 | } | 1033 | } |
1065 | 1034 | ||
1066 | log2_size -= BBSHIFT; | 1035 | log2_size -= BBSHIFT; |
1067 | if (log2_size > mp->m_sectbb_log) { | 1036 | if (log2_size > mp->m_sectbb_log) { |
1068 | xlog_warn("XFS: Log sector size too large " | 1037 | xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)", |
1069 | "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log); | 1038 | log2_size, mp->m_sectbb_log); |
1070 | goto out_free_log; | 1039 | goto out_free_log; |
1071 | } | 1040 | } |
1072 | 1041 | ||
1073 | /* for larger sector sizes, must have v2 or external log */ | 1042 | /* for larger sector sizes, must have v2 or external log */ |
1074 | if (log2_size && log->l_logBBstart > 0 && | 1043 | if (log2_size && log->l_logBBstart > 0 && |
1075 | !xfs_sb_version_haslogv2(&mp->m_sb)) { | 1044 | !xfs_sb_version_haslogv2(&mp->m_sb)) { |
1076 | 1045 | xfs_warn(mp, | |
1077 | xlog_warn("XFS: log sector size (0x%x) invalid " | 1046 | "log sector size (0x%x) invalid for configuration.", |
1078 | "for configuration.", log2_size); | 1047 | log2_size); |
1079 | goto out_free_log; | 1048 | goto out_free_log; |
1080 | } | 1049 | } |
1081 | } | 1050 | } |
@@ -1094,8 +1063,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1094 | log->l_xbuf = bp; | 1063 | log->l_xbuf = bp; |
1095 | 1064 | ||
1096 | spin_lock_init(&log->l_icloglock); | 1065 | spin_lock_init(&log->l_icloglock); |
1097 | spin_lock_init(&log->l_grant_lock); | 1066 | init_waitqueue_head(&log->l_flush_wait); |
1098 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | ||
1099 | 1067 | ||
1100 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1068 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1101 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1069 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
@@ -1151,8 +1119,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1151 | 1119 | ||
1152 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); | 1120 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); |
1153 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); | 1121 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); |
1154 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); | 1122 | init_waitqueue_head(&iclog->ic_force_wait); |
1155 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); | 1123 | init_waitqueue_head(&iclog->ic_write_wait); |
1156 | 1124 | ||
1157 | iclogp = &iclog->ic_next; | 1125 | iclogp = &iclog->ic_next; |
1158 | } | 1126 | } |
@@ -1167,15 +1135,11 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1167 | out_free_iclog: | 1135 | out_free_iclog: |
1168 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { | 1136 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { |
1169 | prev_iclog = iclog->ic_next; | 1137 | prev_iclog = iclog->ic_next; |
1170 | if (iclog->ic_bp) { | 1138 | if (iclog->ic_bp) |
1171 | sv_destroy(&iclog->ic_force_wait); | ||
1172 | sv_destroy(&iclog->ic_write_wait); | ||
1173 | xfs_buf_free(iclog->ic_bp); | 1139 | xfs_buf_free(iclog->ic_bp); |
1174 | } | ||
1175 | kmem_free(iclog); | 1140 | kmem_free(iclog); |
1176 | } | 1141 | } |
1177 | spinlock_destroy(&log->l_icloglock); | 1142 | spinlock_destroy(&log->l_icloglock); |
1178 | spinlock_destroy(&log->l_grant_lock); | ||
1179 | xfs_buf_free(log->l_xbuf); | 1143 | xfs_buf_free(log->l_xbuf); |
1180 | out_free_log: | 1144 | out_free_log: |
1181 | kmem_free(log); | 1145 | kmem_free(log); |
@@ -1223,61 +1187,60 @@ xlog_commit_record( | |||
1223 | * water mark. In this manner, we would be creating a low water mark. | 1187 | * water mark. In this manner, we would be creating a low water mark. |
1224 | */ | 1188 | */ |
1225 | STATIC void | 1189 | STATIC void |
1226 | xlog_grant_push_ail(xfs_mount_t *mp, | 1190 | xlog_grant_push_ail( |
1227 | int need_bytes) | 1191 | struct log *log, |
1192 | int need_bytes) | ||
1228 | { | 1193 | { |
1229 | xlog_t *log = mp->m_log; /* pointer to the log */ | 1194 | xfs_lsn_t threshold_lsn = 0; |
1230 | xfs_lsn_t tail_lsn; /* lsn of the log tail */ | 1195 | xfs_lsn_t last_sync_lsn; |
1231 | xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */ | 1196 | int free_blocks; |
1232 | int free_blocks; /* free blocks left to write to */ | 1197 | int free_bytes; |
1233 | int free_bytes; /* free bytes left to write to */ | 1198 | int threshold_block; |
1234 | int threshold_block; /* block in lsn we'd like to be at */ | 1199 | int threshold_cycle; |
1235 | int threshold_cycle; /* lsn cycle we'd like to be at */ | 1200 | int free_threshold; |
1236 | int free_threshold; | 1201 | |
1237 | 1202 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | |
1238 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | 1203 | |
1239 | 1204 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); | |
1240 | spin_lock(&log->l_grant_lock); | 1205 | free_blocks = BTOBBT(free_bytes); |
1241 | free_bytes = xlog_space_left(log, | 1206 | |
1242 | log->l_grant_reserve_cycle, | 1207 | /* |
1243 | log->l_grant_reserve_bytes); | 1208 | * Set the threshold for the minimum number of free blocks in the |
1244 | tail_lsn = log->l_tail_lsn; | 1209 | * log to the maximum of what the caller needs, one quarter of the |
1245 | free_blocks = BTOBBT(free_bytes); | 1210 | * log, and 256 blocks. |
1246 | 1211 | */ | |
1247 | /* | 1212 | free_threshold = BTOBB(need_bytes); |
1248 | * Set the threshold for the minimum number of free blocks in the | 1213 | free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); |
1249 | * log to the maximum of what the caller needs, one quarter of the | 1214 | free_threshold = MAX(free_threshold, 256); |
1250 | * log, and 256 blocks. | 1215 | if (free_blocks >= free_threshold) |
1251 | */ | 1216 | return; |
1252 | free_threshold = BTOBB(need_bytes); | 1217 | |
1253 | free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); | 1218 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, |
1254 | free_threshold = MAX(free_threshold, 256); | 1219 | &threshold_block); |
1255 | if (free_blocks < free_threshold) { | 1220 | threshold_block += free_threshold; |
1256 | threshold_block = BLOCK_LSN(tail_lsn) + free_threshold; | ||
1257 | threshold_cycle = CYCLE_LSN(tail_lsn); | ||
1258 | if (threshold_block >= log->l_logBBsize) { | 1221 | if (threshold_block >= log->l_logBBsize) { |
1259 | threshold_block -= log->l_logBBsize; | 1222 | threshold_block -= log->l_logBBsize; |
1260 | threshold_cycle += 1; | 1223 | threshold_cycle += 1; |
1261 | } | 1224 | } |
1262 | threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); | 1225 | threshold_lsn = xlog_assign_lsn(threshold_cycle, |
1226 | threshold_block); | ||
1227 | /* | ||
1228 | * Don't pass in an lsn greater than the lsn of the last | ||
1229 | * log record known to be on disk. Use a snapshot of the last sync lsn | ||
1230 | * so that it doesn't change between the compare and the set. | ||
1231 | */ | ||
1232 | last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); | ||
1233 | if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) | ||
1234 | threshold_lsn = last_sync_lsn; | ||
1263 | 1235 | ||
1264 | /* Don't pass in an lsn greater than the lsn of the last | 1236 | /* |
1265 | * log record known to be on disk. | 1237 | * Get the transaction layer to kick the dirty buffers out to |
1238 | * disk asynchronously. No point in trying to do this if | ||
1239 | * the filesystem is shutting down. | ||
1266 | */ | 1240 | */ |
1267 | if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) | 1241 | if (!XLOG_FORCED_SHUTDOWN(log)) |
1268 | threshold_lsn = log->l_last_sync_lsn; | 1242 | xfs_ail_push(log->l_ailp, threshold_lsn); |
1269 | } | 1243 | } |
1270 | spin_unlock(&log->l_grant_lock); | ||
1271 | |||
1272 | /* | ||
1273 | * Get the transaction layer to kick the dirty buffers out to | ||
1274 | * disk asynchronously. No point in trying to do this if | ||
1275 | * the filesystem is shutting down. | ||
1276 | */ | ||
1277 | if (threshold_lsn && | ||
1278 | !XLOG_FORCED_SHUTDOWN(log)) | ||
1279 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); | ||
1280 | } /* xlog_grant_push_ail */ | ||
1281 | 1244 | ||
1282 | /* | 1245 | /* |
1283 | * The bdstrat callback function for log bufs. This gives us a central | 1246 | * The bdstrat callback function for log bufs. This gives us a central |
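
The rewritten `xlog_grant_push_ail()` computes its push threshold as the maximum of the caller's need, a quarter of the log, and 256 basic blocks, then returns early when enough space is free. A standalone worked example with hypothetical numbers; `BTOBB` and `MAX` here are userspace stand-ins for the kernel macros:

```c
#include <stdio.h>

#define BBSHIFT		9			/* 512-byte basic blocks */
#define BTOBB(bytes)	(((bytes) + (1 << BBSHIFT) - 1) >> BBSHIFT)
#define MAX(a, b)	((a) > (b) ? (a) : (b))

int main(void)
{
	int l_logBBsize = 262144;	/* hypothetical 128 MiB log */
	int need_bytes = 65536;		/* hypothetical reservation size */
	int free_threshold;

	free_threshold = BTOBB(need_bytes);			 /* 128 */
	free_threshold = MAX(free_threshold, l_logBBsize >> 2); /* 65536 */
	free_threshold = MAX(free_threshold, 256);

	/* Push the AIL whenever fewer than 32 MiB of the log are free. */
	printf("free_threshold = %d blocks\n", free_threshold);
	return 0;
}
```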
@@ -1372,9 +1335,8 @@ xlog_sync(xlog_t *log, | |||
1372 | roundoff < BBTOB(1))); | 1335 | roundoff < BBTOB(1))); |
1373 | 1336 | ||
1374 | /* move grant heads by roundoff in sync */ | 1337 | /* move grant heads by roundoff in sync */ |
1375 | spin_lock(&log->l_grant_lock); | 1338 | xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); |
1376 | xlog_grant_add_space(log, roundoff); | 1339 | xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); |
1377 | spin_unlock(&log->l_grant_lock); | ||
1378 | 1340 | ||
1379 | /* put cycle number in every block */ | 1341 | /* put cycle number in every block */ |
1380 | xlog_pack_data(log, iclog, roundoff); | 1342 | xlog_pack_data(log, iclog, roundoff); |
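
With `l_grant_lock` gone, moving a grant head by `roundoff` (or by a reservation) must itself be atomic. The `xlog_grant_add_space()` calls above imply a compare-and-swap loop over the packed 64-bit head; a sketch, reusing the crack/assign helpers sketched after `xlog_space_left()` above (`log_size()` is a hypothetical accessor for `l_logsize`):

```c
#include <linux/atomic.h>

struct log;				/* only the log size is needed */
extern int log_size(struct log *log);	/* hypothetical accessor */

static void
xlog_grant_add_space(struct log *log, atomic64_t *head, int bytes)
{
	int64_t head_val = atomic64_read(head);
	int64_t new, old;

	do {
		int tmp, cycle, space;

		xlog_crack_grant_head_val(head_val, &cycle, &space);

		/* wrap the byte count and bump the cycle at log end */
		tmp = log_size(log) - space;
		if (tmp > bytes)
			space += bytes;
		else {
			space = bytes - tmp;
			cycle++;
		}

		old = head_val;
		new = xlog_assign_grant_head_val(cycle, space);
		head_val = atomic64_cmpxchg(head, old, new);
	} while (head_val != old);	/* retry if another CPU raced us */
}
```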
@@ -1489,15 +1451,12 @@ xlog_dealloc_log(xlog_t *log) | |||
1489 | 1451 | ||
1490 | iclog = log->l_iclog; | 1452 | iclog = log->l_iclog; |
1491 | for (i=0; i<log->l_iclog_bufs; i++) { | 1453 | for (i=0; i<log->l_iclog_bufs; i++) { |
1492 | sv_destroy(&iclog->ic_force_wait); | ||
1493 | sv_destroy(&iclog->ic_write_wait); | ||
1494 | xfs_buf_free(iclog->ic_bp); | 1454 | xfs_buf_free(iclog->ic_bp); |
1495 | next_iclog = iclog->ic_next; | 1455 | next_iclog = iclog->ic_next; |
1496 | kmem_free(iclog); | 1456 | kmem_free(iclog); |
1497 | iclog = next_iclog; | 1457 | iclog = next_iclog; |
1498 | } | 1458 | } |
1499 | spinlock_destroy(&log->l_icloglock); | 1459 | spinlock_destroy(&log->l_icloglock); |
1500 | spinlock_destroy(&log->l_grant_lock); | ||
1501 | 1460 | ||
1502 | xfs_buf_free(log->l_xbuf); | 1461 | xfs_buf_free(log->l_xbuf); |
1503 | log->l_mp->m_log = NULL; | 1462 | log->l_mp->m_log = NULL; |
@@ -1602,38 +1561,36 @@ xlog_print_tic_res( | |||
1602 | "SWAPEXT" | 1561 | "SWAPEXT" |
1603 | }; | 1562 | }; |
1604 | 1563 | ||
1605 | xfs_fs_cmn_err(CE_WARN, mp, | 1564 | xfs_warn(mp, |
1606 | "xfs_log_write: reservation summary:\n" | 1565 | "xfs_log_write: reservation summary:\n" |
1607 | " trans type = %s (%u)\n" | 1566 | " trans type = %s (%u)\n" |
1608 | " unit res = %d bytes\n" | 1567 | " unit res = %d bytes\n" |
1609 | " current res = %d bytes\n" | 1568 | " current res = %d bytes\n" |
1610 | " total reg = %u bytes (o/flow = %u bytes)\n" | 1569 | " total reg = %u bytes (o/flow = %u bytes)\n" |
1611 | " ophdrs = %u (ophdr space = %u bytes)\n" | 1570 | " ophdrs = %u (ophdr space = %u bytes)\n" |
1612 | " ophdr + reg = %u bytes\n" | 1571 | " ophdr + reg = %u bytes\n" |
1613 | " num regions = %u\n", | 1572 | " num regions = %u\n", |
1614 | ((ticket->t_trans_type <= 0 || | 1573 | ((ticket->t_trans_type <= 0 || |
1615 | ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? | 1574 | ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? |
1616 | "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), | 1575 | "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), |
1617 | ticket->t_trans_type, | 1576 | ticket->t_trans_type, |
1618 | ticket->t_unit_res, | 1577 | ticket->t_unit_res, |
1619 | ticket->t_curr_res, | 1578 | ticket->t_curr_res, |
1620 | ticket->t_res_arr_sum, ticket->t_res_o_flow, | 1579 | ticket->t_res_arr_sum, ticket->t_res_o_flow, |
1621 | ticket->t_res_num_ophdrs, ophdr_spc, | 1580 | ticket->t_res_num_ophdrs, ophdr_spc, |
1622 | ticket->t_res_arr_sum + | 1581 | ticket->t_res_arr_sum + |
1623 | ticket->t_res_o_flow + ophdr_spc, | 1582 | ticket->t_res_o_flow + ophdr_spc, |
1624 | ticket->t_res_num); | 1583 | ticket->t_res_num); |
1625 | 1584 | ||
1626 | for (i = 0; i < ticket->t_res_num; i++) { | 1585 | for (i = 0; i < ticket->t_res_num; i++) { |
1627 | uint r_type = ticket->t_res_arr[i].r_type; | 1586 | uint r_type = ticket->t_res_arr[i].r_type; |
1628 | cmn_err(CE_WARN, | 1587 | xfs_warn(mp, "region[%u]: %s - %u bytes\n", i, |
1629 | "region[%u]: %s - %u bytes\n", | ||
1630 | i, | ||
1631 | ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? | 1588 | ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? |
1632 | "bad-rtype" : res_type_str[r_type-1]), | 1589 | "bad-rtype" : res_type_str[r_type-1]), |
1633 | ticket->t_res_arr[i].r_len); | 1590 | ticket->t_res_arr[i].r_len); |
1634 | } | 1591 | } |
1635 | 1592 | ||
1636 | xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, | 1593 | xfs_alert_tag(mp, XFS_PTAG_LOGRES, |
1637 | "xfs_log_write: reservation ran out. Need to up reservation"); | 1594 | "xfs_log_write: reservation ran out. Need to up reservation"); |
1638 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 1595 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
1639 | } | 1596 | } |
@@ -1721,7 +1678,7 @@ xlog_write_setup_ophdr( | |||
1721 | case XFS_LOG: | 1678 | case XFS_LOG: |
1722 | break; | 1679 | break; |
1723 | default: | 1680 | default: |
1724 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | 1681 | xfs_warn(log->l_mp, |
1725 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", | 1682 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", |
1726 | ophdr->oh_clientid, ticket); | 1683 | ophdr->oh_clientid, ticket); |
1727 | return NULL; | 1684 | return NULL; |
@@ -2232,7 +2189,7 @@ xlog_state_do_callback( | |||
2232 | lowest_lsn = xlog_get_lowest_lsn(log); | 2189 | lowest_lsn = xlog_get_lowest_lsn(log); |
2233 | if (lowest_lsn && | 2190 | if (lowest_lsn && |
2234 | XFS_LSN_CMP(lowest_lsn, | 2191 | XFS_LSN_CMP(lowest_lsn, |
2235 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { | 2192 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { |
2236 | iclog = iclog->ic_next; | 2193 | iclog = iclog->ic_next; |
2237 | continue; /* Leave this iclog for | 2194 | continue; /* Leave this iclog for |
2238 | * another thread */ | 2195 | * another thread */ |
@@ -2240,23 +2197,21 @@ xlog_state_do_callback( | |||
2240 | 2197 | ||
2241 | iclog->ic_state = XLOG_STATE_CALLBACK; | 2198 | iclog->ic_state = XLOG_STATE_CALLBACK; |
2242 | 2199 | ||
2243 | spin_unlock(&log->l_icloglock); | ||
2244 | 2200 | ||
2245 | /* l_last_sync_lsn field protected by | 2201 | /* |
2246 | * l_grant_lock. Don't worry about iclog's lsn. | 2202 | * update the last_sync_lsn before we drop the |
2247 | * No one else can be here except us. | 2203 | * icloglock to ensure we are the only one that |
2204 | * can update it. | ||
2248 | */ | 2205 | */ |
2249 | spin_lock(&log->l_grant_lock); | 2206 | ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), |
2250 | ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, | 2207 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); |
2251 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); | 2208 | atomic64_set(&log->l_last_sync_lsn, |
2252 | log->l_last_sync_lsn = | 2209 | be64_to_cpu(iclog->ic_header.h_lsn)); |
2253 | be64_to_cpu(iclog->ic_header.h_lsn); | ||
2254 | spin_unlock(&log->l_grant_lock); | ||
2255 | 2210 | ||
2256 | } else { | 2211 | } else |
2257 | spin_unlock(&log->l_icloglock); | ||
2258 | ioerrors++; | 2212 | ioerrors++; |
2259 | } | 2213 | |
2214 | spin_unlock(&log->l_icloglock); | ||
2260 | 2215 | ||
2261 | /* | 2216 | /* |
2262 | * Keep processing entries in the callback list until | 2217 | * Keep processing entries in the callback list until |
@@ -2297,7 +2252,7 @@ xlog_state_do_callback( | |||
2297 | xlog_state_clean_log(log); | 2252 | xlog_state_clean_log(log); |
2298 | 2253 | ||
2299 | /* wake up threads waiting in xfs_log_force() */ | 2254 | /* wake up threads waiting in xfs_log_force() */ |
2300 | sv_broadcast(&iclog->ic_force_wait); | 2255 | wake_up_all(&iclog->ic_force_wait); |
2301 | 2256 | ||
2302 | iclog = iclog->ic_next; | 2257 | iclog = iclog->ic_next; |
2303 | } while (first_iclog != iclog); | 2258 | } while (first_iclog != iclog); |
@@ -2305,7 +2260,7 @@ xlog_state_do_callback( | |||
2305 | if (repeats > 5000) { | 2260 | if (repeats > 5000) { |
2306 | flushcnt += repeats; | 2261 | flushcnt += repeats; |
2307 | repeats = 0; | 2262 | repeats = 0; |
2308 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | 2263 | xfs_warn(log->l_mp, |
2309 | "%s: possible infinite loop (%d iterations)", | 2264 | "%s: possible infinite loop (%d iterations)", |
2310 | __func__, flushcnt); | 2265 | __func__, flushcnt); |
2311 | } | 2266 | } |
@@ -2344,7 +2299,7 @@ xlog_state_do_callback( | |||
2344 | spin_unlock(&log->l_icloglock); | 2299 | spin_unlock(&log->l_icloglock); |
2345 | 2300 | ||
2346 | if (wake) | 2301 | if (wake) |
2347 | sv_broadcast(&log->l_flush_wait); | 2302 | wake_up_all(&log->l_flush_wait); |
2348 | } | 2303 | } |
2349 | 2304 | ||
2350 | 2305 | ||
@@ -2395,7 +2350,7 @@ xlog_state_done_syncing( | |||
2395 | * iclog buffer, we wake them all, one will get to do the | 2350 | * iclog buffer, we wake them all, one will get to do the |
2396 | * I/O, the others get to wait for the result. | 2351 | * I/O, the others get to wait for the result. |
2397 | */ | 2352 | */ |
2398 | sv_broadcast(&iclog->ic_write_wait); | 2353 | wake_up_all(&iclog->ic_write_wait); |
2399 | spin_unlock(&log->l_icloglock); | 2354 | spin_unlock(&log->l_icloglock); |
2400 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ | 2355 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ |
2401 | } /* xlog_state_done_syncing */ | 2356 | } /* xlog_state_done_syncing */ |
@@ -2444,7 +2399,7 @@ restart: | |||
2444 | XFS_STATS_INC(xs_log_noiclogs); | 2399 | XFS_STATS_INC(xs_log_noiclogs); |
2445 | 2400 | ||
2446 | /* Wait for log writes to have flushed */ | 2401 | /* Wait for log writes to have flushed */ |
2447 | sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); | 2402 | xlog_wait(&log->l_flush_wait, &log->l_icloglock); |
2448 | goto restart; | 2403 | goto restart; |
2449 | } | 2404 | } |
2450 | 2405 | ||
@@ -2527,6 +2482,18 @@ restart: | |||
2527 | * | 2482 | * |
2528 | * Once a ticket gets put onto the reserveq, it will only return after | 2483 | * Once a ticket gets put onto the reserveq, it will only return after |
2529 | * the needed reservation is satisfied. | 2484 | * the needed reservation is satisfied. |
2485 | * | ||
2486 | * This function is structured so that it has a lock-free fast path. This is | ||
2487 | * necessary because every new transaction reservation will come through this | ||
2488 | * path. Hence any lock will be globally hot if we take it unconditionally on | ||
2489 | * every pass. | ||
2490 | * | ||
2491 | * As tickets are only ever moved on and off the reserveq under the | ||
2492 | * l_grant_reserve_lock, we only need to take that lock if we are going | ||
2493 | * to add the ticket to the queue and sleep. We can avoid taking the lock if the | ||
2494 | * ticket was never added to the reserveq because the t_queue list head will be | ||
2495 | * empty and we hold the only reference to it so it can safely be checked | ||
2496 | * unlocked. | ||
2530 | */ | 2497 | */ |
2531 | STATIC int | 2498 | STATIC int |
2532 | xlog_grant_log_space(xlog_t *log, | 2499 | xlog_grant_log_space(xlog_t *log, |
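
The comment block above describes the lock-free fast path; the subtle part is the enqueue side, where the unlocked emptiness test can race with the last waiter being removed, hence the re-check under the lock before sleeping. A hypothetical condensed version, using the `xlog_wait()` sketch from earlier:

```c
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct ticket {				/* stand-in for xlog_ticket_t */
	struct list_head	t_queue;
	wait_queue_head_t	t_wait;
};

static void
queue_and_sleep(struct ticket *tic, struct list_head *q, spinlock_t *lock)
{
redo:
	if (!list_empty_careful(q)) {	/* unlocked peek */
		spin_lock(lock);
		if (list_empty(q)) {
			/* raced: queue drained before we took the lock */
			spin_unlock(lock);
			goto redo;
		}
		list_add_tail(&tic->t_queue, q);
		xlog_wait(&tic->t_wait, lock);	/* drops the lock */
	}
}
```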
@@ -2534,24 +2501,27 @@ xlog_grant_log_space(xlog_t *log, | |||
2534 | { | 2501 | { |
2535 | int free_bytes; | 2502 | int free_bytes; |
2536 | int need_bytes; | 2503 | int need_bytes; |
2537 | #ifdef DEBUG | ||
2538 | xfs_lsn_t tail_lsn; | ||
2539 | #endif | ||
2540 | |||
2541 | 2504 | ||
2542 | #ifdef DEBUG | 2505 | #ifdef DEBUG |
2543 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 2506 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
2544 | panic("grant Recovery problem"); | 2507 | panic("grant Recovery problem"); |
2545 | #endif | 2508 | #endif |
2546 | 2509 | ||
2547 | /* Is there space or do we need to sleep? */ | ||
2548 | spin_lock(&log->l_grant_lock); | ||
2549 | |||
2550 | trace_xfs_log_grant_enter(log, tic); | 2510 | trace_xfs_log_grant_enter(log, tic); |
2551 | 2511 | ||
2512 | need_bytes = tic->t_unit_res; | ||
2513 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
2514 | need_bytes *= tic->t_ocnt; | ||
2515 | |||
2552 | /* something is already sleeping; insert new transaction at end */ | 2516 | /* something is already sleeping; insert new transaction at end */ |
2553 | if (log->l_reserve_headq) { | 2517 | if (!list_empty_careful(&log->l_reserveq)) { |
2554 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2518 | spin_lock(&log->l_grant_reserve_lock); |
2519 | /* recheck the queue now we are locked */ | ||
2520 | if (list_empty(&log->l_reserveq)) { | ||
2521 | spin_unlock(&log->l_grant_reserve_lock); | ||
2522 | goto redo; | ||
2523 | } | ||
2524 | list_add_tail(&tic->t_queue, &log->l_reserveq); | ||
2555 | 2525 | ||
2556 | trace_xfs_log_grant_sleep1(log, tic); | 2526 | trace_xfs_log_grant_sleep1(log, tic); |
2557 | 2527 | ||
@@ -2563,72 +2533,57 @@ xlog_grant_log_space(xlog_t *log, | |||
2563 | goto error_return; | 2533 | goto error_return; |
2564 | 2534 | ||
2565 | XFS_STATS_INC(xs_sleep_logspace); | 2535 | XFS_STATS_INC(xs_sleep_logspace); |
2566 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2536 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); |
2537 | |||
2567 | /* | 2538 | /* |
2568 | * If we got an error, and the filesystem is shutting down, | 2539 | * If we got an error, and the filesystem is shutting down, |
2569 | * we'll catch it down below. So just continue... | 2540 | * we'll catch it down below. So just continue... |
2570 | */ | 2541 | */ |
2571 | trace_xfs_log_grant_wake1(log, tic); | 2542 | trace_xfs_log_grant_wake1(log, tic); |
2572 | spin_lock(&log->l_grant_lock); | ||
2573 | } | 2543 | } |
2574 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
2575 | need_bytes = tic->t_unit_res*tic->t_ocnt; | ||
2576 | else | ||
2577 | need_bytes = tic->t_unit_res; | ||
2578 | 2544 | ||
2579 | redo: | 2545 | redo: |
2580 | if (XLOG_FORCED_SHUTDOWN(log)) | 2546 | if (XLOG_FORCED_SHUTDOWN(log)) |
2581 | goto error_return; | 2547 | goto error_return_unlocked; |
2582 | 2548 | ||
2583 | free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, | 2549 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); |
2584 | log->l_grant_reserve_bytes); | ||
2585 | if (free_bytes < need_bytes) { | 2550 | if (free_bytes < need_bytes) { |
2586 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2551 | spin_lock(&log->l_grant_reserve_lock); |
2587 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2552 | if (list_empty(&tic->t_queue)) |
2553 | list_add_tail(&tic->t_queue, &log->l_reserveq); | ||
2588 | 2554 | ||
2589 | trace_xfs_log_grant_sleep2(log, tic); | 2555 | trace_xfs_log_grant_sleep2(log, tic); |
2590 | 2556 | ||
2591 | spin_unlock(&log->l_grant_lock); | ||
2592 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2593 | spin_lock(&log->l_grant_lock); | ||
2594 | |||
2595 | XFS_STATS_INC(xs_sleep_logspace); | ||
2596 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | ||
2597 | |||
2598 | spin_lock(&log->l_grant_lock); | ||
2599 | if (XLOG_FORCED_SHUTDOWN(log)) | 2557 | if (XLOG_FORCED_SHUTDOWN(log)) |
2600 | goto error_return; | 2558 | goto error_return; |
2601 | 2559 | ||
2602 | trace_xfs_log_grant_wake2(log, tic); | 2560 | xlog_grant_push_ail(log, need_bytes); |
2603 | 2561 | ||
2562 | XFS_STATS_INC(xs_sleep_logspace); | ||
2563 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); | ||
2564 | |||
2565 | trace_xfs_log_grant_wake2(log, tic); | ||
2604 | goto redo; | 2566 | goto redo; |
2605 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2567 | } |
2606 | xlog_del_ticketq(&log->l_reserve_headq, tic); | ||
2607 | 2568 | ||
2608 | /* we've got enough space */ | 2569 | if (!list_empty(&tic->t_queue)) { |
2609 | xlog_grant_add_space(log, need_bytes); | 2570 | spin_lock(&log->l_grant_reserve_lock); |
2610 | #ifdef DEBUG | 2571 | list_del_init(&tic->t_queue); |
2611 | tail_lsn = log->l_tail_lsn; | 2572 | spin_unlock(&log->l_grant_reserve_lock); |
2612 | /* | ||
2613 | * Check to make sure the grant write head didn't just over lap the | ||
2614 | * tail. If the cycles are the same, we can't be overlapping. | ||
2615 | * Otherwise, make sure that the cycles differ by exactly one and | ||
2616 | * check the byte count. | ||
2617 | */ | ||
2618 | if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { | ||
2619 | ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); | ||
2620 | ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); | ||
2621 | } | 2573 | } |
2622 | #endif | 2574 | |
2575 | /* we've got enough space */ | ||
2576 | xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); | ||
2577 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
2623 | trace_xfs_log_grant_exit(log, tic); | 2578 | trace_xfs_log_grant_exit(log, tic); |
2624 | xlog_verify_grant_head(log, 1); | 2579 | xlog_verify_grant_tail(log); |
2625 | spin_unlock(&log->l_grant_lock); | ||
2626 | return 0; | 2580 | return 0; |
2627 | 2581 | ||
2628 | error_return: | 2582 | error_return_unlocked: |
2629 | if (tic->t_flags & XLOG_TIC_IN_Q) | 2583 | spin_lock(&log->l_grant_reserve_lock); |
2630 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2584 | error_return: |
2631 | 2585 | list_del_init(&tic->t_queue); | |
2586 | spin_unlock(&log->l_grant_reserve_lock); | ||
2632 | trace_xfs_log_grant_error(log, tic); | 2587 | trace_xfs_log_grant_error(log, tic); |
2633 | 2588 | ||
2634 | /* | 2589 | /* |
@@ -2638,7 +2593,6 @@ redo: | |||
2638 | */ | 2593 | */ |
2639 | tic->t_curr_res = 0; | 2594 | tic->t_curr_res = 0; |
2640 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | 2595 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ |
2641 | spin_unlock(&log->l_grant_lock); | ||
2642 | return XFS_ERROR(EIO); | 2596 | return XFS_ERROR(EIO); |
2643 | } /* xlog_grant_log_space */ | 2597 | } /* xlog_grant_log_space */ |
2644 | 2598 | ||
@@ -2646,17 +2600,14 @@ redo: | |||
2646 | /* | 2600 | /* |
2647 | * Replenish the byte reservation required by moving the grant write head. | 2601 | * Replenish the byte reservation required by moving the grant write head. |
2648 | * | 2602 | * |
2649 | * | 2603 | * Similar to xlog_grant_log_space, the function is structured to have a lock |
2604 | * free fast path. | ||
2650 | */ | 2605 | */ |
2651 | STATIC int | 2606 | STATIC int |
2652 | xlog_regrant_write_log_space(xlog_t *log, | 2607 | xlog_regrant_write_log_space(xlog_t *log, |
2653 | xlog_ticket_t *tic) | 2608 | xlog_ticket_t *tic) |
2654 | { | 2609 | { |
2655 | int free_bytes, need_bytes; | 2610 | int free_bytes, need_bytes; |
2656 | xlog_ticket_t *ntic; | ||
2657 | #ifdef DEBUG | ||
2658 | xfs_lsn_t tail_lsn; | ||
2659 | #endif | ||
2660 | 2611 | ||
2661 | tic->t_curr_res = tic->t_unit_res; | 2612 | tic->t_curr_res = tic->t_unit_res; |
2662 | xlog_tic_reset_res(tic); | 2613 | xlog_tic_reset_res(tic); |
@@ -2669,12 +2620,9 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2669 | panic("regrant Recovery problem"); | 2620 | panic("regrant Recovery problem"); |
2670 | #endif | 2621 | #endif |
2671 | 2622 | ||
2672 | spin_lock(&log->l_grant_lock); | ||
2673 | |||
2674 | trace_xfs_log_regrant_write_enter(log, tic); | 2623 | trace_xfs_log_regrant_write_enter(log, tic); |
2675 | |||
2676 | if (XLOG_FORCED_SHUTDOWN(log)) | 2624 | if (XLOG_FORCED_SHUTDOWN(log)) |
2677 | goto error_return; | 2625 | goto error_return_unlocked; |
2678 | 2626 | ||
2679 | /* If there are other waiters on the queue then give them a | 2627 | /* If there are other waiters on the queue then give them a |
2680 | * chance at logspace before us. Wake up the first waiters, | 2628 | * chance at logspace before us. Wake up the first waiters, |
@@ -2683,92 +2631,76 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2683 | * this transaction. | 2631 | * this transaction. |
2684 | */ | 2632 | */ |
2685 | need_bytes = tic->t_unit_res; | 2633 | need_bytes = tic->t_unit_res; |
2686 | if ((ntic = log->l_write_headq)) { | 2634 | if (!list_empty_careful(&log->l_writeq)) { |
2687 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2635 | struct xlog_ticket *ntic; |
2688 | log->l_grant_write_bytes); | 2636 | |
2689 | do { | 2637 | spin_lock(&log->l_grant_write_lock); |
2638 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); | ||
2639 | list_for_each_entry(ntic, &log->l_writeq, t_queue) { | ||
2690 | ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); | 2640 | ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); |
2691 | 2641 | ||
2692 | if (free_bytes < ntic->t_unit_res) | 2642 | if (free_bytes < ntic->t_unit_res) |
2693 | break; | 2643 | break; |
2694 | free_bytes -= ntic->t_unit_res; | 2644 | free_bytes -= ntic->t_unit_res; |
2695 | sv_signal(&ntic->t_wait); | 2645 | wake_up(&ntic->t_wait); |
2696 | ntic = ntic->t_next; | 2646 | } |
2697 | } while (ntic != log->l_write_headq); | ||
2698 | |||
2699 | if (ntic != log->l_write_headq) { | ||
2700 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | ||
2701 | xlog_ins_ticketq(&log->l_write_headq, tic); | ||
2702 | 2647 | ||
2648 | if (ntic != list_first_entry(&log->l_writeq, | ||
2649 | struct xlog_ticket, t_queue)) { | ||
2650 | if (list_empty(&tic->t_queue)) | ||
2651 | list_add_tail(&tic->t_queue, &log->l_writeq); | ||
2703 | trace_xfs_log_regrant_write_sleep1(log, tic); | 2652 | trace_xfs_log_regrant_write_sleep1(log, tic); |
2704 | 2653 | ||
2705 | spin_unlock(&log->l_grant_lock); | 2654 | xlog_grant_push_ail(log, need_bytes); |
2706 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2707 | spin_lock(&log->l_grant_lock); | ||
2708 | 2655 | ||
2709 | XFS_STATS_INC(xs_sleep_logspace); | 2656 | XFS_STATS_INC(xs_sleep_logspace); |
2710 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, | 2657 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); |
2711 | &log->l_grant_lock, s); | ||
2712 | |||
2713 | /* If we're shutting down, this tic is already | ||
2714 | * off the queue */ | ||
2715 | spin_lock(&log->l_grant_lock); | ||
2716 | if (XLOG_FORCED_SHUTDOWN(log)) | ||
2717 | goto error_return; | ||
2718 | |||
2719 | trace_xfs_log_regrant_write_wake1(log, tic); | 2658 | trace_xfs_log_regrant_write_wake1(log, tic); |
2720 | } | 2659 | } else |
2660 | spin_unlock(&log->l_grant_write_lock); | ||
2721 | } | 2661 | } |
2722 | 2662 | ||
2723 | redo: | 2663 | redo: |
2724 | if (XLOG_FORCED_SHUTDOWN(log)) | 2664 | if (XLOG_FORCED_SHUTDOWN(log)) |
2725 | goto error_return; | 2665 | goto error_return_unlocked; |
2726 | 2666 | ||
2727 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2667 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); |
2728 | log->l_grant_write_bytes); | ||
2729 | if (free_bytes < need_bytes) { | 2668 | if (free_bytes < need_bytes) { |
2730 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2669 | spin_lock(&log->l_grant_write_lock); |
2731 | xlog_ins_ticketq(&log->l_write_headq, tic); | 2670 | if (list_empty(&tic->t_queue)) |
2732 | spin_unlock(&log->l_grant_lock); | 2671 | list_add_tail(&tic->t_queue, &log->l_writeq); |
2733 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2734 | spin_lock(&log->l_grant_lock); | ||
2735 | |||
2736 | XFS_STATS_INC(xs_sleep_logspace); | ||
2737 | trace_xfs_log_regrant_write_sleep2(log, tic); | ||
2738 | |||
2739 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | ||
2740 | 2672 | ||
2741 | /* If we're shutting down, this tic is already off the queue */ | ||
2742 | spin_lock(&log->l_grant_lock); | ||
2743 | if (XLOG_FORCED_SHUTDOWN(log)) | 2673 | if (XLOG_FORCED_SHUTDOWN(log)) |
2744 | goto error_return; | 2674 | goto error_return; |
2745 | 2675 | ||
2676 | xlog_grant_push_ail(log, need_bytes); | ||
2677 | |||
2678 | XFS_STATS_INC(xs_sleep_logspace); | ||
2679 | trace_xfs_log_regrant_write_sleep2(log, tic); | ||
2680 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); | ||
2681 | |||
2746 | trace_xfs_log_regrant_write_wake2(log, tic); | 2682 | trace_xfs_log_regrant_write_wake2(log, tic); |
2747 | goto redo; | 2683 | goto redo; |
2748 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2684 | } |
2749 | xlog_del_ticketq(&log->l_write_headq, tic); | ||
2750 | 2685 | ||
2751 | /* we've got enough space */ | 2686 | if (!list_empty(&tic->t_queue)) { |
2752 | xlog_grant_add_space_write(log, need_bytes); | 2687 | spin_lock(&log->l_grant_write_lock); |
2753 | #ifdef DEBUG | 2688 | list_del_init(&tic->t_queue); |
2754 | tail_lsn = log->l_tail_lsn; | 2689 | spin_unlock(&log->l_grant_write_lock); |
2755 | if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { | ||
2756 | ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); | ||
2757 | ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); | ||
2758 | } | 2690 | } |
2759 | #endif | ||
2760 | 2691 | ||
2692 | /* we've got enough space */ | ||
2693 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
2761 | trace_xfs_log_regrant_write_exit(log, tic); | 2694 | trace_xfs_log_regrant_write_exit(log, tic); |
2762 | 2695 | xlog_verify_grant_tail(log); | |
2763 | xlog_verify_grant_head(log, 1); | ||
2764 | spin_unlock(&log->l_grant_lock); | ||
2765 | return 0; | 2696 | return 0; |
2766 | 2697 | ||
2767 | 2698 | ||
2699 | error_return_unlocked: | ||
2700 | spin_lock(&log->l_grant_write_lock); | ||
2768 | error_return: | 2701 | error_return: |
2769 | if (tic->t_flags & XLOG_TIC_IN_Q) | 2702 | list_del_init(&tic->t_queue); |
2770 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2703 | spin_unlock(&log->l_grant_write_lock); |
2771 | |||
2772 | trace_xfs_log_regrant_write_error(log, tic); | 2704 | trace_xfs_log_regrant_write_error(log, tic); |
2773 | 2705 | ||
2774 | /* | 2706 | /* |
@@ -2778,7 +2710,6 @@ redo: | |||
2778 | */ | 2710 | */ |
2779 | tic->t_curr_res = 0; | 2711 | tic->t_curr_res = 0; |
2780 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | 2712 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ |
2781 | spin_unlock(&log->l_grant_lock); | ||
2782 | return XFS_ERROR(EIO); | 2713 | return XFS_ERROR(EIO); |
2783 | } /* xlog_regrant_write_log_space */ | 2714 | } /* xlog_regrant_write_log_space */ |
2784 | 2715 | ||
@@ -2799,27 +2730,24 @@ xlog_regrant_reserve_log_space(xlog_t *log, | |||
2799 | if (ticket->t_cnt > 0) | 2730 | if (ticket->t_cnt > 0) |
2800 | ticket->t_cnt--; | 2731 | ticket->t_cnt--; |
2801 | 2732 | ||
2802 | spin_lock(&log->l_grant_lock); | 2733 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, |
2803 | xlog_grant_sub_space(log, ticket->t_curr_res); | 2734 | ticket->t_curr_res); |
2735 | xlog_grant_sub_space(log, &log->l_grant_write_head, | ||
2736 | ticket->t_curr_res); | ||
2804 | ticket->t_curr_res = ticket->t_unit_res; | 2737 | ticket->t_curr_res = ticket->t_unit_res; |
2805 | xlog_tic_reset_res(ticket); | 2738 | xlog_tic_reset_res(ticket); |
2806 | 2739 | ||
2807 | trace_xfs_log_regrant_reserve_sub(log, ticket); | 2740 | trace_xfs_log_regrant_reserve_sub(log, ticket); |
2808 | 2741 | ||
2809 | xlog_verify_grant_head(log, 1); | ||
2810 | |||
2811 | /* just return if we still have some of the pre-reserved space */ | 2742 | /* just return if we still have some of the pre-reserved space */ |
2812 | if (ticket->t_cnt > 0) { | 2743 | if (ticket->t_cnt > 0) |
2813 | spin_unlock(&log->l_grant_lock); | ||
2814 | return; | 2744 | return; |
2815 | } | ||
2816 | 2745 | ||
2817 | xlog_grant_add_space_reserve(log, ticket->t_unit_res); | 2746 | xlog_grant_add_space(log, &log->l_grant_reserve_head, |
2747 | ticket->t_unit_res); | ||
2818 | 2748 | ||
2819 | trace_xfs_log_regrant_reserve_exit(log, ticket); | 2749 | trace_xfs_log_regrant_reserve_exit(log, ticket); |
2820 | 2750 | ||
2821 | xlog_verify_grant_head(log, 0); | ||
2822 | spin_unlock(&log->l_grant_lock); | ||
2823 | ticket->t_curr_res = ticket->t_unit_res; | 2751 | ticket->t_curr_res = ticket->t_unit_res; |
2824 | xlog_tic_reset_res(ticket); | 2752 | xlog_tic_reset_res(ticket); |
2825 | } /* xlog_regrant_reserve_log_space */ | 2753 | } /* xlog_regrant_reserve_log_space */ |
@@ -2843,28 +2771,29 @@ STATIC void | |||
2843 | xlog_ungrant_log_space(xlog_t *log, | 2771 | xlog_ungrant_log_space(xlog_t *log, |
2844 | xlog_ticket_t *ticket) | 2772 | xlog_ticket_t *ticket) |
2845 | { | 2773 | { |
2774 | int bytes; | ||
2775 | |||
2846 | if (ticket->t_cnt > 0) | 2776 | if (ticket->t_cnt > 0) |
2847 | ticket->t_cnt--; | 2777 | ticket->t_cnt--; |
2848 | 2778 | ||
2849 | spin_lock(&log->l_grant_lock); | ||
2850 | trace_xfs_log_ungrant_enter(log, ticket); | 2779 | trace_xfs_log_ungrant_enter(log, ticket); |
2851 | |||
2852 | xlog_grant_sub_space(log, ticket->t_curr_res); | ||
2853 | |||
2854 | trace_xfs_log_ungrant_sub(log, ticket); | 2780 | trace_xfs_log_ungrant_sub(log, ticket); |
2855 | 2781 | ||
2856 | /* If this is a permanent reservation ticket, we may be able to free | 2782 | /* |
2783 | * If this is a permanent reservation ticket, we may be able to free | ||
2857 | * up more space based on the remaining count. | 2784 | * up more space based on the remaining count. |
2858 | */ | 2785 | */ |
2786 | bytes = ticket->t_curr_res; | ||
2859 | if (ticket->t_cnt > 0) { | 2787 | if (ticket->t_cnt > 0) { |
2860 | ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); | 2788 | ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); |
2861 | xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); | 2789 | bytes += ticket->t_unit_res*ticket->t_cnt; |
2862 | } | 2790 | } |
2863 | 2791 | ||
2792 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); | ||
2793 | xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); | ||
2794 | |||
2864 | trace_xfs_log_ungrant_exit(log, ticket); | 2795 | trace_xfs_log_ungrant_exit(log, ticket); |
2865 | 2796 | ||
2866 | xlog_verify_grant_head(log, 1); | ||
2867 | spin_unlock(&log->l_grant_lock); | ||
2868 | xfs_log_move_tail(log->l_mp, 1); | 2797 | xfs_log_move_tail(log->l_mp, 1); |
2869 | } /* xlog_ungrant_log_space */ | 2798 | } /* xlog_ungrant_log_space */ |
2870 | 2799 | ||
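
`xlog_ungrant_log_space()` now accumulates everything it gives back into a single `bytes` value before touching the grant heads: the unused part of the current reservation plus, for permanent tickets, one unit per remaining count. A worked example with hypothetical numbers:

```c
#include <stdio.h>

int main(void)
{
	int t_curr_res = 1024;	/* hypothetical: unused current reservation */
	int t_unit_res = 4096;	/* hypothetical: bytes per transaction unit */
	int t_cnt = 2;		/* hypothetical: remaining permanent uses */

	int bytes = t_curr_res;
	if (t_cnt > 0)		/* permanent reservation ticket */
		bytes += t_unit_res * t_cnt;

	/* 1024 + 4096 * 2 = 9216 bytes returned to both grant heads */
	printf("ungrant %d bytes\n", bytes);
	return 0;
}
```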
@@ -2901,11 +2830,11 @@ xlog_state_release_iclog( | |||
2901 | 2830 | ||
2902 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { | 2831 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { |
2903 | /* update tail before writing to iclog */ | 2832 | /* update tail before writing to iclog */ |
2904 | xlog_assign_tail_lsn(log->l_mp); | 2833 | xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); |
2905 | sync++; | 2834 | sync++; |
2906 | iclog->ic_state = XLOG_STATE_SYNCING; | 2835 | iclog->ic_state = XLOG_STATE_SYNCING; |
2907 | iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); | 2836 | iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); |
2908 | xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); | 2837 | xlog_verify_tail_lsn(log, iclog, tail_lsn); |
2909 | /* cycle incremented when incrementing curr_block */ | 2838 | /* cycle incremented when incrementing curr_block */ |
2910 | } | 2839 | } |
2911 | spin_unlock(&log->l_icloglock); | 2840 | spin_unlock(&log->l_icloglock); |
@@ -3088,7 +3017,7 @@ maybe_sleep: | |||
3088 | return XFS_ERROR(EIO); | 3017 | return XFS_ERROR(EIO); |
3089 | } | 3018 | } |
3090 | XFS_STATS_INC(xs_log_force_sleep); | 3019 | XFS_STATS_INC(xs_log_force_sleep); |
3091 | sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); | 3020 | xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
3092 | /* | 3021 | /* |
3093 | * No need to grab the log lock here since we're | 3022 | * No need to grab the log lock here since we're |
3094 | * only deciding whether or not to return EIO | 3023 | * only deciding whether or not to return EIO |
@@ -3119,10 +3048,8 @@ xfs_log_force( | |||
3119 | int error; | 3048 | int error; |
3120 | 3049 | ||
3121 | error = _xfs_log_force(mp, flags, NULL); | 3050 | error = _xfs_log_force(mp, flags, NULL); |
3122 | if (error) { | 3051 | if (error) |
3123 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | 3052 | xfs_warn(mp, "%s: error %d returned.", __func__, error); |
3124 | "error %d returned.", error); | ||
3125 | } | ||
3126 | } | 3053 | } |
3127 | 3054 | ||
3128 | /* | 3055 | /* |
@@ -3206,8 +3133,8 @@ try_again: | |||
3206 | 3133 | ||
3207 | XFS_STATS_INC(xs_log_force_sleep); | 3134 | XFS_STATS_INC(xs_log_force_sleep); |
3208 | 3135 | ||
3209 | sv_wait(&iclog->ic_prev->ic_write_wait, | 3136 | xlog_wait(&iclog->ic_prev->ic_write_wait, |
3210 | PSWP, &log->l_icloglock, s); | 3137 | &log->l_icloglock); |
3211 | if (log_flushed) | 3138 | if (log_flushed) |
3212 | *log_flushed = 1; | 3139 | *log_flushed = 1; |
3213 | already_slept = 1; | 3140 | already_slept = 1; |
@@ -3235,7 +3162,7 @@ try_again: | |||
3235 | return XFS_ERROR(EIO); | 3162 | return XFS_ERROR(EIO); |
3236 | } | 3163 | } |
3237 | XFS_STATS_INC(xs_log_force_sleep); | 3164 | XFS_STATS_INC(xs_log_force_sleep); |
3238 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); | 3165 | xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
3239 | /* | 3166 | /* |
3240 | * No need to grab the log lock here since we're | 3167 | * No need to grab the log lock here since we're |
3241 | * only deciding whether or not to return EIO | 3168 | * only deciding whether or not to return EIO |
@@ -3271,10 +3198,8 @@ xfs_log_force_lsn( | |||
3271 | int error; | 3198 | int error; |
3272 | 3199 | ||
3273 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); | 3200 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); |
3274 | if (error) { | 3201 | if (error) |
3275 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | 3202 | xfs_warn(mp, "%s: error %d returned.", __func__, error); |
3276 | "error %d returned.", error); | ||
3277 | } | ||
3278 | } | 3203 | } |
3279 | 3204 | ||
3280 | /* | 3205 | /* |
@@ -3310,10 +3235,8 @@ xfs_log_ticket_put( | |||
3310 | xlog_ticket_t *ticket) | 3235 | xlog_ticket_t *ticket) |
3311 | { | 3236 | { |
3312 | ASSERT(atomic_read(&ticket->t_ref) > 0); | 3237 | ASSERT(atomic_read(&ticket->t_ref) > 0); |
3313 | if (atomic_dec_and_test(&ticket->t_ref)) { | 3238 | if (atomic_dec_and_test(&ticket->t_ref)) |
3314 | sv_destroy(&ticket->t_wait); | ||
3315 | kmem_zone_free(xfs_log_ticket_zone, ticket); | 3239 | kmem_zone_free(xfs_log_ticket_zone, ticket); |
3316 | } | ||
3317 | } | 3240 | } |
3318 | 3241 | ||
3319 | xlog_ticket_t * | 3242 | xlog_ticket_t * |
@@ -3435,6 +3358,7 @@ xlog_ticket_alloc( | |||
3435 | } | 3358 | } |
3436 | 3359 | ||
3437 | atomic_set(&tic->t_ref, 1); | 3360 | atomic_set(&tic->t_ref, 1); |
3361 | INIT_LIST_HEAD(&tic->t_queue); | ||
3438 | tic->t_unit_res = unit_bytes; | 3362 | tic->t_unit_res = unit_bytes; |
3439 | tic->t_curr_res = unit_bytes; | 3363 | tic->t_curr_res = unit_bytes; |
3440 | tic->t_cnt = cnt; | 3364 | tic->t_cnt = cnt; |
@@ -3445,7 +3369,7 @@ xlog_ticket_alloc( | |||
3445 | tic->t_trans_type = 0; | 3369 | tic->t_trans_type = 0; |
3446 | if (xflags & XFS_LOG_PERM_RESERV) | 3370 | if (xflags & XFS_LOG_PERM_RESERV) |
3447 | tic->t_flags |= XLOG_TIC_PERM_RESERV; | 3371 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
3448 | sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); | 3372 | init_waitqueue_head(&tic->t_wait); |
3449 | 3373 | ||
3450 | xlog_tic_reset_res(tic); | 3374 | xlog_tic_reset_res(tic); |
3451 | 3375 | ||
@@ -3480,22 +3404,45 @@ xlog_verify_dest_ptr( | |||
3480 | } | 3404 | } |
3481 | 3405 | ||
3482 | if (!good_ptr) | 3406 | if (!good_ptr) |
3483 | xlog_panic("xlog_verify_dest_ptr: invalid ptr"); | 3407 | xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); |
3484 | } | 3408 | } |
3485 | 3409 | ||
3410 | /* | ||
3411 | * Check to make sure the grant write head didn't just overlap the tail. If | ||
3412 | * the cycles are the same, we can't be overlapping. Otherwise, make sure that | ||
3413 | * the cycles differ by exactly one and check the byte count. | ||
3414 | * | ||
3415 | * This check is run unlocked, so can give false positives. Rather than assert | ||
3416 | * on failures, use a warn-once flag and a panic tag to allow the admin to | ||
3417 | * determine if they want to panic the machine when such an error occurs. For | ||
3418 | * debug kernels this will have the same effect as using an assert but, unlike | ||
3419 | * an assert, it can be turned off at runtime. | ||
3420 | */ | ||
3486 | STATIC void | 3421 | STATIC void |
3487 | xlog_verify_grant_head(xlog_t *log, int equals) | 3422 | xlog_verify_grant_tail( |
3488 | { | 3423 | struct log *log) |
3489 | if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { | 3424 | { |
3490 | if (equals) | 3425 | int tail_cycle, tail_blocks; |
3491 | ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); | 3426 | int cycle, space; |
3492 | else | 3427 | |
3493 | ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); | 3428 | xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); |
3494 | } else { | 3429 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); |
3495 | ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); | 3430 | if (tail_cycle != cycle) { |
3496 | ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); | 3431 | if (cycle - 1 != tail_cycle && |
3497 | } | 3432 | !(log->l_flags & XLOG_TAIL_WARN)) { |
3498 | } /* xlog_verify_grant_head */ | 3433 | xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, |
3434 | "%s: cycle - 1 != tail_cycle", __func__); | ||
3435 | log->l_flags |= XLOG_TAIL_WARN; | ||
3436 | } | ||
3437 | |||
3438 | if (space > BBTOB(tail_blocks) && | ||
3439 | !(log->l_flags & XLOG_TAIL_WARN)) { | ||
3440 | xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, | ||
3441 | "%s: space > BBTOB(tail_blocks)", __func__); | ||
3442 | log->l_flags |= XLOG_TAIL_WARN; | ||
3443 | } | ||
3444 | } | ||
3445 | } | ||
3499 | 3446 | ||
3500 | /* check if it will fit */ | 3447 | /* check if it will fit */ |
3501 | STATIC void | 3448 | STATIC void |
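
The new `xlog_verify_grant_tail()` above deliberately warns once via `xfs_alert_tag()` instead of asserting, so the behaviour can be chosen at runtime. The tag mechanism can be pictured as a wrapper that escalates to a panic when the tag is enabled in the `fs.xfs.panic_mask` sysctl; a simplified sketch (the real helper lives in `xfs_message.c` and may differ):

```c
#include <linux/bug.h>
#include <linux/kernel.h>

extern int xfs_panic_mask;		/* fs.xfs.panic_mask sysctl value */
struct xfs_mount;			/* opaque here */

void
xfs_alert_tag_sketch(const struct xfs_mount *mp, int panic_tag,
		     const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_ALERT "XFS: %pV\n", &vaf);
	va_end(args);

	/* Escalate to a panic only if the admin enabled this tag. */
	if (xfs_panic_mask & panic_tag)
		BUG();
}
```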
@@ -3509,16 +3456,16 @@ xlog_verify_tail_lsn(xlog_t *log, | |||
3509 | blocks = | 3456 | blocks = |
3510 | log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn)); | 3457 | log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn)); |
3511 | if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize)) | 3458 | if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize)) |
3512 | xlog_panic("xlog_verify_tail_lsn: ran out of log space"); | 3459 | xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); |
3513 | } else { | 3460 | } else { |
3514 | ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle); | 3461 | ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle); |
3515 | 3462 | ||
3516 | if (BLOCK_LSN(tail_lsn) == log->l_prev_block) | 3463 | if (BLOCK_LSN(tail_lsn) == log->l_prev_block) |
3517 | xlog_panic("xlog_verify_tail_lsn: tail wrapped"); | 3464 | xfs_emerg(log->l_mp, "%s: tail wrapped", __func__); |
3518 | 3465 | ||
3519 | blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block; | 3466 | blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block; |
3520 | if (blocks < BTOBB(iclog->ic_offset) + 1) | 3467 | if (blocks < BTOBB(iclog->ic_offset) + 1) |
3521 | xlog_panic("xlog_verify_tail_lsn: ran out of log space"); | 3468 | xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); |
3522 | } | 3469 | } |
3523 | } /* xlog_verify_tail_lsn */ | 3470 | } /* xlog_verify_tail_lsn */ |
3524 | 3471 | ||
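The two branches of xlog_verify_tail_lsn() above compute the free region between the head (l_prev_block) and the tail in opposite directions, depending on whether the head has wrapped past the end of the log. A rough userspace model of that geometry alone, leaving out the iclog header and offset terms; free_log_blocks() is an illustrative name, not a kernel function:

#include <stdio.h>

/* Geometry-only model of the free-space computation in xlog_verify_tail_lsn(). */
static int free_log_blocks(int log_size, int tail_cycle, int tail_block,
			   int head_cycle, int head_block)
{
	if (tail_cycle == head_cycle)
		/* head ahead of the tail in the same cycle: the gap wraps */
		return log_size - (head_block - tail_block);
	/* head wrapped: one cycle ahead of the tail and physically behind it */
	return tail_block - head_block;
}

int main(void)
{
	printf("%d\n", free_log_blocks(1000, 7, 100, 7, 900));	/* 200 */
	printf("%d\n", free_log_blocks(1000, 7, 800, 8, 100));	/* 700 */
	return 0;
}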
@@ -3558,22 +3505,23 @@ xlog_verify_iclog(xlog_t *log, | |||
3558 | icptr = log->l_iclog; | 3505 | icptr = log->l_iclog; |
3559 | for (i=0; i < log->l_iclog_bufs; i++) { | 3506 | for (i=0; i < log->l_iclog_bufs; i++) { |
3560 | if (icptr == NULL) | 3507 | if (icptr == NULL) |
3561 | xlog_panic("xlog_verify_iclog: invalid ptr"); | 3508 | xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); |
3562 | icptr = icptr->ic_next; | 3509 | icptr = icptr->ic_next; |
3563 | } | 3510 | } |
3564 | if (icptr != log->l_iclog) | 3511 | if (icptr != log->l_iclog) |
3565 | xlog_panic("xlog_verify_iclog: corrupt iclog ring"); | 3512 | xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__); |
3566 | spin_unlock(&log->l_icloglock); | 3513 | spin_unlock(&log->l_icloglock); |
3567 | 3514 | ||
3568 | /* check log magic numbers */ | 3515 | /* check log magic numbers */ |
3569 | if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) | 3516 | if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) |
3570 | xlog_panic("xlog_verify_iclog: invalid magic num"); | 3517 | xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); |
3571 | 3518 | ||
3572 | ptr = (xfs_caddr_t) &iclog->ic_header; | 3519 | ptr = (xfs_caddr_t) &iclog->ic_header; |
3573 | for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; | 3520 | for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; |
3574 | ptr += BBSIZE) { | 3521 | ptr += BBSIZE) { |
3575 | if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) | 3522 | if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) |
3576 | xlog_panic("xlog_verify_iclog: unexpected magic num"); | 3523 | xfs_emerg(log->l_mp, "%s: unexpected magic num", |
3524 | __func__); | ||
3577 | } | 3525 | } |
3578 | 3526 | ||
3579 | /* check fields */ | 3527 | /* check fields */ |
@@ -3603,9 +3551,10 @@ xlog_verify_iclog(xlog_t *log, | |||
3603 | } | 3551 | } |
3604 | } | 3552 | } |
3605 | if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) | 3553 | if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) |
3606 | cmn_err(CE_WARN, "xlog_verify_iclog: " | 3554 | xfs_warn(log->l_mp, |
3607 | "invalid clientid %d op 0x%p offset 0x%lx", | 3555 | "%s: invalid clientid %d op 0x%p offset 0x%lx", |
3608 | clientid, ophead, (unsigned long)field_offset); | 3556 | __func__, clientid, ophead, |
3557 | (unsigned long)field_offset); | ||
3609 | 3558 | ||
3610 | /* check length */ | 3559 | /* check length */ |
3611 | field_offset = (__psint_t) | 3560 | field_offset = (__psint_t) |
@@ -3716,12 +3665,10 @@ xfs_log_force_umount( | |||
3716 | xlog_cil_force(log); | 3665 | xlog_cil_force(log); |
3717 | 3666 | ||
3718 | /* | 3667 | /* |
3719 | * We must hold both the GRANT lock and the LOG lock, | 3668 | * mark the filesystem and the log as in a shutdown state and wake |
3720 | * before we mark the filesystem SHUTDOWN and wake | 3669 | * everybody up to tell them the bad news. |
3721 | * everybody up to tell the bad news. | ||
3722 | */ | 3670 | */ |
3723 | spin_lock(&log->l_icloglock); | 3671 | spin_lock(&log->l_icloglock); |
3724 | spin_lock(&log->l_grant_lock); | ||
3725 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; | 3672 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; |
3726 | if (mp->m_sb_bp) | 3673 | if (mp->m_sb_bp) |
3727 | XFS_BUF_DONE(mp->m_sb_bp); | 3674 | XFS_BUF_DONE(mp->m_sb_bp); |
@@ -3742,27 +3689,21 @@ xfs_log_force_umount( | |||
3742 | spin_unlock(&log->l_icloglock); | 3689 | spin_unlock(&log->l_icloglock); |
3743 | 3690 | ||
3744 | /* | 3691 | /* |
3745 | * We don't want anybody waiting for log reservations | 3692 | * We don't want anybody waiting for log reservations after this. That |
3746 | * after this. That means we have to wake up everybody | 3693 | * means we have to wake up everybody queued up on reserveq as well as |
3747 | * queued up on reserve_headq as well as write_headq. | 3694 | * writeq. In addition, we make sure in xlog_{re}grant_log_space that |
3748 | * In addition, we make sure in xlog_{re}grant_log_space | 3695 | * we don't enqueue anything once the SHUTDOWN flag is set, and this |
3749 | * that we don't enqueue anything once the SHUTDOWN flag | 3696 | * action is protected by the grant locks. |
3750 | * is set, and this action is protected by the GRANTLOCK. | ||
3751 | */ | 3697 | */ |
3752 | if ((tic = log->l_reserve_headq)) { | 3698 | spin_lock(&log->l_grant_reserve_lock); |
3753 | do { | 3699 | list_for_each_entry(tic, &log->l_reserveq, t_queue) |
3754 | sv_signal(&tic->t_wait); | 3700 | wake_up(&tic->t_wait); |
3755 | tic = tic->t_next; | 3701 | spin_unlock(&log->l_grant_reserve_lock); |
3756 | } while (tic != log->l_reserve_headq); | 3702 | |
3757 | } | 3703 | spin_lock(&log->l_grant_write_lock); |
3758 | 3704 | list_for_each_entry(tic, &log->l_writeq, t_queue) | |
3759 | if ((tic = log->l_write_headq)) { | 3705 | wake_up(&tic->t_wait); |
3760 | do { | 3706 | spin_unlock(&log->l_grant_write_lock); |
3761 | sv_signal(&tic->t_wait); | ||
3762 | tic = tic->t_next; | ||
3763 | } while (tic != log->l_write_headq); | ||
3764 | } | ||
3765 | spin_unlock(&log->l_grant_lock); | ||
3766 | 3707 | ||
3767 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { | 3708 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { |
3768 | ASSERT(!logerror); | 3709 | ASSERT(!logerror); |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 916eb7db14d9..3bd3291ef8d2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -191,7 +191,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket); | |||
191 | 191 | ||
192 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | 192 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); |
193 | 193 | ||
194 | int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | 194 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, |
195 | struct xfs_log_vec *log_vector, | 195 | struct xfs_log_vec *log_vector, |
196 | xfs_lsn_t *commit_lsn, int flags); | 196 | xfs_lsn_t *commit_lsn, int flags); |
197 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | 197 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 23d6ceb5e97b..9ca59be08977 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -61,7 +61,7 @@ xlog_cil_init( | |||
61 | INIT_LIST_HEAD(&cil->xc_committing); | 61 | INIT_LIST_HEAD(&cil->xc_committing); |
62 | spin_lock_init(&cil->xc_cil_lock); | 62 | spin_lock_init(&cil->xc_cil_lock); |
63 | init_rwsem(&cil->xc_ctx_lock); | 63 | init_rwsem(&cil->xc_ctx_lock); |
64 | sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); | 64 | init_waitqueue_head(&cil->xc_commit_wait); |
65 | 65 | ||
66 | INIT_LIST_HEAD(&ctx->committing); | 66 | INIT_LIST_HEAD(&ctx->committing); |
67 | INIT_LIST_HEAD(&ctx->busy_extents); | 67 | INIT_LIST_HEAD(&ctx->busy_extents); |
@@ -361,15 +361,10 @@ xlog_cil_committed( | |||
361 | int abort) | 361 | int abort) |
362 | { | 362 | { |
363 | struct xfs_cil_ctx *ctx = args; | 363 | struct xfs_cil_ctx *ctx = args; |
364 | struct xfs_log_vec *lv; | ||
365 | int abortflag = abort ? XFS_LI_ABORTED : 0; | ||
366 | struct xfs_busy_extent *busyp, *n; | 364 | struct xfs_busy_extent *busyp, *n; |
367 | 365 | ||
368 | /* unpin all the log items */ | 366 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
369 | for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) { | 367 | ctx->start_lsn, abort); |
370 | xfs_trans_item_committed(lv->lv_item, ctx->start_lsn, | ||
371 | abortflag); | ||
372 | } | ||
373 | 368 | ||
374 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | 369 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) |
375 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | 370 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); |
@@ -548,7 +543,7 @@ xlog_cil_push( | |||
548 | 543 | ||
549 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); | 544 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); |
550 | if (error) | 545 | if (error) |
551 | goto out_abort; | 546 | goto out_abort_free_ticket; |
552 | 547 | ||
553 | /* | 548 | /* |
554 | * now that we've written the checkpoint into the log, strictly | 549 | * now that we've written the checkpoint into the log, strictly |
@@ -568,14 +563,15 @@ restart: | |||
568 | * It is still being pushed! Wait for the push to | 563 | * It is still being pushed! Wait for the push to |
569 | * complete, then start again from the beginning. | 564 | * complete, then start again from the beginning. |
570 | */ | 565 | */ |
571 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 566 | xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); |
572 | goto restart; | 567 | goto restart; |
573 | } | 568 | } |
574 | } | 569 | } |
575 | spin_unlock(&cil->xc_cil_lock); | 570 | spin_unlock(&cil->xc_cil_lock); |
576 | 571 | ||
572 | /* xfs_log_done always frees the ticket on error. */ | ||
577 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); | 573 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); |
578 | if (error || commit_lsn == -1) | 574 | if (commit_lsn == -1) |
579 | goto out_abort; | 575 | goto out_abort; |
580 | 576 | ||
581 | /* attach all the transactions w/ busy extents to iclog */ | 577 | /* attach all the transactions w/ busy extents to iclog */ |
@@ -592,7 +588,7 @@ restart: | |||
592 | */ | 588 | */ |
593 | spin_lock(&cil->xc_cil_lock); | 589 | spin_lock(&cil->xc_cil_lock); |
594 | ctx->commit_lsn = commit_lsn; | 590 | ctx->commit_lsn = commit_lsn; |
595 | sv_broadcast(&cil->xc_commit_wait); | 591 | wake_up_all(&cil->xc_commit_wait); |
596 | spin_unlock(&cil->xc_cil_lock); | 592 | spin_unlock(&cil->xc_cil_lock); |
597 | 593 | ||
598 | /* release the hounds! */ | 594 | /* release the hounds! */ |
@@ -605,6 +601,8 @@ out_free_ticket: | |||
605 | kmem_free(new_ctx); | 601 | kmem_free(new_ctx); |
606 | return 0; | 602 | return 0; |
607 | 603 | ||
604 | out_abort_free_ticket: | ||
605 | xfs_log_ticket_put(tic); | ||
608 | out_abort: | 606 | out_abort: |
609 | xlog_cil_committed(ctx, XFS_LI_ABORTED); | 607 | xlog_cil_committed(ctx, XFS_LI_ABORTED); |
610 | return XFS_ERROR(EIO); | 608 | return XFS_ERROR(EIO); |
@@ -627,7 +625,7 @@ out_abort: | |||
627 | * background commit, returns without it held once background commits are | 625 | * background commit, returns without it held once background commits are |
628 | * allowed again. | 626 | * allowed again. |
629 | */ | 627 | */ |
630 | int | 628 | void |
631 | xfs_log_commit_cil( | 629 | xfs_log_commit_cil( |
632 | struct xfs_mount *mp, | 630 | struct xfs_mount *mp, |
633 | struct xfs_trans *tp, | 631 | struct xfs_trans *tp, |
@@ -642,11 +640,6 @@ xfs_log_commit_cil( | |||
642 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | 640 | if (flags & XFS_TRANS_RELEASE_LOG_RES) |
643 | log_flags = XFS_LOG_REL_PERM_RESERV; | 641 | log_flags = XFS_LOG_REL_PERM_RESERV; |
644 | 642 | ||
645 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
646 | xlog_cil_free_logvec(log_vector); | ||
647 | return XFS_ERROR(EIO); | ||
648 | } | ||
649 | |||
650 | /* | 643 | /* |
651 | * do all the hard work of formatting items (including memory | 644 | * do all the hard work of formatting items (including memory |
652 | * allocation) outside the CIL context lock. This prevents stalling CIL | 645 | * allocation) outside the CIL context lock. This prevents stalling CIL |
@@ -706,7 +699,6 @@ xfs_log_commit_cil( | |||
706 | */ | 699 | */ |
707 | if (push) | 700 | if (push) |
708 | xlog_cil_push(log, 0); | 701 | xlog_cil_push(log, 0); |
709 | return 0; | ||
710 | } | 702 | } |
711 | 703 | ||
712 | /* | 704 | /* |
@@ -757,7 +749,7 @@ restart: | |||
757 | * It is still being pushed! Wait for the push to | 749 | * It is still being pushed! Wait for the push to |
758 | * complete, then start again from the beginning. | 750 | * complete, then start again from the beginning. |
759 | */ | 751 | */ |
760 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 752 | xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); |
761 | goto restart; | 753 | goto restart; |
762 | } | 754 | } |
763 | if (ctx->sequence != sequence) | 755 | if (ctx->sequence != sequence) |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index edcdfe01617f..5864850e9e34 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -21,7 +21,6 @@ | |||
21 | struct xfs_buf; | 21 | struct xfs_buf; |
22 | struct log; | 22 | struct log; |
23 | struct xlog_ticket; | 23 | struct xlog_ticket; |
24 | struct xfs_buf_cancel; | ||
25 | struct xfs_mount; | 24 | struct xfs_mount; |
26 | 25 | ||
27 | /* | 26 | /* |
@@ -54,7 +53,6 @@ struct xfs_mount; | |||
54 | BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ | 53 | BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ |
55 | XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) | 54 | XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) |
56 | 55 | ||
57 | |||
58 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) | 56 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) |
59 | { | 57 | { |
60 | return ((xfs_lsn_t)cycle << 32) | block; | 58 | return ((xfs_lsn_t)cycle << 32) | block; |
@@ -89,10 +87,6 @@ static inline uint xlog_get_client_id(__be32 i) | |||
89 | return be32_to_cpu(i) >> 24; | 87 | return be32_to_cpu(i) >> 24; |
90 | } | 88 | } |
91 | 89 | ||
92 | #define xlog_panic(args...) cmn_err(CE_PANIC, ## args) | ||
93 | #define xlog_exit(args...) cmn_err(CE_PANIC, ## args) | ||
94 | #define xlog_warn(args...) cmn_err(CE_WARN, ## args) | ||
95 | |||
96 | /* | 90 | /* |
97 | * In core log state | 91 | * In core log state |
98 | */ | 92 | */ |
@@ -133,12 +127,10 @@ static inline uint xlog_get_client_id(__be32 i) | |||
133 | */ | 127 | */ |
134 | #define XLOG_TIC_INITED 0x1 /* has been initialized */ | 128 | #define XLOG_TIC_INITED 0x1 /* has been initialized */ |
135 | #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ | 129 | #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ |
136 | #define XLOG_TIC_IN_Q 0x4 | ||
137 | 130 | ||
138 | #define XLOG_TIC_FLAGS \ | 131 | #define XLOG_TIC_FLAGS \ |
139 | { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ | 132 | { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ |
140 | { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \ | 133 | { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" } |
141 | { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" } | ||
142 | 134 | ||
143 | #endif /* __KERNEL__ */ | 135 | #endif /* __KERNEL__ */ |
144 | 136 | ||
@@ -152,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i) | |||
152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ | 144 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ |
153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being | 145 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being |
154 | shutdown */ | 146 | shutdown */ |
147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ | ||
155 | 148 | ||
156 | #ifdef __KERNEL__ | 149 | #ifdef __KERNEL__ |
157 | /* | 150 | /* |
@@ -244,9 +237,8 @@ typedef struct xlog_res { | |||
244 | } xlog_res_t; | 237 | } xlog_res_t; |
245 | 238 | ||
246 | typedef struct xlog_ticket { | 239 | typedef struct xlog_ticket { |
247 | sv_t t_wait; /* ticket wait queue : 20 */ | 240 | wait_queue_head_t t_wait; /* ticket wait queue */ |
248 | struct xlog_ticket *t_next; /* :4|8 */ | 241 | struct list_head t_queue; /* reserve/write queue */ |
249 | struct xlog_ticket *t_prev; /* :4|8 */ | ||
250 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 242 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
251 | atomic_t t_ref; /* ticket reference count : 4 */ | 243 | atomic_t t_ref; /* ticket reference count : 4 */ |
252 | int t_curr_res; /* current reservation in bytes : 4 */ | 244 | int t_curr_res; /* current reservation in bytes : 4 */ |
@@ -353,8 +345,8 @@ typedef union xlog_in_core2 { | |||
353 | * and move everything else out to subsequent cachelines. | 345 | * and move everything else out to subsequent cachelines. |
354 | */ | 346 | */ |
355 | typedef struct xlog_in_core { | 347 | typedef struct xlog_in_core { |
356 | sv_t ic_force_wait; | 348 | wait_queue_head_t ic_force_wait; |
357 | sv_t ic_write_wait; | 349 | wait_queue_head_t ic_write_wait; |
358 | struct xlog_in_core *ic_next; | 350 | struct xlog_in_core *ic_next; |
359 | struct xlog_in_core *ic_prev; | 351 | struct xlog_in_core *ic_prev; |
360 | struct xfs_buf *ic_bp; | 352 | struct xfs_buf *ic_bp; |
@@ -421,7 +413,7 @@ struct xfs_cil { | |||
421 | struct xfs_cil_ctx *xc_ctx; | 413 | struct xfs_cil_ctx *xc_ctx; |
422 | struct rw_semaphore xc_ctx_lock; | 414 | struct rw_semaphore xc_ctx_lock; |
423 | struct list_head xc_committing; | 415 | struct list_head xc_committing; |
424 | sv_t xc_commit_wait; | 416 | wait_queue_head_t xc_commit_wait; |
425 | xfs_lsn_t xc_current_sequence; | 417 | xfs_lsn_t xc_current_sequence; |
426 | }; | 418 | }; |
427 | 419 | ||
@@ -491,7 +483,7 @@ typedef struct log { | |||
491 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 483 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
492 | uint l_flags; | 484 | uint l_flags; |
493 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | 485 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ |
494 | struct xfs_buf_cancel **l_buf_cancel_table; | 486 | struct list_head *l_buf_cancel_table; |
495 | int l_iclog_hsize; /* size of iclog header */ | 487 | int l_iclog_hsize; /* size of iclog header */ |
496 | int l_iclog_heads; /* # of iclog header sectors */ | 488 | int l_iclog_heads; /* # of iclog header sectors */ |
497 | uint l_sectBBsize; /* sector size in BBs (2^n) */ | 489 | uint l_sectBBsize; /* sector size in BBs (2^n) */ |
@@ -503,29 +495,40 @@ typedef struct log { | |||
503 | int l_logBBsize; /* size of log in BB chunks */ | 495 | int l_logBBsize; /* size of log in BB chunks */ |
504 | 496 | ||
505 | /* The following block of fields are changed while holding icloglock */ | 497 | /* The following block of fields are changed while holding icloglock */ |
506 | sv_t l_flush_wait ____cacheline_aligned_in_smp; | 498 | wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp; |
507 | /* waiting for iclog flush */ | 499 | /* waiting for iclog flush */ |
508 | int l_covered_state;/* state of "covering disk | 500 | int l_covered_state;/* state of "covering disk |
509 | * log entries" */ | 501 | * log entries" */ |
510 | xlog_in_core_t *l_iclog; /* head log queue */ | 502 | xlog_in_core_t *l_iclog; /* head log queue */ |
511 | spinlock_t l_icloglock; /* grab to change iclog state */ | 503 | spinlock_t l_icloglock; /* grab to change iclog state */ |
512 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed | ||
513 | * buffers */ | ||
514 | xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ | ||
515 | int l_curr_cycle; /* Cycle number of log writes */ | 504 | int l_curr_cycle; /* Cycle number of log writes */ |
516 | int l_prev_cycle; /* Cycle number before last | 505 | int l_prev_cycle; /* Cycle number before last |
517 | * block increment */ | 506 | * block increment */ |
518 | int l_curr_block; /* current logical log block */ | 507 | int l_curr_block; /* current logical log block */ |
519 | int l_prev_block; /* previous logical log block */ | 508 | int l_prev_block; /* previous logical log block */ |
520 | 509 | ||
521 | /* The following block of fields are changed while holding grant_lock */ | 510 | /* |
522 | spinlock_t l_grant_lock ____cacheline_aligned_in_smp; | 511 | * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and |
523 | xlog_ticket_t *l_reserve_headq; | 512 | * read without needing to hold specific locks. To avoid operations |
524 | xlog_ticket_t *l_write_headq; | 513 | * contending with other hot objects, place each of them on a separate |
525 | int l_grant_reserve_cycle; | 514 | * cacheline. |
526 | int l_grant_reserve_bytes; | 515 | */ |
527 | int l_grant_write_cycle; | 516 | /* lsn of last LR on disk */ |
528 | int l_grant_write_bytes; | 517 | atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp; |
518 | /* lsn of 1st LR with unflushed buffers */ | ||
519 | atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; | ||
520 | |||
521 | /* | ||
522 | * ticket grant locks, queues and accounting have their own cachelines | ||
523 | * as these are quite hot and can be operated on concurrently. | ||
524 | */ | ||
525 | spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp; | ||
526 | struct list_head l_reserveq; | ||
527 | atomic64_t l_grant_reserve_head; | ||
528 | |||
529 | spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp; | ||
530 | struct list_head l_writeq; | ||
531 | atomic64_t l_grant_write_head; | ||
529 | 532 | ||
530 | /* The following fields are used for debugging; need to hold icloglock */ | 533 | /* The following fields are used for debugging; need to hold icloglock */ |
531 | #ifdef DEBUG | 534 | #ifdef DEBUG |
@@ -534,6 +537,9 @@ typedef struct log { | |||
534 | 537 | ||
535 | } xlog_t; | 538 | } xlog_t; |
536 | 539 | ||
540 | #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ | ||
541 | ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE)) | ||
542 | |||
537 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 543 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
538 | 544 | ||
539 | /* common routines */ | 545 | /* common routines */ |
@@ -562,6 +568,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | |||
562 | xlog_in_core_t **commit_iclog, uint flags); | 568 | xlog_in_core_t **commit_iclog, uint flags); |
563 | 569 | ||
564 | /* | 570 | /* |
571 | * When we crack an atomic LSN, we sample it first so that the value will not | ||
572 | * change while we are cracking it into the component values. This means we | ||
573 | * will always get consistent component values to work from. This should always | ||
574 | * be used to sample and crack LSNs that are stored and updated in atomic | ||
575 | * variables. | ||
576 | */ | ||
577 | static inline void | ||
578 | xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block) | ||
579 | { | ||
580 | xfs_lsn_t val = atomic64_read(lsn); | ||
581 | |||
582 | *cycle = CYCLE_LSN(val); | ||
583 | *block = BLOCK_LSN(val); | ||
584 | } | ||
585 | |||
586 | /* | ||
587 | * Calculate and assign a value to an atomic LSN variable from component pieces. | ||
588 | */ | ||
589 | static inline void | ||
590 | xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block) | ||
591 | { | ||
592 | atomic64_set(lsn, xlog_assign_lsn(cycle, block)); | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * When we crack the grant head, we sample it first so that the value will not | ||
597 | * change while we are cracking it into the component values. This means we | ||
598 | * will always get consistent component values to work from. | ||
599 | */ | ||
600 | static inline void | ||
601 | xlog_crack_grant_head_val(int64_t val, int *cycle, int *space) | ||
602 | { | ||
603 | *cycle = val >> 32; | ||
604 | *space = val & 0xffffffff; | ||
605 | } | ||
606 | |||
607 | static inline void | ||
608 | xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space) | ||
609 | { | ||
610 | xlog_crack_grant_head_val(atomic64_read(head), cycle, space); | ||
611 | } | ||
612 | |||
613 | static inline int64_t | ||
614 | xlog_assign_grant_head_val(int cycle, int space) | ||
615 | { | ||
616 | return ((int64_t)cycle << 32) | space; | ||
617 | } | ||
618 | |||
619 | static inline void | ||
620 | xlog_assign_grant_head(atomic64_t *head, int cycle, int space) | ||
621 | { | ||
622 | atomic64_set(head, xlog_assign_grant_head_val(cycle, space)); | ||
623 | } | ||
624 | |||
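Taken together, these helpers pack the cycle into the high 32 bits and the byte count into the low 32 bits of one 64-bit atomic, so any reader gets a matched (cycle, space) pair without holding a lock. A self-contained userspace model using C11 atomics, where _Atomic int64_t merely stands in for the kernel's atomic64_t:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static void assign_grant_head(_Atomic int64_t *head, int cycle, int space)
{
	atomic_store(head, ((int64_t)cycle << 32) | space);
}

static void crack_grant_head(_Atomic int64_t *head, int *cycle, int *space)
{
	int64_t val = atomic_load(head);	/* sample once for a consistent pair */

	*cycle = val >> 32;
	*space = val & 0xffffffff;
}

int main(void)
{
	_Atomic int64_t head;
	int cycle, space;

	assign_grant_head(&head, 42, 8192);
	crack_grant_head(&head, &cycle, &space);
	printf("cycle=%d space=%d\n", cycle, space);	/* cycle=42 space=8192 */
	return 0;
}

This single-word encoding is what allows the series to retire l_grant_lock from the hot grant-accounting paths, as seen in the xfs_log_force_umount hunk above.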
625 | /* | ||
565 | * Committed Item List interfaces | 626 | * Committed Item List interfaces |
566 | */ | 627 | */ |
567 | int xlog_cil_init(struct log *log); | 628 | int xlog_cil_init(struct log *log); |
@@ -585,6 +646,21 @@ xlog_cil_force(struct log *log) | |||
585 | */ | 646 | */ |
586 | #define XLOG_UNMOUNT_REC_TYPE (-1U) | 647 | #define XLOG_UNMOUNT_REC_TYPE (-1U) |
587 | 648 | ||
649 | /* | ||
650 | * Wrapper function for waiting on a wait queue serialised against wakeups | ||
651 | * by a spinlock. This matches the semantics of all the wait queues used in the | ||
652 | * log code. | ||
653 | */ | ||
654 | static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) | ||
655 | { | ||
656 | DECLARE_WAITQUEUE(wait, current); | ||
657 | |||
658 | add_wait_queue_exclusive(wq, &wait); | ||
659 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
660 | spin_unlock(lock); | ||
661 | schedule(); | ||
662 | remove_wait_queue(wq, &wait); | ||
663 | } | ||
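xlog_wait() only works because every waker changes state and calls wake_up() while holding the same spinlock the sleeper passes in; queueing under the lock and dropping it before schedule() is what makes the sleep race-free. A loose userspace analogy of that discipline, using a pthread mutex and condition variable purely to model the semantics (the kernel version uses a wait-queue entry, not a condvar):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int granted;

static void *waiter(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!granted)		/* analogous to queueing on tic->t_wait */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	printf("woken\n");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	pthread_mutex_lock(&lock);	/* waker: change state under the lock */
	granted = 1;
	pthread_cond_signal(&cond);	/* analogous to wake_up(&tic->t_wait) */
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}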
588 | #endif /* __KERNEL__ */ | 664 | #endif /* __KERNEL__ */ |
589 | 665 | ||
590 | #endif /* __XFS_LOG_PRIV_H__ */ | 666 | #endif /* __XFS_LOG_PRIV_H__ */ |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 966d3f97458c..5cc464a17c93 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -53,6 +53,17 @@ STATIC void xlog_recover_check_summary(xlog_t *); | |||
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * This structure is used during recovery to record the buf log items which | ||
57 | * have been canceled and should not be replayed. | ||
58 | */ | ||
59 | struct xfs_buf_cancel { | ||
60 | xfs_daddr_t bc_blkno; | ||
61 | uint bc_len; | ||
62 | int bc_refcount; | ||
63 | struct list_head bc_list; | ||
64 | }; | ||
65 | |||
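This replaces the hand-rolled singly linked chains with list_head buckets indexed by the XLOG_BUF_CANCEL_BUCKET() macro added to xfs_log_priv.h above, so cancel records for a given disk address always land on, and are looked up from, one short list. A trivial userspace sketch of the bucket selection; the table size of 64 is an assumption here, the real XLOG_BC_TABLE_SIZE constant lives in xfs_log_priv.h:

#include <stdint.h>
#include <stdio.h>

#define XLOG_BC_TABLE_SIZE	64	/* assumed bucket count */

static unsigned int buf_cancel_bucket(uint64_t blkno)
{
	return blkno % XLOG_BC_TABLE_SIZE;
}

int main(void)
{
	printf("%u\n", buf_cancel_bucket(8192));	/* 8192 % 64 = 0 */
	printf("%u\n", buf_cancel_bucket(8197));	/* 8197 % 64 = 5 */
	return 0;
}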
66 | /* | ||
56 | * Sector aligned buffer routines for buffer create/read/write/access | 67 | * Sector aligned buffer routines for buffer create/read/write/access |
57 | */ | 68 | */ |
58 | 69 | ||
@@ -81,7 +92,7 @@ xlog_get_bp( | |||
81 | int nbblks) | 92 | int nbblks) |
82 | { | 93 | { |
83 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 94 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
84 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", | 95 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
85 | nbblks); | 96 | nbblks); |
86 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 97 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
87 | return NULL; | 98 | return NULL; |
@@ -90,7 +101,7 @@ xlog_get_bp( | |||
90 | /* | 101 | /* |
91 | * We do log I/O in units of log sectors (a power-of-2 | 102 | * We do log I/O in units of log sectors (a power-of-2 |
92 | * multiple of the basic block size), so we round up the | 103 | * multiple of the basic block size), so we round up the |
93 | * requested size to acommodate the basic blocks required | 104 | * requested size to accommodate the basic blocks required |
94 | * for complete log sectors. | 105 | * for complete log sectors. |
95 | * | 106 | * |
96 | * In addition, the buffer may be used for a non-sector- | 107 | * In addition, the buffer may be used for a non-sector- |
@@ -101,7 +112,7 @@ xlog_get_bp( | |||
101 | * an issue. Nor will this be a problem if the log I/O is | 112 | * an issue. Nor will this be a problem if the log I/O is |
102 | * done in basic blocks (sector size 1). But otherwise we | 113 | * done in basic blocks (sector size 1). But otherwise we |
103 | * extend the buffer by one extra log sector to ensure | 114 | * extend the buffer by one extra log sector to ensure |
104 | * there's space to accomodate this possiblility. | 115 | * there's space to accommodate this possibility. |
105 | */ | 116 | */ |
106 | if (nbblks > 1 && log->l_sectBBsize > 1) | 117 | if (nbblks > 1 && log->l_sectBBsize > 1) |
107 | nbblks += log->l_sectBBsize; | 118 | nbblks += log->l_sectBBsize; |
@@ -149,7 +160,7 @@ xlog_bread_noalign( | |||
149 | int error; | 160 | int error; |
150 | 161 | ||
151 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 162 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
152 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", | 163 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
153 | nbblks); | 164 | nbblks); |
154 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 165 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
155 | return EFSCORRUPTED; | 166 | return EFSCORRUPTED; |
@@ -208,7 +219,7 @@ xlog_bwrite( | |||
208 | int error; | 219 | int error; |
209 | 220 | ||
210 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 221 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
211 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", | 222 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
212 | nbblks); | 223 | nbblks); |
213 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 224 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
214 | return EFSCORRUPTED; | 225 | return EFSCORRUPTED; |
@@ -243,9 +254,9 @@ xlog_header_check_dump( | |||
243 | xfs_mount_t *mp, | 254 | xfs_mount_t *mp, |
244 | xlog_rec_header_t *head) | 255 | xlog_rec_header_t *head) |
245 | { | 256 | { |
246 | cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n", | 257 | xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n", |
247 | __func__, &mp->m_sb.sb_uuid, XLOG_FMT); | 258 | __func__, &mp->m_sb.sb_uuid, XLOG_FMT); |
248 | cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n", | 259 | xfs_debug(mp, " log : uuid = %pU, fmt = %d\n", |
249 | &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); | 260 | &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); |
250 | } | 261 | } |
251 | #else | 262 | #else |
@@ -268,15 +279,15 @@ xlog_header_check_recover( | |||
268 | * a dirty log created in IRIX. | 279 | * a dirty log created in IRIX. |
269 | */ | 280 | */ |
270 | if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { | 281 | if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { |
271 | xlog_warn( | 282 | xfs_warn(mp, |
272 | "XFS: dirty log written in incompatible format - can't recover"); | 283 | "dirty log written in incompatible format - can't recover"); |
273 | xlog_header_check_dump(mp, head); | 284 | xlog_header_check_dump(mp, head); |
274 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", | 285 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", |
275 | XFS_ERRLEVEL_HIGH, mp); | 286 | XFS_ERRLEVEL_HIGH, mp); |
276 | return XFS_ERROR(EFSCORRUPTED); | 287 | return XFS_ERROR(EFSCORRUPTED); |
277 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 288 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
278 | xlog_warn( | 289 | xfs_warn(mp, |
279 | "XFS: dirty log entry has mismatched uuid - can't recover"); | 290 | "dirty log entry has mismatched uuid - can't recover"); |
280 | xlog_header_check_dump(mp, head); | 291 | xlog_header_check_dump(mp, head); |
281 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", | 292 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", |
282 | XFS_ERRLEVEL_HIGH, mp); | 293 | XFS_ERRLEVEL_HIGH, mp); |
@@ -301,9 +312,9 @@ xlog_header_check_mount( | |||
301 | * h_fs_uuid is nil, we assume this log was last mounted | 312 | * h_fs_uuid is nil, we assume this log was last mounted |
302 | * by IRIX and continue. | 313 | * by IRIX and continue. |
303 | */ | 314 | */ |
304 | xlog_warn("XFS: nil uuid in log - IRIX style log"); | 315 | xfs_warn(mp, "nil uuid in log - IRIX style log"); |
305 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 316 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
306 | xlog_warn("XFS: log has mismatched uuid - can't recover"); | 317 | xfs_warn(mp, "log has mismatched uuid - can't recover"); |
307 | xlog_header_check_dump(mp, head); | 318 | xlog_header_check_dump(mp, head); |
308 | XFS_ERROR_REPORT("xlog_header_check_mount", | 319 | XFS_ERROR_REPORT("xlog_header_check_mount", |
309 | XFS_ERRLEVEL_HIGH, mp); | 320 | XFS_ERRLEVEL_HIGH, mp); |
@@ -479,8 +490,8 @@ xlog_find_verify_log_record( | |||
479 | for (i = (*last_blk) - 1; i >= 0; i--) { | 490 | for (i = (*last_blk) - 1; i >= 0; i--) { |
480 | if (i < start_blk) { | 491 | if (i < start_blk) { |
481 | /* valid log record not found */ | 492 | /* valid log record not found */ |
482 | xlog_warn( | 493 | xfs_warn(log->l_mp, |
483 | "XFS: Log inconsistent (didn't find previous header)"); | 494 | "Log inconsistent (didn't find previous header)"); |
484 | ASSERT(0); | 495 | ASSERT(0); |
485 | error = XFS_ERROR(EIO); | 496 | error = XFS_ERROR(EIO); |
486 | goto out; | 497 | goto out; |
@@ -580,12 +591,12 @@ xlog_find_head( | |||
580 | * mkfs etc write a dummy unmount record to a fresh | 591 | * mkfs etc write a dummy unmount record to a fresh |
581 | * log so we can store the uuid in there | 592 | * log so we can store the uuid in there |
582 | */ | 593 | */ |
583 | xlog_warn("XFS: totally zeroed log"); | 594 | xfs_warn(log->l_mp, "totally zeroed log"); |
584 | } | 595 | } |
585 | 596 | ||
586 | return 0; | 597 | return 0; |
587 | } else if (error) { | 598 | } else if (error) { |
588 | xlog_warn("XFS: empty log check failed"); | 599 | xfs_warn(log->l_mp, "empty log check failed"); |
589 | return error; | 600 | return error; |
590 | } | 601 | } |
591 | 602 | ||
@@ -808,7 +819,7 @@ validate_head: | |||
808 | xlog_put_bp(bp); | 819 | xlog_put_bp(bp); |
809 | 820 | ||
810 | if (error) | 821 | if (error) |
811 | xlog_warn("XFS: failed to find log head"); | 822 | xfs_warn(log->l_mp, "failed to find log head"); |
812 | return error; | 823 | return error; |
813 | } | 824 | } |
814 | 825 | ||
@@ -901,7 +912,7 @@ xlog_find_tail( | |||
901 | } | 912 | } |
902 | } | 913 | } |
903 | if (!found) { | 914 | if (!found) { |
904 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); | 915 | xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); |
905 | ASSERT(0); | 916 | ASSERT(0); |
906 | return XFS_ERROR(EIO); | 917 | return XFS_ERROR(EIO); |
907 | } | 918 | } |
@@ -925,12 +936,12 @@ xlog_find_tail( | |||
925 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); | 936 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); |
926 | if (found == 2) | 937 | if (found == 2) |
927 | log->l_curr_cycle++; | 938 | log->l_curr_cycle++; |
928 | log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); | 939 | atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); |
929 | log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); | 940 | atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); |
930 | log->l_grant_reserve_cycle = log->l_curr_cycle; | 941 | xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, |
931 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | 942 | BBTOB(log->l_curr_block)); |
932 | log->l_grant_write_cycle = log->l_curr_cycle; | 943 | xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, |
933 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | 944 | BBTOB(log->l_curr_block)); |
934 | 945 | ||
935 | /* | 946 | /* |
936 | * Look for unmount record. If we find it, then we know there | 947 | * Look for unmount record. If we find it, then we know there |
@@ -960,7 +971,7 @@ xlog_find_tail( | |||
960 | } | 971 | } |
961 | after_umount_blk = (i + hblks + (int) | 972 | after_umount_blk = (i + hblks + (int) |
962 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; | 973 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; |
963 | tail_lsn = log->l_tail_lsn; | 974 | tail_lsn = atomic64_read(&log->l_tail_lsn); |
964 | if (*head_blk == after_umount_blk && | 975 | if (*head_blk == after_umount_blk && |
965 | be32_to_cpu(rhead->h_num_logops) == 1) { | 976 | be32_to_cpu(rhead->h_num_logops) == 1) { |
966 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 977 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
@@ -975,12 +986,10 @@ xlog_find_tail( | |||
975 | * log records will point recovery to after the | 986 | * log records will point recovery to after the |
976 | * current unmount record. | 987 | * current unmount record. |
977 | */ | 988 | */ |
978 | log->l_tail_lsn = | 989 | xlog_assign_atomic_lsn(&log->l_tail_lsn, |
979 | xlog_assign_lsn(log->l_curr_cycle, | 990 | log->l_curr_cycle, after_umount_blk); |
980 | after_umount_blk); | 991 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, |
981 | log->l_last_sync_lsn = | 992 | log->l_curr_cycle, after_umount_blk); |
982 | xlog_assign_lsn(log->l_curr_cycle, | ||
983 | after_umount_blk); | ||
984 | *tail_blk = after_umount_blk; | 993 | *tail_blk = after_umount_blk; |
985 | 994 | ||
986 | /* | 995 | /* |
@@ -1019,7 +1028,7 @@ done: | |||
1019 | xlog_put_bp(bp); | 1028 | xlog_put_bp(bp); |
1020 | 1029 | ||
1021 | if (error) | 1030 | if (error) |
1022 | xlog_warn("XFS: failed to locate log tail"); | 1031 | xfs_warn(log->l_mp, "failed to locate log tail"); |
1023 | return error; | 1032 | return error; |
1024 | } | 1033 | } |
1025 | 1034 | ||
@@ -1083,7 +1092,8 @@ xlog_find_zeroed( | |||
1083 | * the first block must be 1. If it's not, maybe we're | 1092 | * the first block must be 1. If it's not, maybe we're |
1084 | * not looking at a log... Bail out. | 1093 | * not looking at a log... Bail out. |
1085 | */ | 1094 | */ |
1086 | xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); | 1095 | xfs_warn(log->l_mp, |
1096 | "Log inconsistent or not a log (last==0, first!=1)"); | ||
1087 | return XFS_ERROR(EINVAL); | 1097 | return XFS_ERROR(EINVAL); |
1088 | } | 1098 | } |
1089 | 1099 | ||
@@ -1497,8 +1507,8 @@ xlog_recover_add_to_trans( | |||
1497 | if (list_empty(&trans->r_itemq)) { | 1507 | if (list_empty(&trans->r_itemq)) { |
1498 | /* we need to catch log corruptions here */ | 1508 | /* we need to catch log corruptions here */ |
1499 | if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { | 1509 | if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { |
1500 | xlog_warn("XFS: xlog_recover_add_to_trans: " | 1510 | xfs_warn(log->l_mp, "%s: bad header magic number", |
1501 | "bad header magic number"); | 1511 | __func__); |
1502 | ASSERT(0); | 1512 | ASSERT(0); |
1503 | return XFS_ERROR(EIO); | 1513 | return XFS_ERROR(EIO); |
1504 | } | 1514 | } |
@@ -1525,8 +1535,8 @@ xlog_recover_add_to_trans( | |||
1525 | if (item->ri_total == 0) { /* first region to be added */ | 1535 | if (item->ri_total == 0) { /* first region to be added */ |
1526 | if (in_f->ilf_size == 0 || | 1536 | if (in_f->ilf_size == 0 || |
1527 | in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { | 1537 | in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { |
1528 | xlog_warn( | 1538 | xfs_warn(log->l_mp, |
1529 | "XFS: bad number of regions (%d) in inode log format", | 1539 | "bad number of regions (%d) in inode log format", |
1530 | in_f->ilf_size); | 1540 | in_f->ilf_size); |
1531 | ASSERT(0); | 1541 | ASSERT(0); |
1532 | return XFS_ERROR(EIO); | 1542 | return XFS_ERROR(EIO); |
@@ -1583,8 +1593,9 @@ xlog_recover_reorder_trans( | |||
1583 | list_move_tail(&item->ri_list, &trans->r_itemq); | 1593 | list_move_tail(&item->ri_list, &trans->r_itemq); |
1584 | break; | 1594 | break; |
1585 | default: | 1595 | default: |
1586 | xlog_warn( | 1596 | xfs_warn(log->l_mp, |
1587 | "XFS: xlog_recover_reorder_trans: unrecognized type of log operation"); | 1597 | "%s: unrecognized type of log operation", |
1598 | __func__); | ||
1588 | ASSERT(0); | 1599 | ASSERT(0); |
1589 | return XFS_ERROR(EIO); | 1600 | return XFS_ERROR(EIO); |
1590 | } | 1601 | } |
@@ -1605,82 +1616,45 @@ xlog_recover_reorder_trans( | |||
1605 | * record in the table to tell us how many times we expect to see this | 1616 | * record in the table to tell us how many times we expect to see this |
1606 | * record during the second pass. | 1617 | * record during the second pass. |
1607 | */ | 1618 | */ |
1608 | STATIC void | 1619 | STATIC int |
1609 | xlog_recover_do_buffer_pass1( | 1620 | xlog_recover_buffer_pass1( |
1610 | xlog_t *log, | 1621 | struct log *log, |
1611 | xfs_buf_log_format_t *buf_f) | 1622 | xlog_recover_item_t *item) |
1612 | { | 1623 | { |
1613 | xfs_buf_cancel_t *bcp; | 1624 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
1614 | xfs_buf_cancel_t *nextp; | 1625 | struct list_head *bucket; |
1615 | xfs_buf_cancel_t *prevp; | 1626 | struct xfs_buf_cancel *bcp; |
1616 | xfs_buf_cancel_t **bucket; | ||
1617 | xfs_daddr_t blkno = 0; | ||
1618 | uint len = 0; | ||
1619 | ushort flags = 0; | ||
1620 | |||
1621 | switch (buf_f->blf_type) { | ||
1622 | case XFS_LI_BUF: | ||
1623 | blkno = buf_f->blf_blkno; | ||
1624 | len = buf_f->blf_len; | ||
1625 | flags = buf_f->blf_flags; | ||
1626 | break; | ||
1627 | } | ||
1628 | 1627 | ||
1629 | /* | 1628 | /* |
1630 | * If this isn't a cancel buffer item, then just return. | 1629 | * If this isn't a cancel buffer item, then just return. |
1631 | */ | 1630 | */ |
1632 | if (!(flags & XFS_BLF_CANCEL)) { | 1631 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
1633 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); | 1632 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
1634 | return; | 1633 | return 0; |
1635 | } | ||
1636 | |||
1637 | /* | ||
1638 | * Insert an xfs_buf_cancel record into the hash table of | ||
1639 | * them. If there is already an identical record, bump | ||
1640 | * its reference count. | ||
1641 | */ | ||
1642 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | ||
1643 | XLOG_BC_TABLE_SIZE]; | ||
1644 | /* | ||
1645 | * If the hash bucket is empty then just insert a new record into | ||
1646 | * the bucket. | ||
1647 | */ | ||
1648 | if (*bucket == NULL) { | ||
1649 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | ||
1650 | KM_SLEEP); | ||
1651 | bcp->bc_blkno = blkno; | ||
1652 | bcp->bc_len = len; | ||
1653 | bcp->bc_refcount = 1; | ||
1654 | bcp->bc_next = NULL; | ||
1655 | *bucket = bcp; | ||
1656 | return; | ||
1657 | } | 1634 | } |
1658 | 1635 | ||
1659 | /* | 1636 | /* |
1660 | * The hash bucket is not empty, so search for duplicates of our | 1637 | * Insert an xfs_buf_cancel record into the cancel hash table. |
1661 | * record. If we find one them just bump its refcount. If not | 1638 | * If there is already an identical record, bump its reference count. |
1662 | * then add us at the end of the list. | ||
1663 | */ | 1639 | */ |
1664 | prevp = NULL; | 1640 | bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); |
1665 | nextp = *bucket; | 1641 | list_for_each_entry(bcp, bucket, bc_list) { |
1666 | while (nextp != NULL) { | 1642 | if (bcp->bc_blkno == buf_f->blf_blkno && |
1667 | if (nextp->bc_blkno == blkno && nextp->bc_len == len) { | 1643 | bcp->bc_len == buf_f->blf_len) { |
1668 | nextp->bc_refcount++; | 1644 | bcp->bc_refcount++; |
1669 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); | 1645 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); |
1670 | return; | 1646 | return 0; |
1671 | } | 1647 | } |
1672 | prevp = nextp; | 1648 | } |
1673 | nextp = nextp->bc_next; | 1649 | |
1674 | } | 1650 | bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); |
1675 | ASSERT(prevp != NULL); | 1651 | bcp->bc_blkno = buf_f->blf_blkno; |
1676 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | 1652 | bcp->bc_len = buf_f->blf_len; |
1677 | KM_SLEEP); | ||
1678 | bcp->bc_blkno = blkno; | ||
1679 | bcp->bc_len = len; | ||
1680 | bcp->bc_refcount = 1; | 1653 | bcp->bc_refcount = 1; |
1681 | bcp->bc_next = NULL; | 1654 | list_add_tail(&bcp->bc_list, bucket); |
1682 | prevp->bc_next = bcp; | 1655 | |
1683 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); | 1656 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); |
1657 | return 0; | ||
1684 | } | 1658 | } |
1685 | 1659 | ||
1686 | /* | 1660 | /* |
@@ -1698,14 +1672,13 @@ xlog_recover_do_buffer_pass1( | |||
1698 | */ | 1672 | */ |
1699 | STATIC int | 1673 | STATIC int |
1700 | xlog_check_buffer_cancelled( | 1674 | xlog_check_buffer_cancelled( |
1701 | xlog_t *log, | 1675 | struct log *log, |
1702 | xfs_daddr_t blkno, | 1676 | xfs_daddr_t blkno, |
1703 | uint len, | 1677 | uint len, |
1704 | ushort flags) | 1678 | ushort flags) |
1705 | { | 1679 | { |
1706 | xfs_buf_cancel_t *bcp; | 1680 | struct list_head *bucket; |
1707 | xfs_buf_cancel_t *prevp; | 1681 | struct xfs_buf_cancel *bcp; |
1708 | xfs_buf_cancel_t **bucket; | ||
1709 | 1682 | ||
1710 | if (log->l_buf_cancel_table == NULL) { | 1683 | if (log->l_buf_cancel_table == NULL) { |
1711 | /* | 1684 | /* |
@@ -1716,128 +1689,70 @@ xlog_check_buffer_cancelled( | |||
1716 | return 0; | 1689 | return 0; |
1717 | } | 1690 | } |
1718 | 1691 | ||
1719 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | ||
1720 | XLOG_BC_TABLE_SIZE]; | ||
1721 | bcp = *bucket; | ||
1722 | if (bcp == NULL) { | ||
1723 | /* | ||
1724 | * There is no corresponding entry in the table built | ||
1725 | * in pass one, so this buffer has not been cancelled. | ||
1726 | */ | ||
1727 | ASSERT(!(flags & XFS_BLF_CANCEL)); | ||
1728 | return 0; | ||
1729 | } | ||
1730 | |||
1731 | /* | 1692 | /* |
1732 | * Search for an entry in the buffer cancel table that | 1693 | * Search for an entry in the cancel table that matches our buffer. |
1733 | * matches our buffer. | ||
1734 | */ | 1694 | */ |
1735 | prevp = NULL; | 1695 | bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); |
1736 | while (bcp != NULL) { | 1696 | list_for_each_entry(bcp, bucket, bc_list) { |
1737 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) { | 1697 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) |
1738 | /* | 1698 | goto found; |
1739 | * We've got a match, so return 1 so that the | ||
1740 | * recovery of this buffer is cancelled. | ||
1741 | * If this buffer is actually a buffer cancel | ||
1742 | * log item, then decrement the refcount on the | ||
1743 | * one in the table and remove it if this is the | ||
1744 | * last reference. | ||
1745 | */ | ||
1746 | if (flags & XFS_BLF_CANCEL) { | ||
1747 | bcp->bc_refcount--; | ||
1748 | if (bcp->bc_refcount == 0) { | ||
1749 | if (prevp == NULL) { | ||
1750 | *bucket = bcp->bc_next; | ||
1751 | } else { | ||
1752 | prevp->bc_next = bcp->bc_next; | ||
1753 | } | ||
1754 | kmem_free(bcp); | ||
1755 | } | ||
1756 | } | ||
1757 | return 1; | ||
1758 | } | ||
1759 | prevp = bcp; | ||
1760 | bcp = bcp->bc_next; | ||
1761 | } | 1699 | } |
1700 | |||
1762 | /* | 1701 | /* |
1763 | * We didn't find a corresponding entry in the table, so | 1702 | * We didn't find a corresponding entry in the table, so return 0 so |
1764 | * return 0 so that the buffer is NOT cancelled. | 1703 | * that the buffer is NOT cancelled. |
1765 | */ | 1704 | */ |
1766 | ASSERT(!(flags & XFS_BLF_CANCEL)); | 1705 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1767 | return 0; | 1706 | return 0; |
1768 | } | ||
1769 | 1707 | ||
1770 | STATIC int | 1708 | found: |
1771 | xlog_recover_do_buffer_pass2( | 1709 | /* |
1772 | xlog_t *log, | 1710 | * We've got a match, so return 1 so that the recovery of this buffer |
1773 | xfs_buf_log_format_t *buf_f) | 1711 | * is cancelled. If this buffer is actually a buffer cancel log |
1774 | { | 1712 | * item, then decrement the refcount on the one in the table and |
1775 | xfs_daddr_t blkno = 0; | 1713 | * remove it if this is the last reference. |
1776 | ushort flags = 0; | 1714 | */ |
1777 | uint len = 0; | 1715 | if (flags & XFS_BLF_CANCEL) { |
1778 | 1716 | if (--bcp->bc_refcount == 0) { | |
1779 | switch (buf_f->blf_type) { | 1717 | list_del(&bcp->bc_list); |
1780 | case XFS_LI_BUF: | 1718 | kmem_free(bcp); |
1781 | blkno = buf_f->blf_blkno; | 1719 | } |
1782 | flags = buf_f->blf_flags; | ||
1783 | len = buf_f->blf_len; | ||
1784 | break; | ||
1785 | } | 1720 | } |
1786 | 1721 | return 1; | |
1787 | return xlog_check_buffer_cancelled(log, blkno, len, flags); | ||
1788 | } | 1722 | } |
1789 | 1723 | ||
1790 | /* | 1724 | /* |
1791 | * Perform recovery for a buffer full of inodes. In these buffers, | 1725 | * Perform recovery for a buffer full of inodes. In these buffers, the only |
1792 | * the only data which should be recovered is that which corresponds | 1726 | * data which should be recovered is that which corresponds to the |
1793 | * to the di_next_unlinked pointers in the on disk inode structures. | 1727 | * di_next_unlinked pointers in the on disk inode structures. The rest of the |
1794 | * The rest of the data for the inodes is always logged through the | 1728 | * data for the inodes is always logged through the inodes themselves rather |
1795 | * inodes themselves rather than the inode buffer and is recovered | 1729 | * than the inode buffer and is recovered in xlog_recover_inode_pass2(). |
1796 | * in xlog_recover_do_inode_trans(). | ||
1797 | * | 1730 | * |
1798 | * The only time when buffers full of inodes are fully recovered is | 1731 | * The only time when buffers full of inodes are fully recovered is when the |
1799 | * when the buffer is full of newly allocated inodes. In this case | 1732 | * buffer is full of newly allocated inodes. In this case the buffer will |
1800 | * the buffer will not be marked as an inode buffer and so will be | 1733 | * not be marked as an inode buffer and so will be sent to |
1801 | * sent to xlog_recover_do_reg_buffer() below during recovery. | 1734 | * xlog_recover_do_reg_buffer() below during recovery. |
1802 | */ | 1735 | */ |
1803 | STATIC int | 1736 | STATIC int |
1804 | xlog_recover_do_inode_buffer( | 1737 | xlog_recover_do_inode_buffer( |
1805 | xfs_mount_t *mp, | 1738 | struct xfs_mount *mp, |
1806 | xlog_recover_item_t *item, | 1739 | xlog_recover_item_t *item, |
1807 | xfs_buf_t *bp, | 1740 | struct xfs_buf *bp, |
1808 | xfs_buf_log_format_t *buf_f) | 1741 | xfs_buf_log_format_t *buf_f) |
1809 | { | 1742 | { |
1810 | int i; | 1743 | int i; |
1811 | int item_index; | 1744 | int item_index = 0; |
1812 | int bit; | 1745 | int bit = 0; |
1813 | int nbits; | 1746 | int nbits = 0; |
1814 | int reg_buf_offset; | 1747 | int reg_buf_offset = 0; |
1815 | int reg_buf_bytes; | 1748 | int reg_buf_bytes = 0; |
1816 | int next_unlinked_offset; | 1749 | int next_unlinked_offset; |
1817 | int inodes_per_buf; | 1750 | int inodes_per_buf; |
1818 | xfs_agino_t *logged_nextp; | 1751 | xfs_agino_t *logged_nextp; |
1819 | xfs_agino_t *buffer_nextp; | 1752 | xfs_agino_t *buffer_nextp; |
1820 | unsigned int *data_map = NULL; | ||
1821 | unsigned int map_size = 0; | ||
1822 | 1753 | ||
1823 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | 1754 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
1824 | 1755 | ||
1825 | switch (buf_f->blf_type) { | ||
1826 | case XFS_LI_BUF: | ||
1827 | data_map = buf_f->blf_data_map; | ||
1828 | map_size = buf_f->blf_map_size; | ||
1829 | break; | ||
1830 | } | ||
1831 | /* | ||
1832 | * Set the variables corresponding to the current region to | ||
1833 | * 0 so that we'll initialize them on the first pass through | ||
1834 | * the loop. | ||
1835 | */ | ||
1836 | reg_buf_offset = 0; | ||
1837 | reg_buf_bytes = 0; | ||
1838 | bit = 0; | ||
1839 | nbits = 0; | ||
1840 | item_index = 0; | ||
1841 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; | 1756 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; |
1842 | for (i = 0; i < inodes_per_buf; i++) { | 1757 | for (i = 0; i < inodes_per_buf; i++) { |
1843 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + | 1758 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
@@ -1852,18 +1767,18 @@ xlog_recover_do_inode_buffer( | |||
1852 | * the current di_next_unlinked field. | 1767 | * the current di_next_unlinked field. |
1853 | */ | 1768 | */ |
1854 | bit += nbits; | 1769 | bit += nbits; |
1855 | bit = xfs_next_bit(data_map, map_size, bit); | 1770 | bit = xfs_next_bit(buf_f->blf_data_map, |
1771 | buf_f->blf_map_size, bit); | ||
1856 | 1772 | ||
1857 | /* | 1773 | /* |
1858 | * If there are no more logged regions in the | 1774 | * If there are no more logged regions in the |
1859 | * buffer, then we're done. | 1775 | * buffer, then we're done. |
1860 | */ | 1776 | */ |
1861 | if (bit == -1) { | 1777 | if (bit == -1) |
1862 | return 0; | 1778 | return 0; |
1863 | } | ||
1864 | 1779 | ||
1865 | nbits = xfs_contig_bits(data_map, map_size, | 1780 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
1866 | bit); | 1781 | buf_f->blf_map_size, bit); |
1867 | ASSERT(nbits > 0); | 1782 | ASSERT(nbits > 0); |
1868 | reg_buf_offset = bit << XFS_BLF_SHIFT; | 1783 | reg_buf_offset = bit << XFS_BLF_SHIFT; |
1869 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; | 1784 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
@@ -1875,9 +1790,8 @@ xlog_recover_do_inode_buffer( | |||
1875 | * di_next_unlinked field, then move on to the next | 1790 | * di_next_unlinked field, then move on to the next |
1876 | * di_next_unlinked field. | 1791 | * di_next_unlinked field. |
1877 | */ | 1792 | */ |
1878 | if (next_unlinked_offset < reg_buf_offset) { | 1793 | if (next_unlinked_offset < reg_buf_offset) |
1879 | continue; | 1794 | continue; |
1880 | } | ||
1881 | 1795 | ||
1882 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1796 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1883 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); | 1797 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
@@ -1891,8 +1805,9 @@ xlog_recover_do_inode_buffer( | |||
1891 | logged_nextp = item->ri_buf[item_index].i_addr + | 1805 | logged_nextp = item->ri_buf[item_index].i_addr + |
1892 | next_unlinked_offset - reg_buf_offset; | 1806 | next_unlinked_offset - reg_buf_offset; |
1893 | if (unlikely(*logged_nextp == 0)) { | 1807 | if (unlikely(*logged_nextp == 0)) { |
1894 | xfs_fs_cmn_err(CE_ALERT, mp, | 1808 | xfs_alert(mp, |
1895 | "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", | 1809 | "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " |
1810 | "Trying to replay bad (0) inode di_next_unlinked field.", | ||
1896 | item, bp); | 1811 | item, bp); |
1897 | XFS_ERROR_REPORT("xlog_recover_do_inode_buf", | 1812 | XFS_ERROR_REPORT("xlog_recover_do_inode_buf", |
1898 | XFS_ERRLEVEL_LOW, mp); | 1813 | XFS_ERRLEVEL_LOW, mp); |
@@ -1913,36 +1828,29 @@ xlog_recover_do_inode_buffer( | |||
1913 | * given buffer. The bitmap in the buf log format structure indicates | 1828 | * given buffer. The bitmap in the buf log format structure indicates |
1914 | * where to place the logged data. | 1829 | * where to place the logged data. |
1915 | */ | 1830 | */ |
1916 | /*ARGSUSED*/ | ||
1917 | STATIC void | 1831 | STATIC void |
1918 | xlog_recover_do_reg_buffer( | 1832 | xlog_recover_do_reg_buffer( |
1919 | struct xfs_mount *mp, | 1833 | struct xfs_mount *mp, |
1920 | xlog_recover_item_t *item, | 1834 | xlog_recover_item_t *item, |
1921 | xfs_buf_t *bp, | 1835 | struct xfs_buf *bp, |
1922 | xfs_buf_log_format_t *buf_f) | 1836 | xfs_buf_log_format_t *buf_f) |
1923 | { | 1837 | { |
1924 | int i; | 1838 | int i; |
1925 | int bit; | 1839 | int bit; |
1926 | int nbits; | 1840 | int nbits; |
1927 | unsigned int *data_map = NULL; | ||
1928 | unsigned int map_size = 0; | ||
1929 | int error; | 1841 | int error; |
1930 | 1842 | ||
1931 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); | 1843 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); |
1932 | 1844 | ||
1933 | switch (buf_f->blf_type) { | ||
1934 | case XFS_LI_BUF: | ||
1935 | data_map = buf_f->blf_data_map; | ||
1936 | map_size = buf_f->blf_map_size; | ||
1937 | break; | ||
1938 | } | ||
1939 | bit = 0; | 1845 | bit = 0; |
1940 | i = 1; /* 0 is the buf format structure */ | 1846 | i = 1; /* 0 is the buf format structure */ |
1941 | while (1) { | 1847 | while (1) { |
1942 | bit = xfs_next_bit(data_map, map_size, bit); | 1848 | bit = xfs_next_bit(buf_f->blf_data_map, |
1849 | buf_f->blf_map_size, bit); | ||
1943 | if (bit == -1) | 1850 | if (bit == -1) |
1944 | break; | 1851 | break; |
1945 | nbits = xfs_contig_bits(data_map, map_size, bit); | 1852 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
1853 | buf_f->blf_map_size, bit); | ||
1946 | ASSERT(nbits > 0); | 1854 | ASSERT(nbits > 0); |
1947 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1855 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1948 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); | 1856 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
@@ -1958,17 +1866,17 @@ xlog_recover_do_reg_buffer( | |||
1958 | if (buf_f->blf_flags & | 1866 | if (buf_f->blf_flags & |
1959 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { | 1867 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
1960 | if (item->ri_buf[i].i_addr == NULL) { | 1868 | if (item->ri_buf[i].i_addr == NULL) { |
1961 | cmn_err(CE_ALERT, | 1869 | xfs_alert(mp, |
1962 | "XFS: NULL dquot in %s.", __func__); | 1870 | "XFS: NULL dquot in %s.", __func__); |
1963 | goto next; | 1871 | goto next; |
1964 | } | 1872 | } |
1965 | if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { | 1873 | if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { |
1966 | cmn_err(CE_ALERT, | 1874 | xfs_alert(mp, |
1967 | "XFS: dquot too small (%d) in %s.", | 1875 | "XFS: dquot too small (%d) in %s.", |
1968 | item->ri_buf[i].i_len, __func__); | 1876 | item->ri_buf[i].i_len, __func__); |
1969 | goto next; | 1877 | goto next; |
1970 | } | 1878 | } |
1971 | error = xfs_qm_dqcheck(item->ri_buf[i].i_addr, | 1879 | error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, |
1972 | -1, 0, XFS_QMOPT_DOWARN, | 1880 | -1, 0, XFS_QMOPT_DOWARN, |
1973 | "dquot_buf_recover"); | 1881 | "dquot_buf_recover"); |
1974 | if (error) | 1882 | if (error) |
@@ -1993,6 +1901,7 @@ xlog_recover_do_reg_buffer( | |||
1993 | */ | 1901 | */ |
1994 | int | 1902 | int |
1995 | xfs_qm_dqcheck( | 1903 | xfs_qm_dqcheck( |
1904 | struct xfs_mount *mp, | ||
1996 | xfs_disk_dquot_t *ddq, | 1905 | xfs_disk_dquot_t *ddq, |
1997 | xfs_dqid_t id, | 1906 | xfs_dqid_t id, |
1998 | uint type, /* used only when IO_dorepair is true */ | 1907 | uint type, /* used only when IO_dorepair is true */ |
@@ -2019,14 +1928,14 @@ xfs_qm_dqcheck( | |||
2019 | */ | 1928 | */ |
2020 | if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { | 1929 | if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { |
2021 | if (flags & XFS_QMOPT_DOWARN) | 1930 | if (flags & XFS_QMOPT_DOWARN) |
2022 | cmn_err(CE_ALERT, | 1931 | xfs_alert(mp, |
2023 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", | 1932 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", |
2024 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); | 1933 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); |
2025 | errs++; | 1934 | errs++; |
2026 | } | 1935 | } |
2027 | if (ddq->d_version != XFS_DQUOT_VERSION) { | 1936 | if (ddq->d_version != XFS_DQUOT_VERSION) { |
2028 | if (flags & XFS_QMOPT_DOWARN) | 1937 | if (flags & XFS_QMOPT_DOWARN) |
2029 | cmn_err(CE_ALERT, | 1938 | xfs_alert(mp, |
2030 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", | 1939 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", |
2031 | str, id, ddq->d_version, XFS_DQUOT_VERSION); | 1940 | str, id, ddq->d_version, XFS_DQUOT_VERSION); |
2032 | errs++; | 1941 | errs++; |
@@ -2036,7 +1945,7 @@ xfs_qm_dqcheck( | |||
2036 | ddq->d_flags != XFS_DQ_PROJ && | 1945 | ddq->d_flags != XFS_DQ_PROJ && |
2037 | ddq->d_flags != XFS_DQ_GROUP) { | 1946 | ddq->d_flags != XFS_DQ_GROUP) { |
2038 | if (flags & XFS_QMOPT_DOWARN) | 1947 | if (flags & XFS_QMOPT_DOWARN) |
2039 | cmn_err(CE_ALERT, | 1948 | xfs_alert(mp, |
2040 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", | 1949 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", |
2041 | str, id, ddq->d_flags); | 1950 | str, id, ddq->d_flags); |
2042 | errs++; | 1951 | errs++; |
@@ -2044,7 +1953,7 @@ xfs_qm_dqcheck( | |||
2044 | 1953 | ||
2045 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { | 1954 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { |
2046 | if (flags & XFS_QMOPT_DOWARN) | 1955 | if (flags & XFS_QMOPT_DOWARN) |
2047 | cmn_err(CE_ALERT, | 1956 | xfs_alert(mp, |
2048 | "%s : ondisk-dquot 0x%p, ID mismatch: " | 1957 | "%s : ondisk-dquot 0x%p, ID mismatch: " |
2049 | "0x%x expected, found id 0x%x", | 1958 | "0x%x expected, found id 0x%x", |
2050 | str, ddq, id, be32_to_cpu(ddq->d_id)); | 1959 | str, ddq, id, be32_to_cpu(ddq->d_id)); |
@@ -2057,9 +1966,8 @@ xfs_qm_dqcheck( | |||
2057 | be64_to_cpu(ddq->d_blk_softlimit)) { | 1966 | be64_to_cpu(ddq->d_blk_softlimit)) { |
2058 | if (!ddq->d_btimer) { | 1967 | if (!ddq->d_btimer) { |
2059 | if (flags & XFS_QMOPT_DOWARN) | 1968 | if (flags & XFS_QMOPT_DOWARN) |
2060 | cmn_err(CE_ALERT, | 1969 | xfs_alert(mp, |
2061 | "%s : Dquot ID 0x%x (0x%p) " | 1970 | "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", |
2062 | "BLK TIMER NOT STARTED", | ||
2063 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 1971 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2064 | errs++; | 1972 | errs++; |
2065 | } | 1973 | } |
@@ -2069,9 +1977,8 @@ xfs_qm_dqcheck( | |||
2069 | be64_to_cpu(ddq->d_ino_softlimit)) { | 1977 | be64_to_cpu(ddq->d_ino_softlimit)) { |
2070 | if (!ddq->d_itimer) { | 1978 | if (!ddq->d_itimer) { |
2071 | if (flags & XFS_QMOPT_DOWARN) | 1979 | if (flags & XFS_QMOPT_DOWARN) |
2072 | cmn_err(CE_ALERT, | 1980 | xfs_alert(mp, |
2073 | "%s : Dquot ID 0x%x (0x%p) " | 1981 | "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", |
2074 | "INODE TIMER NOT STARTED", | ||
2075 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 1982 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2076 | errs++; | 1983 | errs++; |
2077 | } | 1984 | } |
@@ -2081,9 +1988,8 @@ xfs_qm_dqcheck( | |||
2081 | be64_to_cpu(ddq->d_rtb_softlimit)) { | 1988 | be64_to_cpu(ddq->d_rtb_softlimit)) { |
2082 | if (!ddq->d_rtbtimer) { | 1989 | if (!ddq->d_rtbtimer) { |
2083 | if (flags & XFS_QMOPT_DOWARN) | 1990 | if (flags & XFS_QMOPT_DOWARN) |
2084 | cmn_err(CE_ALERT, | 1991 | xfs_alert(mp, |
2085 | "%s : Dquot ID 0x%x (0x%p) " | 1992 | "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", |
2086 | "RTBLK TIMER NOT STARTED", | ||
2087 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 1993 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2088 | errs++; | 1994 | errs++; |
2089 | } | 1995 | } |
@@ -2094,7 +2000,7 @@ xfs_qm_dqcheck( | |||
2094 | return errs; | 2000 | return errs; |
2095 | 2001 | ||
2096 | if (flags & XFS_QMOPT_DOWARN) | 2002 | if (flags & XFS_QMOPT_DOWARN) |
2097 | cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); | 2003 | xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); |
2098 | 2004 | ||
2099 | /* | 2005 | /* |
2100 | * Typically, a repair is only requested by quotacheck. | 2006 | * Typically, a repair is only requested by quotacheck. |
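
Note: the hunks above all swap cmn_err()/xfs_fs_cmn_err() for the mount-aware xfs_alert()/xfs_notice() helpers, which take the xfs_mount so the filesystem name no longer has to be interpolated by hand at each call site. A hedged sketch of such a helper — the prefix format is an assumption for illustration, not copied from xfs_message.c:

	/* Sketch of an xfs_alert()-style wrapper: variadic, prefixing every
	 * message with the filesystem name from the mount, and tolerating a
	 * NULL mount (as the kmem code uses) by falling back to a bare
	 * prefix.
	 */
	#include <stdarg.h>
	#include <stdio.h>

	struct xfs_mount { const char *m_fsname; };

	static void xfs_alert_sketch(const struct xfs_mount *mp, const char *fmt, ...)
	{
		va_list ap;

		if (mp)
			fprintf(stderr, "XFS (%s): ", mp->m_fsname);
		else
			fprintf(stderr, "XFS: ");

		va_start(ap, fmt);
		vfprintf(stderr, fmt, ap);
		va_end(ap);
		fputc('\n', stderr);
	}

	int main(void)
	{
		struct xfs_mount mp = { .m_fsname = "sda1" };

		xfs_alert_sketch(&mp, "dquot too small (%d) in %s.", 12, __func__);
		xfs_alert_sketch(NULL, "possible memory allocation deadlock");
		return 0;
	}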
@@ -2176,77 +2082,46 @@ xlog_recover_do_dquot_buffer( | |||
2176 | * for more details on the implementation of the table of cancel records. | 2082 | * for more details on the implementation of the table of cancel records. |
2177 | */ | 2083 | */ |
2178 | STATIC int | 2084 | STATIC int |
2179 | xlog_recover_do_buffer_trans( | 2085 | xlog_recover_buffer_pass2( |
2180 | xlog_t *log, | 2086 | xlog_t *log, |
2181 | xlog_recover_item_t *item, | 2087 | xlog_recover_item_t *item) |
2182 | int pass) | ||
2183 | { | 2088 | { |
2184 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2089 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
2185 | xfs_mount_t *mp; | 2090 | xfs_mount_t *mp = log->l_mp; |
2186 | xfs_buf_t *bp; | 2091 | xfs_buf_t *bp; |
2187 | int error; | 2092 | int error; |
2188 | int cancel; | ||
2189 | xfs_daddr_t blkno; | ||
2190 | int len; | ||
2191 | ushort flags; | ||
2192 | uint buf_flags; | 2093 | uint buf_flags; |
2193 | 2094 | ||
2194 | if (pass == XLOG_RECOVER_PASS1) { | 2095 | /* |
2195 | /* | 2096 | * In this pass we only want to recover all the buffers which have |
2196 | * In this pass we're only looking for buf items | 2097 | * not been cancelled and are not cancellation buffers themselves. |
2197 | * with the XFS_BLF_CANCEL bit set. | 2098 | */ |
2198 | */ | 2099 | if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, |
2199 | xlog_recover_do_buffer_pass1(log, buf_f); | 2100 | buf_f->blf_len, buf_f->blf_flags)) { |
2101 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
2200 | return 0; | 2102 | return 0; |
2201 | } else { | ||
2202 | /* | ||
2203 | * In this pass we want to recover all the buffers | ||
2204 | * which have not been cancelled and are not | ||
2205 | * cancellation buffers themselves. The routine | ||
2206 | * we call here will tell us whether or not to | ||
2207 | * continue with the replay of this buffer. | ||
2208 | */ | ||
2209 | cancel = xlog_recover_do_buffer_pass2(log, buf_f); | ||
2210 | if (cancel) { | ||
2211 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
2212 | return 0; | ||
2213 | } | ||
2214 | } | 2103 | } |
2104 | |||
2215 | trace_xfs_log_recover_buf_recover(log, buf_f); | 2105 | trace_xfs_log_recover_buf_recover(log, buf_f); |
2216 | switch (buf_f->blf_type) { | ||
2217 | case XFS_LI_BUF: | ||
2218 | blkno = buf_f->blf_blkno; | ||
2219 | len = buf_f->blf_len; | ||
2220 | flags = buf_f->blf_flags; | ||
2221 | break; | ||
2222 | default: | ||
2223 | xfs_fs_cmn_err(CE_ALERT, log->l_mp, | ||
2224 | "xfs_log_recover: unknown buffer type 0x%x, logdev %s", | ||
2225 | buf_f->blf_type, log->l_mp->m_logname ? | ||
2226 | log->l_mp->m_logname : "internal"); | ||
2227 | XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", | ||
2228 | XFS_ERRLEVEL_LOW, log->l_mp); | ||
2229 | return XFS_ERROR(EFSCORRUPTED); | ||
2230 | } | ||
2231 | 2106 | ||
2232 | mp = log->l_mp; | ||
2233 | buf_flags = XBF_LOCK; | 2107 | buf_flags = XBF_LOCK; |
2234 | if (!(flags & XFS_BLF_INODE_BUF)) | 2108 | if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF)) |
2235 | buf_flags |= XBF_MAPPED; | 2109 | buf_flags |= XBF_MAPPED; |
2236 | 2110 | ||
2237 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); | 2111 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
2112 | buf_flags); | ||
2238 | if (XFS_BUF_ISERROR(bp)) { | 2113 | if (XFS_BUF_ISERROR(bp)) { |
2239 | xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, | 2114 | xfs_ioerror_alert("xlog_recover_do..(read#1)", mp, |
2240 | bp, blkno); | 2115 | bp, buf_f->blf_blkno); |
2241 | error = XFS_BUF_GETERROR(bp); | 2116 | error = XFS_BUF_GETERROR(bp); |
2242 | xfs_buf_relse(bp); | 2117 | xfs_buf_relse(bp); |
2243 | return error; | 2118 | return error; |
2244 | } | 2119 | } |
2245 | 2120 | ||
2246 | error = 0; | 2121 | error = 0; |
2247 | if (flags & XFS_BLF_INODE_BUF) { | 2122 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
2248 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2123 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2249 | } else if (flags & | 2124 | } else if (buf_f->blf_flags & |
2250 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { | 2125 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
2251 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2126 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
2252 | } else { | 2127 | } else { |
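
Note: xlog_recover_buffer_pass2() now assumes the pass separation is handled by its caller — pass 1 has already recorded every buffer logged with the cancel flag, so pass 2 just skips anything found in that record and replays the rest. A minimal two-pass sketch, with a fixed array standing in for the kernel's cancel table:

	/* Two-pass shape of buffer recovery after this hunk: pass 1 only
	 * records cancelled buffers, pass 2 skips anything so recorded and
	 * replays the rest. The array and the sample "log" are stand-ins.
	 */
	#include <stdio.h>

	#define CANCEL_FLAG	0x1
	#define MAX_CANCELLED	16

	static long long cancelled[MAX_CANCELLED];
	static int ncancelled;

	struct buf_item { long long blkno; unsigned flags; };

	static void buffer_pass1(const struct buf_item *item)
	{
		if ((item->flags & CANCEL_FLAG) && ncancelled < MAX_CANCELLED)
			cancelled[ncancelled++] = item->blkno;
	}

	static int buffer_cancelled(long long blkno)
	{
		for (int i = 0; i < ncancelled; i++)
			if (cancelled[i] == blkno)
				return 1;
		return 0;
	}

	static void buffer_pass2(const struct buf_item *item)
	{
		if (buffer_cancelled(item->blkno)) {
			printf("blk %lld: cancelled, not replayed\n", item->blkno);
			return;
		}
		printf("blk %lld: replayed\n", item->blkno);
	}

	int main(void)
	{
		struct buf_item log[] = {
			{ 100, 0 }, { 200, CANCEL_FLAG }, { 200, 0 }, { 300, 0 },
		};
		int n = sizeof(log) / sizeof(log[0]);

		for (int i = 0; i < n; i++)	/* pass 1: collect cancellations */
			buffer_pass1(&log[i]);
		for (int i = 0; i < n; i++)	/* pass 2: replay the survivors */
			buffer_pass2(&log[i]);
		return 0;
	}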
@@ -2286,16 +2161,14 @@ xlog_recover_do_buffer_trans( | |||
2286 | } | 2161 | } |
2287 | 2162 | ||
2288 | STATIC int | 2163 | STATIC int |
2289 | xlog_recover_do_inode_trans( | 2164 | xlog_recover_inode_pass2( |
2290 | xlog_t *log, | 2165 | xlog_t *log, |
2291 | xlog_recover_item_t *item, | 2166 | xlog_recover_item_t *item) |
2292 | int pass) | ||
2293 | { | 2167 | { |
2294 | xfs_inode_log_format_t *in_f; | 2168 | xfs_inode_log_format_t *in_f; |
2295 | xfs_mount_t *mp; | 2169 | xfs_mount_t *mp = log->l_mp; |
2296 | xfs_buf_t *bp; | 2170 | xfs_buf_t *bp; |
2297 | xfs_dinode_t *dip; | 2171 | xfs_dinode_t *dip; |
2298 | xfs_ino_t ino; | ||
2299 | int len; | 2172 | int len; |
2300 | xfs_caddr_t src; | 2173 | xfs_caddr_t src; |
2301 | xfs_caddr_t dest; | 2174 | xfs_caddr_t dest; |
@@ -2305,10 +2178,6 @@ xlog_recover_do_inode_trans( | |||
2305 | xfs_icdinode_t *dicp; | 2178 | xfs_icdinode_t *dicp; |
2306 | int need_free = 0; | 2179 | int need_free = 0; |
2307 | 2180 | ||
2308 | if (pass == XLOG_RECOVER_PASS1) { | ||
2309 | return 0; | ||
2310 | } | ||
2311 | |||
2312 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { | 2181 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { |
2313 | in_f = item->ri_buf[0].i_addr; | 2182 | in_f = item->ri_buf[0].i_addr; |
2314 | } else { | 2183 | } else { |
@@ -2318,8 +2187,6 @@ xlog_recover_do_inode_trans( | |||
2318 | if (error) | 2187 | if (error) |
2319 | goto error; | 2188 | goto error; |
2320 | } | 2189 | } |
2321 | ino = in_f->ilf_ino; | ||
2322 | mp = log->l_mp; | ||
2323 | 2190 | ||
2324 | /* | 2191 | /* |
2325 | * Inode buffers can be freed, look out for it, | 2192 | * Inode buffers can be freed, look out for it, |
@@ -2352,10 +2219,10 @@ xlog_recover_do_inode_trans( | |||
2352 | */ | 2219 | */ |
2353 | if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { | 2220 | if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { |
2354 | xfs_buf_relse(bp); | 2221 | xfs_buf_relse(bp); |
2355 | xfs_fs_cmn_err(CE_ALERT, mp, | 2222 | xfs_alert(mp, |
2356 | "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", | 2223 | "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", |
2357 | dip, bp, ino); | 2224 | __func__, dip, bp, in_f->ilf_ino); |
2358 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", | 2225 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
2359 | XFS_ERRLEVEL_LOW, mp); | 2226 | XFS_ERRLEVEL_LOW, mp); |
2360 | error = EFSCORRUPTED; | 2227 | error = EFSCORRUPTED; |
2361 | goto error; | 2228 | goto error; |
@@ -2363,10 +2230,10 @@ xlog_recover_do_inode_trans( | |||
2363 | dicp = item->ri_buf[1].i_addr; | 2230 | dicp = item->ri_buf[1].i_addr; |
2364 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { | 2231 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { |
2365 | xfs_buf_relse(bp); | 2232 | xfs_buf_relse(bp); |
2366 | xfs_fs_cmn_err(CE_ALERT, mp, | 2233 | xfs_alert(mp, |
2367 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", | 2234 | "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", |
2368 | item, ino); | 2235 | __func__, item, in_f->ilf_ino); |
2369 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", | 2236 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
2370 | XFS_ERRLEVEL_LOW, mp); | 2237 | XFS_ERRLEVEL_LOW, mp); |
2371 | error = EFSCORRUPTED; | 2238 | error = EFSCORRUPTED; |
2372 | goto error; | 2239 | goto error; |
@@ -2394,12 +2261,13 @@ xlog_recover_do_inode_trans( | |||
2394 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { | 2261 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { |
2395 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2262 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2396 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { | 2263 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { |
2397 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", | 2264 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
2398 | XFS_ERRLEVEL_LOW, mp, dicp); | 2265 | XFS_ERRLEVEL_LOW, mp, dicp); |
2399 | xfs_buf_relse(bp); | 2266 | xfs_buf_relse(bp); |
2400 | xfs_fs_cmn_err(CE_ALERT, mp, | 2267 | xfs_alert(mp, |
2401 | "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2268 | "%s: Bad regular inode log record, rec ptr 0x%p, " |
2402 | item, dip, bp, ino); | 2269 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2270 | __func__, item, dip, bp, in_f->ilf_ino); | ||
2403 | error = EFSCORRUPTED; | 2271 | error = EFSCORRUPTED; |
2404 | goto error; | 2272 | goto error; |
2405 | } | 2273 | } |
@@ -2407,45 +2275,48 @@ xlog_recover_do_inode_trans( | |||
2407 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2275 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2408 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && | 2276 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && |
2409 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { | 2277 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { |
2410 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", | 2278 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
2411 | XFS_ERRLEVEL_LOW, mp, dicp); | 2279 | XFS_ERRLEVEL_LOW, mp, dicp); |
2412 | xfs_buf_relse(bp); | 2280 | xfs_buf_relse(bp); |
2413 | xfs_fs_cmn_err(CE_ALERT, mp, | 2281 | xfs_alert(mp, |
2414 | "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2282 | "%s: Bad dir inode log record, rec ptr 0x%p, " |
2415 | item, dip, bp, ino); | 2283 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2284 | __func__, item, dip, bp, in_f->ilf_ino); | ||
2416 | error = EFSCORRUPTED; | 2285 | error = EFSCORRUPTED; |
2417 | goto error; | 2286 | goto error; |
2418 | } | 2287 | } |
2419 | } | 2288 | } |
2420 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ | 2289 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ |
2421 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", | 2290 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
2422 | XFS_ERRLEVEL_LOW, mp, dicp); | 2291 | XFS_ERRLEVEL_LOW, mp, dicp); |
2423 | xfs_buf_relse(bp); | 2292 | xfs_buf_relse(bp); |
2424 | xfs_fs_cmn_err(CE_ALERT, mp, | 2293 | xfs_alert(mp, |
2425 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", | 2294 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2426 | item, dip, bp, ino, | 2295 | "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", |
2296 | __func__, item, dip, bp, in_f->ilf_ino, | ||
2427 | dicp->di_nextents + dicp->di_anextents, | 2297 | dicp->di_nextents + dicp->di_anextents, |
2428 | dicp->di_nblocks); | 2298 | dicp->di_nblocks); |
2429 | error = EFSCORRUPTED; | 2299 | error = EFSCORRUPTED; |
2430 | goto error; | 2300 | goto error; |
2431 | } | 2301 | } |
2432 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { | 2302 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { |
2433 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", | 2303 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
2434 | XFS_ERRLEVEL_LOW, mp, dicp); | 2304 | XFS_ERRLEVEL_LOW, mp, dicp); |
2435 | xfs_buf_relse(bp); | 2305 | xfs_buf_relse(bp); |
2436 | xfs_fs_cmn_err(CE_ALERT, mp, | 2306 | xfs_alert(mp, |
2437 | "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", | 2307 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2438 | item, dip, bp, ino, dicp->di_forkoff); | 2308 | "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, |
2309 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); | ||
2439 | error = EFSCORRUPTED; | 2310 | error = EFSCORRUPTED; |
2440 | goto error; | 2311 | goto error; |
2441 | } | 2312 | } |
2442 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { | 2313 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { |
2443 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", | 2314 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
2444 | XFS_ERRLEVEL_LOW, mp, dicp); | 2315 | XFS_ERRLEVEL_LOW, mp, dicp); |
2445 | xfs_buf_relse(bp); | 2316 | xfs_buf_relse(bp); |
2446 | xfs_fs_cmn_err(CE_ALERT, mp, | 2317 | xfs_alert(mp, |
2447 | "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", | 2318 | "%s: Bad inode log record length %d, rec ptr 0x%p", |
2448 | item->ri_buf[1].i_len, item); | 2319 | __func__, item->ri_buf[1].i_len, item); |
2449 | error = EFSCORRUPTED; | 2320 | error = EFSCORRUPTED; |
2450 | goto error; | 2321 | goto error; |
2451 | } | 2322 | } |
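
Note: each EFSCORRUPTED bail-out above rejects a logged inode core that is internally inconsistent before it can be copied over the on-disk inode. A condensed userspace version of the same gauntlet, with simplified field types and invented constants standing in for the XFS ones:

	/* Condensed inode-record sanity checks: magic number, fork format
	 * consistent with the file mode, extent counts bounded by the block
	 * count, and fork offset within the inode.
	 */
	#include <stdio.h>

	#define DINODE_MAGIC	0x494e		/* "IN" */
	#define FMT_EXTENTS	2
	#define FMT_BTREE	3
	#define S_IFMT_		0170000
	#define S_IFREG_	0100000

	struct icdinode {
		unsigned short	di_magic;
		unsigned short	di_mode;
		int		di_format;
		int		di_nextents;
		int		di_anextents;
		long long	di_nblocks;
		unsigned	di_forkoff;
	};

	static int inode_rec_ok(const struct icdinode *d, unsigned inodesize)
	{
		if (d->di_magic != DINODE_MAGIC)
			return 0;			/* mirrors check (2) */
		if ((d->di_mode & S_IFMT_) == S_IFREG_ &&
		    d->di_format != FMT_EXTENTS && d->di_format != FMT_BTREE)
			return 0;			/* mirrors check (3) */
		if (d->di_nextents + d->di_anextents > d->di_nblocks)
			return 0;			/* mirrors check (5) */
		if (d->di_forkoff > inodesize)
			return 0;			/* mirrors check (6) */
		return 1;
	}

	int main(void)
	{
		struct icdinode good = { DINODE_MAGIC, S_IFREG_, FMT_EXTENTS,
					 4, 0, 8, 0 };
		struct icdinode bad = good;

		bad.di_nextents = 100;	/* more extents than blocks: corrupt */

		printf("good record: %s\n", inode_rec_ok(&good, 256) ? "ok" : "corrupt");
		printf("bad record:  %s\n", inode_rec_ok(&bad, 256) ? "ok" : "corrupt");
		return 0;
	}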
@@ -2532,7 +2403,7 @@ xlog_recover_do_inode_trans( | |||
2532 | break; | 2403 | break; |
2533 | 2404 | ||
2534 | default: | 2405 | default: |
2535 | xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); | 2406 | xfs_warn(log->l_mp, "%s: Invalid flag", __func__); |
2536 | ASSERT(0); | 2407 | ASSERT(0); |
2537 | xfs_buf_relse(bp); | 2408 | xfs_buf_relse(bp); |
2538 | error = EIO; | 2409 | error = EIO; |
@@ -2556,18 +2427,11 @@ error: | |||
2556 | * of that type. | 2427 | * of that type. |
2557 | */ | 2428 | */ |
2558 | STATIC int | 2429 | STATIC int |
2559 | xlog_recover_do_quotaoff_trans( | 2430 | xlog_recover_quotaoff_pass1( |
2560 | xlog_t *log, | 2431 | xlog_t *log, |
2561 | xlog_recover_item_t *item, | 2432 | xlog_recover_item_t *item) |
2562 | int pass) | ||
2563 | { | 2433 | { |
2564 | xfs_qoff_logformat_t *qoff_f; | 2434 | xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; |
2565 | |||
2566 | if (pass == XLOG_RECOVER_PASS2) { | ||
2567 | return (0); | ||
2568 | } | ||
2569 | |||
2570 | qoff_f = item->ri_buf[0].i_addr; | ||
2571 | ASSERT(qoff_f); | 2435 | ASSERT(qoff_f); |
2572 | 2436 | ||
2573 | /* | 2437 | /* |
@@ -2588,22 +2452,17 @@ xlog_recover_do_quotaoff_trans( | |||
2588 | * Recover a dquot record | 2452 | * Recover a dquot record |
2589 | */ | 2453 | */ |
2590 | STATIC int | 2454 | STATIC int |
2591 | xlog_recover_do_dquot_trans( | 2455 | xlog_recover_dquot_pass2( |
2592 | xlog_t *log, | 2456 | xlog_t *log, |
2593 | xlog_recover_item_t *item, | 2457 | xlog_recover_item_t *item) |
2594 | int pass) | ||
2595 | { | 2458 | { |
2596 | xfs_mount_t *mp; | 2459 | xfs_mount_t *mp = log->l_mp; |
2597 | xfs_buf_t *bp; | 2460 | xfs_buf_t *bp; |
2598 | struct xfs_disk_dquot *ddq, *recddq; | 2461 | struct xfs_disk_dquot *ddq, *recddq; |
2599 | int error; | 2462 | int error; |
2600 | xfs_dq_logformat_t *dq_f; | 2463 | xfs_dq_logformat_t *dq_f; |
2601 | uint type; | 2464 | uint type; |
2602 | 2465 | ||
2603 | if (pass == XLOG_RECOVER_PASS1) { | ||
2604 | return 0; | ||
2605 | } | ||
2606 | mp = log->l_mp; | ||
2607 | 2466 | ||
2608 | /* | 2467 | /* |
2609 | * Filesystems are required to send in quota flags at mount time. | 2468 | * Filesystems are required to send in quota flags at mount time. |
@@ -2613,13 +2472,11 @@ xlog_recover_do_dquot_trans( | |||
2613 | 2472 | ||
2614 | recddq = item->ri_buf[1].i_addr; | 2473 | recddq = item->ri_buf[1].i_addr; |
2615 | if (recddq == NULL) { | 2474 | if (recddq == NULL) { |
2616 | cmn_err(CE_ALERT, | 2475 | xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); |
2617 | "XFS: NULL dquot in %s.", __func__); | ||
2618 | return XFS_ERROR(EIO); | 2476 | return XFS_ERROR(EIO); |
2619 | } | 2477 | } |
2620 | if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { | 2478 | if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { |
2621 | cmn_err(CE_ALERT, | 2479 | xfs_alert(log->l_mp, "dquot too small (%d) in %s.", |
2622 | "XFS: dquot too small (%d) in %s.", | ||
2623 | item->ri_buf[1].i_len, __func__); | 2480 | item->ri_buf[1].i_len, __func__); |
2624 | return XFS_ERROR(EIO); | 2481 | return XFS_ERROR(EIO); |
2625 | } | 2482 | } |
@@ -2644,12 +2501,10 @@ xlog_recover_do_dquot_trans( | |||
2644 | */ | 2501 | */ |
2645 | dq_f = item->ri_buf[0].i_addr; | 2502 | dq_f = item->ri_buf[0].i_addr; |
2646 | ASSERT(dq_f); | 2503 | ASSERT(dq_f); |
2647 | if ((error = xfs_qm_dqcheck(recddq, | 2504 | error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
2648 | dq_f->qlf_id, | 2505 | "xlog_recover_dquot_pass2 (log copy)"); |
2649 | 0, XFS_QMOPT_DOWARN, | 2506 | if (error) |
2650 | "xlog_recover_do_dquot_trans (log copy)"))) { | ||
2651 | return XFS_ERROR(EIO); | 2507 | return XFS_ERROR(EIO); |
2652 | } | ||
2653 | ASSERT(dq_f->qlf_len == 1); | 2508 | ASSERT(dq_f->qlf_len == 1); |
2654 | 2509 | ||
2655 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 2510 | error = xfs_read_buf(mp, mp->m_ddev_targp, |
@@ -2669,8 +2524,9 @@ xlog_recover_do_dquot_trans( | |||
2669 | * was among a chunk of dquots created earlier, and we did some | 2524 | * was among a chunk of dquots created earlier, and we did some |
2670 | * minimal initialization then. | 2525 | * minimal initialization then. |
2671 | */ | 2526 | */ |
2672 | if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, | 2527 | error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
2673 | "xlog_recover_do_dquot_trans")) { | 2528 | "xlog_recover_dquot_pass2"); |
2529 | if (error) { | ||
2674 | xfs_buf_relse(bp); | 2530 | xfs_buf_relse(bp); |
2675 | return XFS_ERROR(EIO); | 2531 | return XFS_ERROR(EIO); |
2676 | } | 2532 | } |
@@ -2693,38 +2549,31 @@ xlog_recover_do_dquot_trans( | |||
2693 | * LSN. | 2549 | * LSN. |
2694 | */ | 2550 | */ |
2695 | STATIC int | 2551 | STATIC int |
2696 | xlog_recover_do_efi_trans( | 2552 | xlog_recover_efi_pass2( |
2697 | xlog_t *log, | 2553 | xlog_t *log, |
2698 | xlog_recover_item_t *item, | 2554 | xlog_recover_item_t *item, |
2699 | xfs_lsn_t lsn, | 2555 | xfs_lsn_t lsn) |
2700 | int pass) | ||
2701 | { | 2556 | { |
2702 | int error; | 2557 | int error; |
2703 | xfs_mount_t *mp; | 2558 | xfs_mount_t *mp = log->l_mp; |
2704 | xfs_efi_log_item_t *efip; | 2559 | xfs_efi_log_item_t *efip; |
2705 | xfs_efi_log_format_t *efi_formatp; | 2560 | xfs_efi_log_format_t *efi_formatp; |
2706 | 2561 | ||
2707 | if (pass == XLOG_RECOVER_PASS1) { | ||
2708 | return 0; | ||
2709 | } | ||
2710 | |||
2711 | efi_formatp = item->ri_buf[0].i_addr; | 2562 | efi_formatp = item->ri_buf[0].i_addr; |
2712 | 2563 | ||
2713 | mp = log->l_mp; | ||
2714 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); | 2564 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
2715 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), | 2565 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), |
2716 | &(efip->efi_format)))) { | 2566 | &(efip->efi_format)))) { |
2717 | xfs_efi_item_free(efip); | 2567 | xfs_efi_item_free(efip); |
2718 | return error; | 2568 | return error; |
2719 | } | 2569 | } |
2720 | efip->efi_next_extent = efi_formatp->efi_nextents; | 2570 | atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); |
2721 | efip->efi_flags |= XFS_EFI_COMMITTED; | ||
2722 | 2571 | ||
2723 | spin_lock(&log->l_ailp->xa_lock); | 2572 | spin_lock(&log->l_ailp->xa_lock); |
2724 | /* | 2573 | /* |
2725 | * xfs_trans_ail_update() drops the AIL lock. | 2574 | * xfs_trans_ail_update() drops the AIL lock. |
2726 | */ | 2575 | */ |
2727 | xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); | 2576 | xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); |
2728 | return 0; | 2577 | return 0; |
2729 | } | 2578 | } |
2730 | 2579 | ||
@@ -2737,11 +2586,10 @@ xlog_recover_do_efi_trans( | |||
2737 | * efd format structure. If we find it, we remove the efi from the | 2586 | * efd format structure. If we find it, we remove the efi from the |
2738 | * AIL and free it. | 2587 | * AIL and free it. |
2739 | */ | 2588 | */ |
2740 | STATIC void | 2589 | STATIC int |
2741 | xlog_recover_do_efd_trans( | 2590 | xlog_recover_efd_pass2( |
2742 | xlog_t *log, | 2591 | xlog_t *log, |
2743 | xlog_recover_item_t *item, | 2592 | xlog_recover_item_t *item) |
2744 | int pass) | ||
2745 | { | 2593 | { |
2746 | xfs_efd_log_format_t *efd_formatp; | 2594 | xfs_efd_log_format_t *efd_formatp; |
2747 | xfs_efi_log_item_t *efip = NULL; | 2595 | xfs_efi_log_item_t *efip = NULL; |
@@ -2750,10 +2598,6 @@ xlog_recover_do_efd_trans( | |||
2750 | struct xfs_ail_cursor cur; | 2598 | struct xfs_ail_cursor cur; |
2751 | struct xfs_ail *ailp = log->l_ailp; | 2599 | struct xfs_ail *ailp = log->l_ailp; |
2752 | 2600 | ||
2753 | if (pass == XLOG_RECOVER_PASS1) { | ||
2754 | return; | ||
2755 | } | ||
2756 | |||
2757 | efd_formatp = item->ri_buf[0].i_addr; | 2601 | efd_formatp = item->ri_buf[0].i_addr; |
2758 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + | 2602 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + |
2759 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || | 2603 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || |
@@ -2785,62 +2629,6 @@ xlog_recover_do_efd_trans( | |||
2785 | } | 2629 | } |
2786 | xfs_trans_ail_cursor_done(ailp, &cur); | 2630 | xfs_trans_ail_cursor_done(ailp, &cur); |
2787 | spin_unlock(&ailp->xa_lock); | 2631 | spin_unlock(&ailp->xa_lock); |
2788 | } | ||
2789 | |||
2790 | /* | ||
2791 | * Perform the transaction | ||
2792 | * | ||
2793 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | ||
2794 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | ||
2795 | */ | ||
2796 | STATIC int | ||
2797 | xlog_recover_do_trans( | ||
2798 | xlog_t *log, | ||
2799 | xlog_recover_t *trans, | ||
2800 | int pass) | ||
2801 | { | ||
2802 | int error = 0; | ||
2803 | xlog_recover_item_t *item; | ||
2804 | |||
2805 | error = xlog_recover_reorder_trans(log, trans, pass); | ||
2806 | if (error) | ||
2807 | return error; | ||
2808 | |||
2809 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | ||
2810 | trace_xfs_log_recover_item_recover(log, trans, item, pass); | ||
2811 | switch (ITEM_TYPE(item)) { | ||
2812 | case XFS_LI_BUF: | ||
2813 | error = xlog_recover_do_buffer_trans(log, item, pass); | ||
2814 | break; | ||
2815 | case XFS_LI_INODE: | ||
2816 | error = xlog_recover_do_inode_trans(log, item, pass); | ||
2817 | break; | ||
2818 | case XFS_LI_EFI: | ||
2819 | error = xlog_recover_do_efi_trans(log, item, | ||
2820 | trans->r_lsn, pass); | ||
2821 | break; | ||
2822 | case XFS_LI_EFD: | ||
2823 | xlog_recover_do_efd_trans(log, item, pass); | ||
2824 | error = 0; | ||
2825 | break; | ||
2826 | case XFS_LI_DQUOT: | ||
2827 | error = xlog_recover_do_dquot_trans(log, item, pass); | ||
2828 | break; | ||
2829 | case XFS_LI_QUOTAOFF: | ||
2830 | error = xlog_recover_do_quotaoff_trans(log, item, | ||
2831 | pass); | ||
2832 | break; | ||
2833 | default: | ||
2834 | xlog_warn( | ||
2835 | "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item)); | ||
2836 | ASSERT(0); | ||
2837 | error = XFS_ERROR(EIO); | ||
2838 | break; | ||
2839 | } | ||
2840 | |||
2841 | if (error) | ||
2842 | return error; | ||
2843 | } | ||
2844 | 2632 | ||
2845 | return 0; | 2633 | return 0; |
2846 | } | 2634 | } |
@@ -2852,7 +2640,7 @@ xlog_recover_do_trans( | |||
2852 | */ | 2640 | */ |
2853 | STATIC void | 2641 | STATIC void |
2854 | xlog_recover_free_trans( | 2642 | xlog_recover_free_trans( |
2855 | xlog_recover_t *trans) | 2643 | struct xlog_recover *trans) |
2856 | { | 2644 | { |
2857 | xlog_recover_item_t *item, *n; | 2645 | xlog_recover_item_t *item, *n; |
2858 | int i; | 2646 | int i; |
@@ -2871,26 +2659,103 @@ xlog_recover_free_trans( | |||
2871 | } | 2659 | } |
2872 | 2660 | ||
2873 | STATIC int | 2661 | STATIC int |
2662 | xlog_recover_commit_pass1( | ||
2663 | struct log *log, | ||
2664 | struct xlog_recover *trans, | ||
2665 | xlog_recover_item_t *item) | ||
2666 | { | ||
2667 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); | ||
2668 | |||
2669 | switch (ITEM_TYPE(item)) { | ||
2670 | case XFS_LI_BUF: | ||
2671 | return xlog_recover_buffer_pass1(log, item); | ||
2672 | case XFS_LI_QUOTAOFF: | ||
2673 | return xlog_recover_quotaoff_pass1(log, item); | ||
2674 | case XFS_LI_INODE: | ||
2675 | case XFS_LI_EFI: | ||
2676 | case XFS_LI_EFD: | ||
2677 | case XFS_LI_DQUOT: | ||
2678 | /* nothing to do in pass 1 */ | ||
2679 | return 0; | ||
2680 | default: | ||
2681 | xfs_warn(log->l_mp, "%s: invalid item type (%d)", | ||
2682 | __func__, ITEM_TYPE(item)); | ||
2683 | ASSERT(0); | ||
2684 | return XFS_ERROR(EIO); | ||
2685 | } | ||
2686 | } | ||
2687 | |||
2688 | STATIC int | ||
2689 | xlog_recover_commit_pass2( | ||
2690 | struct log *log, | ||
2691 | struct xlog_recover *trans, | ||
2692 | xlog_recover_item_t *item) | ||
2693 | { | ||
2694 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); | ||
2695 | |||
2696 | switch (ITEM_TYPE(item)) { | ||
2697 | case XFS_LI_BUF: | ||
2698 | return xlog_recover_buffer_pass2(log, item); | ||
2699 | case XFS_LI_INODE: | ||
2700 | return xlog_recover_inode_pass2(log, item); | ||
2701 | case XFS_LI_EFI: | ||
2702 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | ||
2703 | case XFS_LI_EFD: | ||
2704 | return xlog_recover_efd_pass2(log, item); | ||
2705 | case XFS_LI_DQUOT: | ||
2706 | return xlog_recover_dquot_pass2(log, item); | ||
2707 | case XFS_LI_QUOTAOFF: | ||
2708 | /* nothing to do in pass2 */ | ||
2709 | return 0; | ||
2710 | default: | ||
2711 | xfs_warn(log->l_mp, "%s: invalid item type (%d)", | ||
2712 | __func__, ITEM_TYPE(item)); | ||
2713 | ASSERT(0); | ||
2714 | return XFS_ERROR(EIO); | ||
2715 | } | ||
2716 | } | ||
2717 | |||
2718 | /* | ||
2719 | * Perform the transaction. | ||
2720 | * | ||
2721 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | ||
2722 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | ||
2723 | */ | ||
2724 | STATIC int | ||
2874 | xlog_recover_commit_trans( | 2725 | xlog_recover_commit_trans( |
2875 | xlog_t *log, | 2726 | struct log *log, |
2876 | xlog_recover_t *trans, | 2727 | struct xlog_recover *trans, |
2877 | int pass) | 2728 | int pass) |
2878 | { | 2729 | { |
2879 | int error; | 2730 | int error = 0; |
2731 | xlog_recover_item_t *item; | ||
2880 | 2732 | ||
2881 | hlist_del(&trans->r_list); | 2733 | hlist_del(&trans->r_list); |
2882 | if ((error = xlog_recover_do_trans(log, trans, pass))) | 2734 | |
2735 | error = xlog_recover_reorder_trans(log, trans, pass); | ||
2736 | if (error) | ||
2883 | return error; | 2737 | return error; |
2884 | xlog_recover_free_trans(trans); /* no error */ | 2738 | |
2739 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | ||
2740 | if (pass == XLOG_RECOVER_PASS1) | ||
2741 | error = xlog_recover_commit_pass1(log, trans, item); | ||
2742 | else | ||
2743 | error = xlog_recover_commit_pass2(log, trans, item); | ||
2744 | if (error) | ||
2745 | return error; | ||
2746 | } | ||
2747 | |||
2748 | xlog_recover_free_trans(trans); | ||
2885 | return 0; | 2749 | return 0; |
2886 | } | 2750 | } |
2887 | 2751 | ||
2888 | STATIC int | 2752 | STATIC int |
2889 | xlog_recover_unmount_trans( | 2753 | xlog_recover_unmount_trans( |
2754 | struct log *log, | ||
2890 | xlog_recover_t *trans) | 2755 | xlog_recover_t *trans) |
2891 | { | 2756 | { |
2892 | /* Do nothing now */ | 2757 | /* Do nothing now */ |
2893 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); | 2758 | xfs_warn(log->l_mp, "%s: Unmount LR", __func__); |
2894 | return 0; | 2759 | return 0; |
2895 | } | 2760 | } |
2896 | 2761 | ||
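
Note: this is the heart of the restructuring — the single pass-parameterized xlog_recover_do_trans() becomes two typed dispatchers, and xlog_recover_commit_trans() routes each item through whichever one matches the current pass, aborting on the first error. A compact sketch of that shape, with item types and handlers as stand-ins:

	/* One dispatcher per recovery pass, selected per item by
	 * commit_trans(); unknown types would be rejected as the kernel
	 * does with XFS_ERROR(EIO).
	 */
	#include <stdio.h>

	enum item_type { LI_BUF, LI_INODE, LI_QUOTAOFF };
	enum { PASS1, PASS2 };

	static int commit_pass1(enum item_type t)
	{
		switch (t) {
		case LI_BUF:	  printf("pass1: record buf cancellation\n"); return 0;
		case LI_QUOTAOFF: printf("pass1: note quotaoff flags\n");     return 0;
		case LI_INODE:	  return 0;	/* nothing to do in pass 1 */
		}
		return -1;			/* unknown item type */
	}

	static int commit_pass2(enum item_type t)
	{
		switch (t) {
		case LI_BUF:	  printf("pass2: replay buffer\n"); return 0;
		case LI_INODE:	  printf("pass2: replay inode\n");  return 0;
		case LI_QUOTAOFF: return 0;	/* nothing to do in pass 2 */
		}
		return -1;
	}

	static int commit_trans(const enum item_type *items, int n, int pass)
	{
		for (int i = 0; i < n; i++) {
			int error = (pass == PASS1) ? commit_pass1(items[i])
						    : commit_pass2(items[i]);
			if (error)
				return error;	/* first failure aborts */
		}
		return 0;
	}

	int main(void)
	{
		enum item_type trans[] = { LI_BUF, LI_INODE, LI_QUOTAOFF };

		commit_trans(trans, 3, PASS1);
		commit_trans(trans, 3, PASS2);
		return 0;
	}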
@@ -2933,8 +2798,8 @@ xlog_recover_process_data( | |||
2933 | dp += sizeof(xlog_op_header_t); | 2798 | dp += sizeof(xlog_op_header_t); |
2934 | if (ohead->oh_clientid != XFS_TRANSACTION && | 2799 | if (ohead->oh_clientid != XFS_TRANSACTION && |
2935 | ohead->oh_clientid != XFS_LOG) { | 2800 | ohead->oh_clientid != XFS_LOG) { |
2936 | xlog_warn( | 2801 | xfs_warn(log->l_mp, "%s: bad clientid 0x%x", |
2937 | "XFS: xlog_recover_process_data: bad clientid"); | 2802 | __func__, ohead->oh_clientid); |
2938 | ASSERT(0); | 2803 | ASSERT(0); |
2939 | return (XFS_ERROR(EIO)); | 2804 | return (XFS_ERROR(EIO)); |
2940 | } | 2805 | } |
@@ -2947,8 +2812,8 @@ xlog_recover_process_data( | |||
2947 | be64_to_cpu(rhead->h_lsn)); | 2812 | be64_to_cpu(rhead->h_lsn)); |
2948 | } else { | 2813 | } else { |
2949 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { | 2814 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { |
2950 | xlog_warn( | 2815 | xfs_warn(log->l_mp, "%s: bad length 0x%x", |
2951 | "XFS: xlog_recover_process_data: bad length"); | 2816 | __func__, be32_to_cpu(ohead->oh_len)); |
2952 | WARN_ON(1); | 2817 | WARN_ON(1); |
2953 | return (XFS_ERROR(EIO)); | 2818 | return (XFS_ERROR(EIO)); |
2954 | } | 2819 | } |
@@ -2961,7 +2826,7 @@ xlog_recover_process_data( | |||
2961 | trans, pass); | 2826 | trans, pass); |
2962 | break; | 2827 | break; |
2963 | case XLOG_UNMOUNT_TRANS: | 2828 | case XLOG_UNMOUNT_TRANS: |
2964 | error = xlog_recover_unmount_trans(trans); | 2829 | error = xlog_recover_unmount_trans(log, trans); |
2965 | break; | 2830 | break; |
2966 | case XLOG_WAS_CONT_TRANS: | 2831 | case XLOG_WAS_CONT_TRANS: |
2967 | error = xlog_recover_add_to_cont_trans(log, | 2832 | error = xlog_recover_add_to_cont_trans(log, |
@@ -2969,8 +2834,8 @@ xlog_recover_process_data( | |||
2969 | be32_to_cpu(ohead->oh_len)); | 2834 | be32_to_cpu(ohead->oh_len)); |
2970 | break; | 2835 | break; |
2971 | case XLOG_START_TRANS: | 2836 | case XLOG_START_TRANS: |
2972 | xlog_warn( | 2837 | xfs_warn(log->l_mp, "%s: bad transaction", |
2973 | "XFS: xlog_recover_process_data: bad transaction"); | 2838 | __func__); |
2974 | ASSERT(0); | 2839 | ASSERT(0); |
2975 | error = XFS_ERROR(EIO); | 2840 | error = XFS_ERROR(EIO); |
2976 | break; | 2841 | break; |
@@ -2980,8 +2845,8 @@ xlog_recover_process_data( | |||
2980 | dp, be32_to_cpu(ohead->oh_len)); | 2845 | dp, be32_to_cpu(ohead->oh_len)); |
2981 | break; | 2846 | break; |
2982 | default: | 2847 | default: |
2983 | xlog_warn( | 2848 | xfs_warn(log->l_mp, "%s: bad flag 0x%x", |
2984 | "XFS: xlog_recover_process_data: bad flag"); | 2849 | __func__, flags); |
2985 | ASSERT(0); | 2850 | ASSERT(0); |
2986 | error = XFS_ERROR(EIO); | 2851 | error = XFS_ERROR(EIO); |
2987 | break; | 2852 | break; |
@@ -3011,7 +2876,7 @@ xlog_recover_process_efi( | |||
3011 | xfs_extent_t *extp; | 2876 | xfs_extent_t *extp; |
3012 | xfs_fsblock_t startblock_fsb; | 2877 | xfs_fsblock_t startblock_fsb; |
3013 | 2878 | ||
3014 | ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); | 2879 | ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); |
3015 | 2880 | ||
3016 | /* | 2881 | /* |
3017 | * First check the validity of the extents described by the | 2882 | * First check the validity of the extents described by the |
@@ -3050,7 +2915,7 @@ xlog_recover_process_efi( | |||
3050 | extp->ext_len); | 2915 | extp->ext_len); |
3051 | } | 2916 | } |
3052 | 2917 | ||
3053 | efip->efi_flags |= XFS_EFI_RECOVERED; | 2918 | set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); |
3054 | error = xfs_trans_commit(tp, 0); | 2919 | error = xfs_trans_commit(tp, 0); |
3055 | return error; | 2920 | return error; |
3056 | 2921 | ||
@@ -3107,7 +2972,7 @@ xlog_recover_process_efis( | |||
3107 | * Skip EFIs that we've already processed. | 2972 | * Skip EFIs that we've already processed. |
3108 | */ | 2973 | */ |
3109 | efip = (xfs_efi_log_item_t *)lip; | 2974 | efip = (xfs_efi_log_item_t *)lip; |
3110 | if (efip->efi_flags & XFS_EFI_RECOVERED) { | 2975 | if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { |
3111 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 2976 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
3112 | continue; | 2977 | continue; |
3113 | } | 2978 | } |
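
Note: the efi_flags conversions above replace plain `flags |= bit` updates with set_bit()/test_bit(), so marking an EFI recovered is an atomic read-modify-write rather than a racy load/store pair. The same pattern sketched in portable C11 atomics (the bit number is invented):

	/* set_bit()/test_bit() modelled with stdatomic: the fetch-or
	 * replaces the non-atomic "efip->efi_flags |= XFS_EFI_RECOVERED"
	 * that the diff removes.
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	#define EFI_RECOVERED	0	/* bit number, not a mask */

	static void set_bit(int nr, atomic_ulong *flags)
	{
		atomic_fetch_or(flags, 1UL << nr);
	}

	static int test_bit(int nr, atomic_ulong *flags)
	{
		return (atomic_load(flags) >> nr) & 1;
	}

	int main(void)
	{
		atomic_ulong efi_flags = 0;

		if (!test_bit(EFI_RECOVERED, &efi_flags)) {
			/* ... process the extent-free intent ... */
			set_bit(EFI_RECOVERED, &efi_flags);
		}
		printf("recovered: %d\n", test_bit(EFI_RECOVERED, &efi_flags));
		return 0;
	}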
@@ -3166,8 +3031,7 @@ xlog_recover_clear_agi_bucket( | |||
3166 | out_abort: | 3031 | out_abort: |
3167 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3032 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); |
3168 | out_error: | 3033 | out_error: |
3169 | xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " | 3034 | xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); |
3170 | "failed to clear agi %d. Continuing.", agno); | ||
3171 | return; | 3035 | return; |
3172 | } | 3036 | } |
3173 | 3037 | ||
@@ -3418,7 +3282,7 @@ xlog_valid_rec_header( | |||
3418 | if (unlikely( | 3282 | if (unlikely( |
3419 | (!rhead->h_version || | 3283 | (!rhead->h_version || |
3420 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { | 3284 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
3421 | xlog_warn("XFS: %s: unrecognised log version (%d).", | 3285 | xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", |
3422 | __func__, be32_to_cpu(rhead->h_version)); | 3286 | __func__, be32_to_cpu(rhead->h_version)); |
3423 | return XFS_ERROR(EIO); | 3287 | return XFS_ERROR(EIO); |
3424 | } | 3288 | } |
@@ -3724,7 +3588,7 @@ xlog_do_log_recovery( | |||
3724 | xfs_daddr_t head_blk, | 3588 | xfs_daddr_t head_blk, |
3725 | xfs_daddr_t tail_blk) | 3589 | xfs_daddr_t tail_blk) |
3726 | { | 3590 | { |
3727 | int error; | 3591 | int error, i; |
3728 | 3592 | ||
3729 | ASSERT(head_blk != tail_blk); | 3593 | ASSERT(head_blk != tail_blk); |
3730 | 3594 | ||
@@ -3732,10 +3596,12 @@ xlog_do_log_recovery( | |||
3732 | * First do a pass to find all of the cancelled buf log items. | 3596 | * First do a pass to find all of the cancelled buf log items. |
3733 | * Store them in the buf_cancel_table for use in the second pass. | 3597 | * Store them in the buf_cancel_table for use in the second pass. |
3734 | */ | 3598 | */ |
3735 | log->l_buf_cancel_table = | 3599 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * |
3736 | (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * | 3600 | sizeof(struct list_head), |
3737 | sizeof(xfs_buf_cancel_t*), | ||
3738 | KM_SLEEP); | 3601 | KM_SLEEP); |
3602 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | ||
3603 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); | ||
3604 | |||
3739 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3605 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3740 | XLOG_RECOVER_PASS1); | 3606 | XLOG_RECOVER_PASS1); |
3741 | if (error != 0) { | 3607 | if (error != 0) { |
@@ -3754,7 +3620,7 @@ xlog_do_log_recovery( | |||
3754 | int i; | 3620 | int i; |
3755 | 3621 | ||
3756 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 3622 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
3757 | ASSERT(log->l_buf_cancel_table[i] == NULL); | 3623 | ASSERT(list_empty(&log->l_buf_cancel_table[i])); |
3758 | } | 3624 | } |
3759 | #endif /* DEBUG */ | 3625 | #endif /* DEBUG */ |
3760 | 3626 | ||
@@ -3874,10 +3740,9 @@ xlog_recover( | |||
3874 | return error; | 3740 | return error; |
3875 | } | 3741 | } |
3876 | 3742 | ||
3877 | cmn_err(CE_NOTE, | 3743 | xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", |
3878 | "Starting XFS recovery on filesystem: %s (logdev: %s)", | 3744 | log->l_mp->m_logname ? log->l_mp->m_logname |
3879 | log->l_mp->m_fsname, log->l_mp->m_logname ? | 3745 | : "internal"); |
3880 | log->l_mp->m_logname : "internal"); | ||
3881 | 3746 | ||
3882 | error = xlog_do_recover(log, head_blk, tail_blk); | 3747 | error = xlog_do_recover(log, head_blk, tail_blk); |
3883 | log->l_flags |= XLOG_RECOVERY_NEEDED; | 3748 | log->l_flags |= XLOG_RECOVERY_NEEDED; |
@@ -3910,9 +3775,7 @@ xlog_recover_finish( | |||
3910 | int error; | 3775 | int error; |
3911 | error = xlog_recover_process_efis(log); | 3776 | error = xlog_recover_process_efis(log); |
3912 | if (error) { | 3777 | if (error) { |
3913 | cmn_err(CE_ALERT, | 3778 | xfs_alert(log->l_mp, "Failed to recover EFIs"); |
3914 | "Failed to recover EFIs on filesystem: %s", | ||
3915 | log->l_mp->m_fsname); | ||
3916 | return error; | 3779 | return error; |
3917 | } | 3780 | } |
3918 | /* | 3781 | /* |
@@ -3927,15 +3790,12 @@ xlog_recover_finish( | |||
3927 | 3790 | ||
3928 | xlog_recover_check_summary(log); | 3791 | xlog_recover_check_summary(log); |
3929 | 3792 | ||
3930 | cmn_err(CE_NOTE, | 3793 | xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", |
3931 | "Ending XFS recovery on filesystem: %s (logdev: %s)", | 3794 | log->l_mp->m_logname ? log->l_mp->m_logname |
3932 | log->l_mp->m_fsname, log->l_mp->m_logname ? | 3795 | : "internal"); |
3933 | log->l_mp->m_logname : "internal"); | ||
3934 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; | 3796 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; |
3935 | } else { | 3797 | } else { |
3936 | cmn_err(CE_DEBUG, | 3798 | xfs_info(log->l_mp, "Ending clean mount"); |
3937 | "!Ending clean XFS mount for filesystem: %s\n", | ||
3938 | log->l_mp->m_fsname); | ||
3939 | } | 3799 | } |
3940 | return 0; | 3800 | return 0; |
3941 | } | 3801 | } |
@@ -3968,10 +3828,8 @@ xlog_recover_check_summary( | |||
3968 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | 3828 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
3969 | error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); | 3829 | error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); |
3970 | if (error) { | 3830 | if (error) { |
3971 | xfs_fs_cmn_err(CE_ALERT, mp, | 3831 | xfs_alert(mp, "%s agf read failed agno %d error %d", |
3972 | "xlog_recover_check_summary(agf)" | 3832 | __func__, agno, error); |
3973 | "agf read failed agno %d error %d", | ||
3974 | agno, error); | ||
3975 | } else { | 3833 | } else { |
3976 | agfp = XFS_BUF_TO_AGF(agfbp); | 3834 | agfp = XFS_BUF_TO_AGF(agfbp); |
3977 | freeblks += be32_to_cpu(agfp->agf_freeblks) + | 3835 | freeblks += be32_to_cpu(agfp->agf_freeblks) + |
@@ -3980,7 +3838,10 @@ xlog_recover_check_summary( | |||
3980 | } | 3838 | } |
3981 | 3839 | ||
3982 | error = xfs_read_agi(mp, NULL, agno, &agibp); | 3840 | error = xfs_read_agi(mp, NULL, agno, &agibp); |
3983 | if (!error) { | 3841 | if (error) { |
3842 | xfs_alert(mp, "%s agi read failed agno %d error %d", | ||
3843 | __func__, agno, error); | ||
3844 | } else { | ||
3984 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); | 3845 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); |
3985 | 3846 | ||
3986 | itotal += be32_to_cpu(agi->agi_count); | 3847 | itotal += be32_to_cpu(agi->agi_count); |
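
Note: the summary check now complains about failed AGI reads instead of silently skipping them, while still accumulating per-AG counters across the whole filesystem. A toy version of that accumulation — field names shortened, and the free-inode count assumed to come from the AGI as in the surrounding code:

	/* Walk every allocation group, summing free blocks (AGF) and inode
	 * counts (AGI); a read failure would be logged and the loop would
	 * continue, as in the hunk above. Data here is faked.
	 */
	#include <stdio.h>

	struct agf { unsigned freeblks, flcount; };
	struct agi { unsigned count, freecount; };

	int main(void)
	{
		struct agf agfs[] = { { 1000, 4 }, { 800, 2 } };
		struct agi agis[] = { { 64, 8 },  { 32, 5 } };
		unsigned long long freeblks = 0, itotal = 0, ifree = 0;

		for (int agno = 0; agno < 2; agno++) {
			freeblks += agfs[agno].freeblks + agfs[agno].flcount;
			itotal   += agis[agno].count;
			ifree    += agis[agno].freecount;
		}
		printf("freeblks %llu, itotal %llu, ifree %llu\n",
		       freeblks, itotal, ifree);
		return 0;
	}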
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 19e9dfa1c254..bb3f9a7b24ed 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -133,9 +133,7 @@ xfs_uuid_mount( | |||
133 | return 0; | 133 | return 0; |
134 | 134 | ||
135 | if (uuid_is_nil(uuid)) { | 135 | if (uuid_is_nil(uuid)) { |
136 | cmn_err(CE_WARN, | 136 | xfs_warn(mp, "Filesystem has nil UUID - can't mount"); |
137 | "XFS: Filesystem %s has nil UUID - can't mount", | ||
138 | mp->m_fsname); | ||
139 | return XFS_ERROR(EINVAL); | 137 | return XFS_ERROR(EINVAL); |
140 | } | 138 | } |
141 | 139 | ||
@@ -163,8 +161,7 @@ xfs_uuid_mount( | |||
163 | 161 | ||
164 | out_duplicate: | 162 | out_duplicate: |
165 | mutex_unlock(&xfs_uuid_table_mutex); | 163 | mutex_unlock(&xfs_uuid_table_mutex); |
166 | cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount", | 164 | xfs_warn(mp, "Filesystem has duplicate UUID - can't mount"); |
167 | mp->m_fsname); | ||
168 | return XFS_ERROR(EINVAL); | 165 | return XFS_ERROR(EINVAL); |
169 | } | 166 | } |
170 | 167 | ||
@@ -311,6 +308,8 @@ xfs_mount_validate_sb( | |||
311 | xfs_sb_t *sbp, | 308 | xfs_sb_t *sbp, |
312 | int flags) | 309 | int flags) |
313 | { | 310 | { |
311 | int loud = !(flags & XFS_MFSI_QUIET); | ||
312 | |||
314 | /* | 313 | /* |
315 | * If the log device and data device have the | 314 | * If the log device and data device have the |
316 | * same device number, the log is internal. | 315 | * same device number, the log is internal. |
@@ -319,28 +318,32 @@ xfs_mount_validate_sb( | |||
319 | * a volume filesystem in a non-volume manner. | 318 | * a volume filesystem in a non-volume manner. |
320 | */ | 319 | */ |
321 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { | 320 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { |
322 | xfs_fs_mount_cmn_err(flags, "bad magic number"); | 321 | if (loud) |
322 | xfs_warn(mp, "bad magic number"); | ||
323 | return XFS_ERROR(EWRONGFS); | 323 | return XFS_ERROR(EWRONGFS); |
324 | } | 324 | } |
325 | 325 | ||
326 | if (!xfs_sb_good_version(sbp)) { | 326 | if (!xfs_sb_good_version(sbp)) { |
327 | xfs_fs_mount_cmn_err(flags, "bad version"); | 327 | if (loud) |
328 | xfs_warn(mp, "bad version"); | ||
328 | return XFS_ERROR(EWRONGFS); | 329 | return XFS_ERROR(EWRONGFS); |
329 | } | 330 | } |
330 | 331 | ||
331 | if (unlikely( | 332 | if (unlikely( |
332 | sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { | 333 | sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { |
333 | xfs_fs_mount_cmn_err(flags, | 334 | if (loud) |
334 | "filesystem is marked as having an external log; " | 335 | xfs_warn(mp, |
335 | "specify logdev on the\nmount command line."); | 336 | "filesystem is marked as having an external log; " |
337 | "specify logdev on the mount command line."); | ||
336 | return XFS_ERROR(EINVAL); | 338 | return XFS_ERROR(EINVAL); |
337 | } | 339 | } |
338 | 340 | ||
339 | if (unlikely( | 341 | if (unlikely( |
340 | sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { | 342 | sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { |
341 | xfs_fs_mount_cmn_err(flags, | 343 | if (loud) |
342 | "filesystem is marked as having an internal log; " | 344 | xfs_warn(mp, |
343 | "do not specify logdev on\nthe mount command line."); | 345 | "filesystem is marked as having an internal log; " |
346 | "do not specify logdev on the mount command line."); | ||
344 | return XFS_ERROR(EINVAL); | 347 | return XFS_ERROR(EINVAL); |
345 | } | 348 | } |
346 | 349 | ||
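
Note: xfs_mount_validate_sb() now derives `loud` from XFS_MFSI_QUIET once and gates every diagnostic on it, replacing the xfs_fs_mount_cmn_err() wrapper — a quiet probe mount fails silently while a real mount still says why. The pattern in miniature (flag value and message are illustrative):

	/* The "loud" gating pattern: compute the flag once, report only
	 * when loud, return the error either way.
	 */
	#include <stdio.h>

	#define MFSI_QUIET	0x1
	#define SB_MAGIC	0x58465342	/* "XFSB" */

	static int validate_sb(unsigned magic, int flags)
	{
		int loud = !(flags & MFSI_QUIET);

		if (magic != SB_MAGIC) {
			if (loud)
				fprintf(stderr, "bad magic number\n");
			return -1;	/* EWRONGFS in the kernel */
		}
		return 0;
	}

	int main(void)
	{
		/* probe: wrong magic, but no message wanted */
		validate_sb(0xdeadbeef, MFSI_QUIET);
		/* real mount: the same failure now complains */
		validate_sb(0xdeadbeef, 0);
		return 0;
	}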
@@ -369,7 +372,8 @@ xfs_mount_validate_sb( | |||
369 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || | 372 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || |
370 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || | 373 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || |
371 | (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { | 374 | (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { |
372 | xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); | 375 | if (loud) |
376 | xfs_warn(mp, "SB sanity check 1 failed"); | ||
373 | return XFS_ERROR(EFSCORRUPTED); | 377 | return XFS_ERROR(EFSCORRUPTED); |
374 | } | 378 | } |
375 | 379 | ||
@@ -382,7 +386,8 @@ xfs_mount_validate_sb( | |||
382 | (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || | 386 | (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || |
383 | sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * | 387 | sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * |
384 | sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { | 388 | sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { |
385 | xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); | 389 | if (loud) |
390 | xfs_warn(mp, "SB sanity check 2 failed"); | ||
386 | return XFS_ERROR(EFSCORRUPTED); | 391 | return XFS_ERROR(EFSCORRUPTED); |
387 | } | 392 | } |
388 | 393 | ||
@@ -390,12 +395,12 @@ xfs_mount_validate_sb( | |||
390 | * Until this is fixed only page-sized or smaller data blocks work. | 395 | * Until this is fixed only page-sized or smaller data blocks work. |
391 | */ | 396 | */ |
392 | if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { | 397 | if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { |
393 | xfs_fs_mount_cmn_err(flags, | 398 | if (loud) { |
394 | "file system with blocksize %d bytes", | 399 | xfs_warn(mp, |
395 | sbp->sb_blocksize); | 400 | "File system with blocksize %d bytes. " |
396 | xfs_fs_mount_cmn_err(flags, | 401 | "Only pagesize (%ld) or less will currently work.", |
397 | "only pagesize (%ld) or less will currently work.", | 402 | sbp->sb_blocksize, PAGE_SIZE); |
398 | PAGE_SIZE); | 403 | } |
399 | return XFS_ERROR(ENOSYS); | 404 | return XFS_ERROR(ENOSYS); |
400 | } | 405 | } |
401 | 406 | ||
@@ -409,21 +414,23 @@ xfs_mount_validate_sb( | |||
409 | case 2048: | 414 | case 2048: |
410 | break; | 415 | break; |
411 | default: | 416 | default: |
412 | xfs_fs_mount_cmn_err(flags, | 417 | if (loud) |
413 | "inode size of %d bytes not supported", | 418 | xfs_warn(mp, "inode size of %d bytes not supported", |
414 | sbp->sb_inodesize); | 419 | sbp->sb_inodesize); |
415 | return XFS_ERROR(ENOSYS); | 420 | return XFS_ERROR(ENOSYS); |
416 | } | 421 | } |
417 | 422 | ||
418 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || | 423 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || |
419 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { | 424 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { |
420 | xfs_fs_mount_cmn_err(flags, | 425 | if (loud) |
421 | "file system too large to be mounted on this system."); | 426 | xfs_warn(mp, |
427 | "file system too large to be mounted on this system."); | ||
422 | return XFS_ERROR(EFBIG); | 428 | return XFS_ERROR(EFBIG); |
423 | } | 429 | } |
424 | 430 | ||
425 | if (unlikely(sbp->sb_inprogress)) { | 431 | if (unlikely(sbp->sb_inprogress)) { |
426 | xfs_fs_mount_cmn_err(flags, "file system busy"); | 432 | if (loud) |
433 | xfs_warn(mp, "file system busy"); | ||
427 | return XFS_ERROR(EFSCORRUPTED); | 434 | return XFS_ERROR(EFSCORRUPTED); |
428 | } | 435 | } |
429 | 436 | ||
@@ -431,8 +438,9 @@ xfs_mount_validate_sb( | |||
431 | * Version 1 directory format has never worked on Linux. | 438 | * Version 1 directory format has never worked on Linux. |
432 | */ | 439 | */ |
433 | if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { | 440 | if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { |
434 | xfs_fs_mount_cmn_err(flags, | 441 | if (loud) |
435 | "file system using version 1 directory format"); | 442 | xfs_warn(mp, |
443 | "file system using version 1 directory format"); | ||
436 | return XFS_ERROR(ENOSYS); | 444 | return XFS_ERROR(ENOSYS); |
437 | } | 445 | } |
438 | 446 | ||
@@ -472,7 +480,7 @@ xfs_initialize_perag( | |||
472 | goto out_unwind; | 480 | goto out_unwind; |
473 | pag->pag_agno = index; | 481 | pag->pag_agno = index; |
474 | pag->pag_mount = mp; | 482 | pag->pag_mount = mp; |
475 | rwlock_init(&pag->pag_ici_lock); | 483 | spin_lock_init(&pag->pag_ici_lock); |
476 | mutex_init(&pag->pag_ici_reclaim_lock); | 484 | mutex_init(&pag->pag_ici_reclaim_lock); |
477 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | 485 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
478 | spin_lock_init(&pag->pag_buf_lock); | 486 | spin_lock_init(&pag->pag_buf_lock); |
@@ -673,6 +681,7 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
673 | unsigned int sector_size; | 681 | unsigned int sector_size; |
674 | xfs_buf_t *bp; | 682 | xfs_buf_t *bp; |
675 | int error; | 683 | int error; |
684 | int loud = !(flags & XFS_MFSI_QUIET); | ||
676 | 685 | ||
677 | ASSERT(mp->m_sb_bp == NULL); | 686 | ASSERT(mp->m_sb_bp == NULL); |
678 | ASSERT(mp->m_ddev_targp != NULL); | 687 | ASSERT(mp->m_ddev_targp != NULL); |
@@ -688,7 +697,8 @@ reread: | |||
688 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, | 697 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, |
689 | XFS_SB_DADDR, sector_size, 0); | 698 | XFS_SB_DADDR, sector_size, 0); |
690 | if (!bp) { | 699 | if (!bp) { |
691 | xfs_fs_mount_cmn_err(flags, "SB buffer read failed"); | 700 | if (loud) |
701 | xfs_warn(mp, "SB buffer read failed"); | ||
692 | return EIO; | 702 | return EIO; |
693 | } | 703 | } |
694 | 704 | ||
@@ -699,7 +709,8 @@ reread: | |||
699 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); | 709 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); |
700 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); | 710 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); |
701 | if (error) { | 711 | if (error) { |
702 | xfs_fs_mount_cmn_err(flags, "SB validate failed"); | 712 | if (loud) |
713 | xfs_warn(mp, "SB validate failed"); | ||
703 | goto release_buf; | 714 | goto release_buf; |
704 | } | 715 | } |
705 | 716 | ||
@@ -707,9 +718,9 @@ reread: | |||
707 | * We must be able to do sector-sized and sector-aligned IO. | 718 | * We must be able to do sector-sized and sector-aligned IO. |
708 | */ | 719 | */ |
709 | if (sector_size > mp->m_sb.sb_sectsize) { | 720 | if (sector_size > mp->m_sb.sb_sectsize) { |
710 | xfs_fs_mount_cmn_err(flags, | 721 | if (loud) |
711 | "device supports only %u byte sectors (not %u)", | 722 | xfs_warn(mp, "device supports %u byte sectors (not %u)", |
712 | sector_size, mp->m_sb.sb_sectsize); | 723 | sector_size, mp->m_sb.sb_sectsize); |
713 | error = ENOSYS; | 724 | error = ENOSYS; |
714 | goto release_buf; | 725 | goto release_buf; |
715 | } | 726 | } |
@@ -853,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
853 | if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || | 864 | if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || |
854 | (BBTOB(mp->m_swidth) & mp->m_blockmask)) { | 865 | (BBTOB(mp->m_swidth) & mp->m_blockmask)) { |
855 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 866 | if (mp->m_flags & XFS_MOUNT_RETERR) { |
856 | cmn_err(CE_WARN, | 867 | xfs_warn(mp, "alignment check 1 failed"); |
857 | "XFS: alignment check 1 failed"); | ||
858 | return XFS_ERROR(EINVAL); | 868 | return XFS_ERROR(EINVAL); |
859 | } | 869 | } |
860 | mp->m_dalign = mp->m_swidth = 0; | 870 | mp->m_dalign = mp->m_swidth = 0; |
@@ -867,8 +877,9 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
867 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 877 | if (mp->m_flags & XFS_MOUNT_RETERR) { |
868 | return XFS_ERROR(EINVAL); | 878 | return XFS_ERROR(EINVAL); |
869 | } | 879 | } |
870 | xfs_fs_cmn_err(CE_WARN, mp, | 880 | xfs_warn(mp, |
871 | "stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", | 881 | "stripe alignment turned off: sunit(%d)/swidth(%d) " |
882 | "incompatible with agsize(%d)", | ||
872 | mp->m_dalign, mp->m_swidth, | 883 | mp->m_dalign, mp->m_swidth, |
873 | sbp->sb_agblocks); | 884 | sbp->sb_agblocks); |
874 | 885 | ||
@@ -878,9 +889,9 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
878 | mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); | 889 | mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); |
879 | } else { | 890 | } else { |
880 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 891 | if (mp->m_flags & XFS_MOUNT_RETERR) { |
881 | xfs_fs_cmn_err(CE_WARN, mp, | 892 | xfs_warn(mp, |
882 | "stripe alignment turned off: sunit(%d) less than bsize(%d)", | 893 | "stripe alignment turned off: sunit(%d) less than bsize(%d)", |
883 | mp->m_dalign, | 894 | mp->m_dalign, |
884 | mp->m_blockmask +1); | 895 | mp->m_blockmask +1); |
885 | return XFS_ERROR(EINVAL); | 896 | return XFS_ERROR(EINVAL); |
886 | } | 897 | } |
@@ -975,6 +986,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp) | |||
975 | } | 986 | } |
976 | 987 | ||
977 | /* | 988 | /* |
989 | * precalculate the low space thresholds for dynamic speculative preallocation. | ||
990 | */ | ||
991 | void | ||
992 | xfs_set_low_space_thresholds( | ||
993 | struct xfs_mount *mp) | ||
994 | { | ||
995 | int i; | ||
996 | |||
997 | for (i = 0; i < XFS_LOWSP_MAX; i++) { | ||
998 | __uint64_t space = mp->m_sb.sb_dblocks; | ||
999 | |||
1000 | do_div(space, 100); | ||
1001 | mp->m_low_space[i] = space * (i + 1); | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | |||
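The new xfs_set_low_space_thresholds() above precomputes five cutoffs at 1% through 5% of the filesystem's data block count. A minimal userspace sketch of the same computation, assuming the kernel's do_div() reduces to a plain 64-bit division here (the block count in main() is made up for the demo):

#include <stdio.h>
#include <stdint.h>

#define XFS_LOWSP_MAX	5

static void set_low_space_thresholds(uint64_t dblocks,
				     uint64_t low_space[XFS_LOWSP_MAX])
{
	int i;

	for (i = 0; i < XFS_LOWSP_MAX; i++) {
		uint64_t space = dblocks / 100;		/* 1% of the fs */

		low_space[i] = space * (i + 1);		/* 1%, 2%, ... 5% */
	}
}

int main(void)
{
	uint64_t t[XFS_LOWSP_MAX];
	int i;

	set_low_space_thresholds(26214400, t);	/* ~100GB of 4k blocks */
	for (i = 0; i < XFS_LOWSP_MAX; i++)
		printf("%d%%: %llu blocks\n", i + 1, (unsigned long long)t[i]);
	return 0;
}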
1006 | /* | ||
978 | * Set whether we're using inode alignment. | 1007 | * Set whether we're using inode alignment. |
979 | */ | 1008 | */ |
980 | STATIC void | 1009 | STATIC void |
@@ -1008,14 +1037,14 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
1008 | 1037 | ||
1009 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); | 1038 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); |
1010 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { | 1039 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { |
1011 | cmn_err(CE_WARN, "XFS: filesystem size mismatch detected"); | 1040 | xfs_warn(mp, "filesystem size mismatch detected"); |
1012 | return XFS_ERROR(EFBIG); | 1041 | return XFS_ERROR(EFBIG); |
1013 | } | 1042 | } |
1014 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, | 1043 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, |
1015 | d - XFS_FSS_TO_BB(mp, 1), | 1044 | d - XFS_FSS_TO_BB(mp, 1), |
1016 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); | 1045 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); |
1017 | if (!bp) { | 1046 | if (!bp) { |
1018 | cmn_err(CE_WARN, "XFS: last sector read failed"); | 1047 | xfs_warn(mp, "last sector read failed"); |
1019 | return EIO; | 1048 | return EIO; |
1020 | } | 1049 | } |
1021 | xfs_buf_relse(bp); | 1050 | xfs_buf_relse(bp); |
@@ -1023,14 +1052,14 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
1023 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | 1052 | if (mp->m_logdev_targp != mp->m_ddev_targp) { |
1024 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); | 1053 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); |
1025 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { | 1054 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { |
1026 | cmn_err(CE_WARN, "XFS: log size mismatch detected"); | 1055 | xfs_warn(mp, "log size mismatch detected"); |
1027 | return XFS_ERROR(EFBIG); | 1056 | return XFS_ERROR(EFBIG); |
1028 | } | 1057 | } |
1029 | bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, | 1058 | bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, |
1030 | d - XFS_FSB_TO_BB(mp, 1), | 1059 | d - XFS_FSB_TO_BB(mp, 1), |
1031 | XFS_FSB_TO_B(mp, 1), 0); | 1060 | XFS_FSB_TO_B(mp, 1), 0); |
1032 | if (!bp) { | 1061 | if (!bp) { |
1033 | cmn_err(CE_WARN, "XFS: log device read failed"); | 1062 | xfs_warn(mp, "log device read failed"); |
1034 | return EIO; | 1063 | return EIO; |
1035 | } | 1064 | } |
1036 | xfs_buf_relse(bp); | 1065 | xfs_buf_relse(bp); |
@@ -1068,7 +1097,7 @@ xfs_mount_reset_sbqflags( | |||
1068 | return 0; | 1097 | return 0; |
1069 | 1098 | ||
1070 | #ifdef QUOTADEBUG | 1099 | #ifdef QUOTADEBUG |
1071 | xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); | 1100 | xfs_notice(mp, "Writing superblock quota changes"); |
1072 | #endif | 1101 | #endif |
1073 | 1102 | ||
1074 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | 1103 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); |
@@ -1076,8 +1105,7 @@ xfs_mount_reset_sbqflags( | |||
1076 | XFS_DEFAULT_LOG_COUNT); | 1105 | XFS_DEFAULT_LOG_COUNT); |
1077 | if (error) { | 1106 | if (error) { |
1078 | xfs_trans_cancel(tp, 0); | 1107 | xfs_trans_cancel(tp, 0); |
1079 | xfs_fs_cmn_err(CE_ALERT, mp, | 1108 | xfs_alert(mp, "%s: Superblock update failed!", __func__); |
1080 | "xfs_mount_reset_sbqflags: Superblock update failed!"); | ||
1081 | return error; | 1109 | return error; |
1082 | } | 1110 | } |
1083 | 1111 | ||
@@ -1143,8 +1171,7 @@ xfs_mountfs( | |||
1143 | * transaction subsystem is online. | 1171 | * transaction subsystem is online. |
1144 | */ | 1172 | */ |
1145 | if (xfs_sb_has_mismatched_features2(sbp)) { | 1173 | if (xfs_sb_has_mismatched_features2(sbp)) { |
1146 | cmn_err(CE_WARN, | 1174 | xfs_warn(mp, "correcting sb_features alignment problem"); |
1147 | "XFS: correcting sb_features alignment problem"); | ||
1148 | sbp->sb_features2 |= sbp->sb_bad_features2; | 1175 | sbp->sb_features2 |= sbp->sb_bad_features2; |
1149 | sbp->sb_bad_features2 = sbp->sb_features2; | 1176 | sbp->sb_bad_features2 = sbp->sb_features2; |
1150 | mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; | 1177 | mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; |
@@ -1196,6 +1223,9 @@ xfs_mountfs( | |||
1196 | */ | 1223 | */ |
1197 | xfs_set_rw_sizes(mp); | 1224 | xfs_set_rw_sizes(mp); |
1198 | 1225 | ||
1226 | /* set the low space thresholds for dynamic preallocation */ | ||
1227 | xfs_set_low_space_thresholds(mp); | ||
1228 | |||
1199 | /* | 1229 | /* |
1200 | * Set the inode cluster size. | 1230 | * Set the inode cluster size. |
1201 | * This may still be overridden by the file system | 1231 | * This may still be overridden by the file system |
@@ -1220,7 +1250,7 @@ xfs_mountfs( | |||
1220 | */ | 1250 | */ |
1221 | error = xfs_rtmount_init(mp); | 1251 | error = xfs_rtmount_init(mp); |
1222 | if (error) { | 1252 | if (error) { |
1223 | cmn_err(CE_WARN, "XFS: RT mount failed"); | 1253 | xfs_warn(mp, "RT mount failed"); |
1224 | goto out_remove_uuid; | 1254 | goto out_remove_uuid; |
1225 | } | 1255 | } |
1226 | 1256 | ||
@@ -1251,12 +1281,12 @@ xfs_mountfs( | |||
1251 | INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); | 1281 | INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); |
1252 | error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); | 1282 | error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); |
1253 | if (error) { | 1283 | if (error) { |
1254 | cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); | 1284 | xfs_warn(mp, "Failed per-ag init: %d", error); |
1255 | goto out_remove_uuid; | 1285 | goto out_remove_uuid; |
1256 | } | 1286 | } |
1257 | 1287 | ||
1258 | if (!sbp->sb_logblocks) { | 1288 | if (!sbp->sb_logblocks) { |
1259 | cmn_err(CE_WARN, "XFS: no log defined"); | 1289 | xfs_warn(mp, "no log defined"); |
1260 | XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); | 1290 | XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); |
1261 | error = XFS_ERROR(EFSCORRUPTED); | 1291 | error = XFS_ERROR(EFSCORRUPTED); |
1262 | goto out_free_perag; | 1292 | goto out_free_perag; |
@@ -1269,7 +1299,7 @@ xfs_mountfs( | |||
1269 | XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), | 1299 | XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), |
1270 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); | 1300 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); |
1271 | if (error) { | 1301 | if (error) { |
1272 | cmn_err(CE_WARN, "XFS: log mount failed"); | 1302 | xfs_warn(mp, "log mount failed"); |
1273 | goto out_free_perag; | 1303 | goto out_free_perag; |
1274 | } | 1304 | } |
1275 | 1305 | ||
@@ -1306,16 +1336,14 @@ xfs_mountfs( | |||
1306 | */ | 1336 | */ |
1307 | error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); | 1337 | error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); |
1308 | if (error) { | 1338 | if (error) { |
1309 | cmn_err(CE_WARN, "XFS: failed to read root inode"); | 1339 | xfs_warn(mp, "failed to read root inode"); |
1310 | goto out_log_dealloc; | 1340 | goto out_log_dealloc; |
1311 | } | 1341 | } |
1312 | 1342 | ||
1313 | ASSERT(rip != NULL); | 1343 | ASSERT(rip != NULL); |
1314 | 1344 | ||
1315 | if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { | 1345 | if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { |
1316 | cmn_err(CE_WARN, "XFS: corrupted root inode"); | 1346 | xfs_warn(mp, "corrupted root inode %llu: not a directory", |
1317 | cmn_err(CE_WARN, "Device %s - root %llu is not a directory", | ||
1318 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
1319 | (unsigned long long)rip->i_ino); | 1347 | (unsigned long long)rip->i_ino); |
1320 | xfs_iunlock(rip, XFS_ILOCK_EXCL); | 1348 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
1321 | XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, | 1349 | XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, |
@@ -1335,7 +1363,7 @@ xfs_mountfs( | |||
1335 | /* | 1363 | /* |
1336 | * Free up the root inode. | 1364 | * Free up the root inode. |
1337 | */ | 1365 | */ |
1338 | cmn_err(CE_WARN, "XFS: failed to read RT inodes"); | 1366 | xfs_warn(mp, "failed to read RT inodes"); |
1339 | goto out_rele_rip; | 1367 | goto out_rele_rip; |
1340 | } | 1368 | } |
1341 | 1369 | ||
@@ -1347,7 +1375,7 @@ xfs_mountfs( | |||
1347 | if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { | 1375 | if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
1348 | error = xfs_mount_log_sb(mp, mp->m_update_flags); | 1376 | error = xfs_mount_log_sb(mp, mp->m_update_flags); |
1349 | if (error) { | 1377 | if (error) { |
1350 | cmn_err(CE_WARN, "XFS: failed to write sb changes"); | 1378 | xfs_warn(mp, "failed to write sb changes"); |
1351 | goto out_rtunmount; | 1379 | goto out_rtunmount; |
1352 | } | 1380 | } |
1353 | } | 1381 | } |
@@ -1368,10 +1396,7 @@ xfs_mountfs( | |||
1368 | * quotachecked license. | 1396 | * quotachecked license. |
1369 | */ | 1397 | */ |
1370 | if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { | 1398 | if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { |
1371 | cmn_err(CE_NOTE, | 1399 | xfs_notice(mp, "resetting quota flags"); |
1372 | "XFS: resetting qflags for filesystem %s", | ||
1373 | mp->m_fsname); | ||
1374 | |||
1375 | error = xfs_mount_reset_sbqflags(mp); | 1400 | error = xfs_mount_reset_sbqflags(mp); |
1376 | if (error) | 1401 | if (error) |
1377 | return error; | 1402 | return error; |
@@ -1385,7 +1410,7 @@ xfs_mountfs( | |||
1385 | */ | 1410 | */ |
1386 | error = xfs_log_mount_finish(mp); | 1411 | error = xfs_log_mount_finish(mp); |
1387 | if (error) { | 1412 | if (error) { |
1388 | cmn_err(CE_WARN, "XFS: log mount finish failed"); | 1413 | xfs_warn(mp, "log mount finish failed"); |
1389 | goto out_rtunmount; | 1414 | goto out_rtunmount; |
1390 | } | 1415 | } |
1391 | 1416 | ||
@@ -1414,8 +1439,8 @@ xfs_mountfs( | |||
1414 | resblks = xfs_default_resblks(mp); | 1439 | resblks = xfs_default_resblks(mp); |
1415 | error = xfs_reserve_blocks(mp, &resblks, NULL); | 1440 | error = xfs_reserve_blocks(mp, &resblks, NULL); |
1416 | if (error) | 1441 | if (error) |
1417 | cmn_err(CE_WARN, "XFS: Unable to allocate reserve " | 1442 | xfs_warn(mp, |
1418 | "blocks. Continuing without a reserve pool."); | 1443 | "Unable to allocate reserve blocks. Continuing without reserve pool."); |
1419 | } | 1444 | } |
1420 | 1445 | ||
1421 | return 0; | 1446 | return 0; |
@@ -1504,12 +1529,12 @@ xfs_unmountfs( | |||
1504 | resblks = 0; | 1529 | resblks = 0; |
1505 | error = xfs_reserve_blocks(mp, &resblks, NULL); | 1530 | error = xfs_reserve_blocks(mp, &resblks, NULL); |
1506 | if (error) | 1531 | if (error) |
1507 | cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " | 1532 | xfs_warn(mp, "Unable to free reserved block pool. " |
1508 | "Freespace may not be correct on next mount."); | 1533 | "Freespace may not be correct on next mount."); |
1509 | 1534 | ||
1510 | error = xfs_log_sbcount(mp, 1); | 1535 | error = xfs_log_sbcount(mp, 1); |
1511 | if (error) | 1536 | if (error) |
1512 | cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " | 1537 | xfs_warn(mp, "Unable to update superblock counters. " |
1513 | "Freespace may not be correct on next mount."); | 1538 | "Freespace may not be correct on next mount."); |
1514 | xfs_unmountfs_writesb(mp); | 1539 | xfs_unmountfs_writesb(mp); |
1515 | xfs_unmountfs_wait(mp); /* wait for async bufs */ | 1540 | xfs_unmountfs_wait(mp); /* wait for async bufs */ |
@@ -1992,10 +2017,8 @@ xfs_dev_is_read_only( | |||
1992 | if (xfs_readonly_buftarg(mp->m_ddev_targp) || | 2017 | if (xfs_readonly_buftarg(mp->m_ddev_targp) || |
1993 | xfs_readonly_buftarg(mp->m_logdev_targp) || | 2018 | xfs_readonly_buftarg(mp->m_logdev_targp) || |
1994 | (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { | 2019 | (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { |
1995 | cmn_err(CE_NOTE, | 2020 | xfs_notice(mp, "%s required on read-only device.", message); |
1996 | "XFS: %s required on read-only device.", message); | 2021 | xfs_notice(mp, "write access unavailable, cannot proceed."); |
1997 | cmn_err(CE_NOTE, | ||
1998 | "XFS: write access unavailable, cannot proceed."); | ||
1999 | return EROFS; | 2022 | return EROFS; |
2000 | } | 2023 | } |
2001 | return 0; | 2024 | return 0; |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 5861b4980740..19af0ab0d0c6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -103,6 +103,16 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, | |||
103 | xfs_mod_incore_sb(mp, field, delta, rsvd) | 103 | xfs_mod_incore_sb(mp, field, delta, rsvd) |
104 | #endif | 104 | #endif |
105 | 105 | ||
106 | /* dynamic preallocation free space thresholds, 5% down to 1% */ | ||
107 | enum { | ||
108 | XFS_LOWSP_1_PCNT = 0, | ||
109 | XFS_LOWSP_2_PCNT, | ||
110 | XFS_LOWSP_3_PCNT, | ||
111 | XFS_LOWSP_4_PCNT, | ||
112 | XFS_LOWSP_5_PCNT, | ||
113 | XFS_LOWSP_MAX, | ||
114 | }; | ||
115 | |||
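The enum orders the thresholds so a consumer can simply count how many cutoffs the current free space has fallen below and scale speculative preallocation back accordingly. A hypothetical helper sketching that use (low_space_level() is not part of this patch):

#include <stdint.h>

#define XFS_LOWSP_MAX	5

/* returns 0 when free space is above 5%, up to XFS_LOWSP_MAX below 1% */
static int low_space_level(uint64_t freeblks,
			   const uint64_t low_space[XFS_LOWSP_MAX])
{
	int i, level = 0;

	for (i = 0; i < XFS_LOWSP_MAX; i++)
		if (freeblks < low_space[i])
			level++;
	return level;	/* e.g. halve the prealloc size 'level' times */
}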
106 | typedef struct xfs_mount { | 116 | typedef struct xfs_mount { |
107 | struct super_block *m_super; | 117 | struct super_block *m_super; |
108 | xfs_tid_t m_tid; /* next unused tid for fs */ | 118 | xfs_tid_t m_tid; /* next unused tid for fs */ |
@@ -193,15 +203,14 @@ typedef struct xfs_mount { | |||
193 | struct mutex m_icsb_mutex; /* balancer sync lock */ | 203 | struct mutex m_icsb_mutex; /* balancer sync lock */ |
194 | #endif | 204 | #endif |
195 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ | 205 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ |
196 | struct task_struct *m_sync_task; /* generalised sync thread */ | 206 | struct delayed_work m_sync_work; /* background sync work */ |
197 | xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ | 207 | struct delayed_work m_reclaim_work; /* background inode reclaim */ |
198 | struct list_head m_sync_list; /* sync thread work item list */ | 208 | struct work_struct m_flush_work; /* background inode flush */ |
199 | spinlock_t m_sync_lock; /* work item list lock */ | ||
200 | int m_sync_seq; /* sync thread generation no. */ | ||
201 | wait_queue_head_t m_wait_single_sync_task; | ||
202 | __int64_t m_update_flags; /* sb flags we need to update | 209 | __int64_t m_update_flags; /* sb flags we need to update |
203 | on the next remount,rw */ | 210 | on the next remount,rw */ |
204 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 211 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |
212 | int64_t m_low_space[XFS_LOWSP_MAX]; | ||
213 | /* low free space thresholds */ | ||
205 | } xfs_mount_t; | 214 | } xfs_mount_t; |
206 | 215 | ||
207 | /* | 216 | /* |
@@ -379,6 +388,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); | |||
379 | 388 | ||
380 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | 389 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); |
381 | 390 | ||
391 | extern void xfs_set_low_space_thresholds(struct xfs_mount *); | ||
392 | |||
382 | #endif /* __KERNEL__ */ | 393 | #endif /* __KERNEL__ */ |
383 | 394 | ||
384 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); | 395 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); |
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 45ce15dc5b2b..4aff56395732 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void) | |||
309 | if (!xfs_mru_elem_zone) | 309 | if (!xfs_mru_elem_zone) |
310 | goto out; | 310 | goto out; |
311 | 311 | ||
312 | xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); | 312 | xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); |
313 | if (!xfs_mru_reap_wq) | 313 | if (!xfs_mru_reap_wq) |
314 | goto out_destroy_mru_elem_zone; | 314 | goto out_destroy_mru_elem_zone; |
315 | 315 | ||
@@ -408,7 +408,7 @@ xfs_mru_cache_flush( | |||
408 | spin_lock(&mru->lock); | 408 | spin_lock(&mru->lock); |
409 | if (mru->queued) { | 409 | if (mru->queued) { |
410 | spin_unlock(&mru->lock); | 410 | spin_unlock(&mru->lock); |
411 | cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); | 411 | cancel_delayed_work_sync(&mru->work); |
412 | spin_lock(&mru->lock); | 412 | spin_lock(&mru->lock); |
413 | } | 413 | } |
414 | 414 | ||
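Both hunks move the MRU reaper off deprecated workqueue interfaces: alloc_workqueue() with WQ_MEM_RECLAIM keeps a rescuer thread around so the queue can make progress under memory pressure (max_active = 1 preserves the old single-threaded ordering), and cancel_delayed_work_sync() copes with self-rearming delayed work, which is what lets it replace cancel_rearming_delayed_workqueue(). A minimal, hypothetical module sketch of the same pattern:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;
static struct delayed_work demo_work;

static void demo_reap(struct work_struct *work)
{
	/* rearm ourselves, as the MRU cache reaper does */
	queue_delayed_work(demo_wq, &demo_work, msecs_to_jiffies(100));
}

static int __init demo_init(void)
{
	demo_wq = alloc_workqueue("demo_wq", WQ_MEM_RECLAIM, 1);
	if (!demo_wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&demo_work, demo_reap);
	queue_delayed_work(demo_wq, &demo_work, msecs_to_jiffies(100));
	return 0;
}

static void __exit demo_exit(void)
{
	/* waits for a running handler and blocks the rearm */
	cancel_delayed_work_sync(&demo_work);
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");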
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 9bb6eda4cd21..a595f29567fe 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
@@ -382,7 +382,8 @@ static inline int xfs_qm_sync(struct xfs_mount *mp, int flags) | |||
382 | xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ | 382 | xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ |
383 | f | XFS_QMOPT_RES_REGBLKS) | 383 | f | XFS_QMOPT_RES_REGBLKS) |
384 | 384 | ||
385 | extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); | 385 | extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, |
386 | xfs_dqid_t, uint, uint, char *); | ||
386 | extern int xfs_mount_reset_sbqflags(struct xfs_mount *); | 387 | extern int xfs_mount_reset_sbqflags(struct xfs_mount *); |
387 | 388 | ||
388 | #endif /* __KERNEL__ */ | 389 | #endif /* __KERNEL__ */ |
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index d2af0a8381a6..77a59891734e 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
@@ -297,6 +297,7 @@ xfs_rename( | |||
297 | * it and some incremental backup programs won't work without it. | 297 | * it and some incremental backup programs won't work without it. |
298 | */ | 298 | */ |
299 | xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); | 299 | xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); |
300 | xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); | ||
300 | 301 | ||
301 | /* | 302 | /* |
302 | * Adjust the link count on src_dp. This is necessary when | 303 | * Adjust the link count on src_dp. This is necessary when |
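The one-line addition matters because xfs_trans_ichgtime() only updates the in-core timestamps; unless the inode core is also logged with xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE), the transaction commits without the ctime change and log recovery loses it. A toy userspace model of that failure mode (all names here are stand-ins, not the kernel API):

#include <stdio.h>
#include <time.h>

struct toy_inode { time_t ctime; int logged; };

static void trans_ichgtime(struct toy_inode *ip)
{
	ip->ctime = time(NULL);		/* in-core update only */
}

static void trans_log_inode(struct toy_inode *ip)
{
	ip->logged = 1;			/* mark the core dirty in the trans */
}

static void trans_commit(struct toy_inode *ip)
{
	if (ip->logged)
		printf("ctime %ld makes it to the log\n", (long)ip->ctime);
	else
		printf("ctime change lost if we crash here\n");
}

int main(void)
{
	struct toy_inode src = { 0, 0 };

	trans_ichgtime(&src);
	trans_log_inode(&src);		/* the call this hunk adds */
	trans_commit(&src);
	return 0;
}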
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 12a191385310..8f76fdff4f46 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -76,7 +76,7 @@ xfs_growfs_rt_alloc( | |||
76 | xfs_mount_t *mp, /* file system mount point */ | 76 | xfs_mount_t *mp, /* file system mount point */ |
77 | xfs_extlen_t oblocks, /* old count of blocks */ | 77 | xfs_extlen_t oblocks, /* old count of blocks */ |
78 | xfs_extlen_t nblocks, /* new count of blocks */ | 78 | xfs_extlen_t nblocks, /* new count of blocks */ |
79 | xfs_ino_t ino) /* inode number (bitmap/summary) */ | 79 | xfs_inode_t *ip) /* inode (bitmap/summary) */ |
80 | { | 80 | { |
81 | xfs_fileoff_t bno; /* block number in file */ | 81 | xfs_fileoff_t bno; /* block number in file */ |
82 | xfs_buf_t *bp; /* temporary buffer for zeroing */ | 82 | xfs_buf_t *bp; /* temporary buffer for zeroing */ |
@@ -86,7 +86,6 @@ xfs_growfs_rt_alloc( | |||
86 | xfs_fsblock_t firstblock; /* first block allocated in xaction */ | 86 | xfs_fsblock_t firstblock; /* first block allocated in xaction */ |
87 | xfs_bmap_free_t flist; /* list of freed blocks */ | 87 | xfs_bmap_free_t flist; /* list of freed blocks */ |
88 | xfs_fsblock_t fsbno; /* filesystem block for bno */ | 88 | xfs_fsblock_t fsbno; /* filesystem block for bno */ |
89 | xfs_inode_t *ip; /* pointer to incore inode */ | ||
90 | xfs_bmbt_irec_t map; /* block map output */ | 89 | xfs_bmbt_irec_t map; /* block map output */ |
91 | int nmap; /* number of block maps */ | 90 | int nmap; /* number of block maps */ |
92 | int resblks; /* space reservation */ | 91 | int resblks; /* space reservation */ |
@@ -112,9 +111,9 @@ xfs_growfs_rt_alloc( | |||
112 | /* | 111 | /* |
113 | * Lock the inode. | 112 | * Lock the inode. |
114 | */ | 113 | */ |
115 | if ((error = xfs_trans_iget(mp, tp, ino, 0, | 114 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
116 | XFS_ILOCK_EXCL, &ip))) | 115 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); |
117 | goto error_cancel; | 116 | |
118 | xfs_bmap_init(&flist, &firstblock); | 117 | xfs_bmap_init(&flist, &firstblock); |
119 | /* | 118 | /* |
120 | * Allocate blocks to the bitmap file. | 119 | * Allocate blocks to the bitmap file. |
@@ -155,9 +154,8 @@ xfs_growfs_rt_alloc( | |||
155 | /* | 154 | /* |
156 | * Lock the bitmap inode. | 155 | * Lock the bitmap inode. |
157 | */ | 156 | */ |
158 | if ((error = xfs_trans_iget(mp, tp, ino, 0, | 157 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
159 | XFS_ILOCK_EXCL, &ip))) | 158 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); |
160 | goto error_cancel; | ||
161 | /* | 159 | /* |
162 | * Get a buffer for the block. | 160 | * Get a buffer for the block. |
163 | */ | 161 | */ |
@@ -1854,7 +1852,6 @@ xfs_growfs_rt( | |||
1854 | xfs_rtblock_t bmbno; /* bitmap block number */ | 1852 | xfs_rtblock_t bmbno; /* bitmap block number */ |
1855 | xfs_buf_t *bp; /* temporary buffer */ | 1853 | xfs_buf_t *bp; /* temporary buffer */ |
1856 | int error; /* error return value */ | 1854 | int error; /* error return value */ |
1857 | xfs_inode_t *ip; /* bitmap inode, used as lock */ | ||
1858 | xfs_mount_t *nmp; /* new (fake) mount structure */ | 1855 | xfs_mount_t *nmp; /* new (fake) mount structure */ |
1859 | xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ | 1856 | xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ |
1860 | xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ | 1857 | xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ |
@@ -1918,11 +1915,11 @@ xfs_growfs_rt( | |||
1918 | /* | 1915 | /* |
1919 | * Allocate space to the bitmap and summary files, as necessary. | 1916 | * Allocate space to the bitmap and summary files, as necessary. |
1920 | */ | 1917 | */ |
1921 | if ((error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, | 1918 | error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip); |
1922 | mp->m_sb.sb_rbmino))) | 1919 | if (error) |
1923 | return error; | 1920 | return error; |
1924 | if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, | 1921 | error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip); |
1925 | mp->m_sb.sb_rsumino))) | 1922 | if (error) |
1926 | return error; | 1923 | return error; |
1927 | /* | 1924 | /* |
1928 | * Allocate a new (fake) mount/sb. | 1925 | * Allocate a new (fake) mount/sb. |
@@ -1972,10 +1969,8 @@ xfs_growfs_rt( | |||
1972 | /* | 1969 | /* |
1973 | * Lock out other callers by grabbing the bitmap inode lock. | 1970 | * Lock out other callers by grabbing the bitmap inode lock. |
1974 | */ | 1971 | */ |
1975 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, | 1972 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); |
1976 | XFS_ILOCK_EXCL, &ip))) | 1973 | xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL); |
1977 | goto error_cancel; | ||
1978 | ASSERT(ip == mp->m_rbmip); | ||
1979 | /* | 1974 | /* |
1980 | * Update the bitmap inode's size. | 1975 | * Update the bitmap inode's size. |
1981 | */ | 1976 | */ |
@@ -1986,10 +1981,8 @@ xfs_growfs_rt( | |||
1986 | /* | 1981 | /* |
1987 | * Get the summary inode into the transaction. | 1982 | * Get the summary inode into the transaction. |
1988 | */ | 1983 | */ |
1989 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, | 1984 | xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); |
1990 | XFS_ILOCK_EXCL, &ip))) | 1985 | xfs_trans_ijoin_ref(tp, mp->m_rsumip, XFS_ILOCK_EXCL); |
1991 | goto error_cancel; | ||
1992 | ASSERT(ip == mp->m_rsumip); | ||
1993 | /* | 1986 | /* |
1994 | * Update the summary inode's size. | 1987 | * Update the summary inode's size. |
1995 | */ | 1988 | */ |
@@ -2075,15 +2068,15 @@ xfs_rtallocate_extent( | |||
2075 | xfs_extlen_t prod, /* extent product factor */ | 2068 | xfs_extlen_t prod, /* extent product factor */ |
2076 | xfs_rtblock_t *rtblock) /* out: start block allocated */ | 2069 | xfs_rtblock_t *rtblock) /* out: start block allocated */ |
2077 | { | 2070 | { |
2071 | xfs_mount_t *mp = tp->t_mountp; | ||
2078 | int error; /* error value */ | 2072 | int error; /* error value */ |
2079 | xfs_inode_t *ip; /* inode for bitmap file */ | ||
2080 | xfs_mount_t *mp; /* file system mount structure */ | ||
2081 | xfs_rtblock_t r; /* result allocated block */ | 2073 | xfs_rtblock_t r; /* result allocated block */ |
2082 | xfs_fsblock_t sb; /* summary file block number */ | 2074 | xfs_fsblock_t sb; /* summary file block number */ |
2083 | xfs_buf_t *sumbp; /* summary file block buffer */ | 2075 | xfs_buf_t *sumbp; /* summary file block buffer */ |
2084 | 2076 | ||
2077 | ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); | ||
2085 | ASSERT(minlen > 0 && minlen <= maxlen); | 2078 | ASSERT(minlen > 0 && minlen <= maxlen); |
2086 | mp = tp->t_mountp; | 2079 | |
2087 | /* | 2080 | /* |
2088 | * If prod is set then figure out what to do to minlen and maxlen. | 2081 | * If prod is set then figure out what to do to minlen and maxlen. |
2089 | */ | 2082 | */ |
@@ -2099,12 +2092,7 @@ xfs_rtallocate_extent( | |||
2099 | return 0; | 2092 | return 0; |
2100 | } | 2093 | } |
2101 | } | 2094 | } |
2102 | /* | 2095 | |
2103 | * Lock out other callers by grabbing the bitmap inode lock. | ||
2104 | */ | ||
2105 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, | ||
2106 | XFS_ILOCK_EXCL, &ip))) | ||
2107 | return error; | ||
2108 | sumbp = NULL; | 2096 | sumbp = NULL; |
2109 | /* | 2097 | /* |
2110 | * Allocate by size, or near another block, or exactly at some block. | 2098 | * Allocate by size, or near another block, or exactly at some block. |
@@ -2123,11 +2111,12 @@ xfs_rtallocate_extent( | |||
2123 | len, &sumbp, &sb, prod, &r); | 2111 | len, &sumbp, &sb, prod, &r); |
2124 | break; | 2112 | break; |
2125 | default: | 2113 | default: |
2114 | error = EIO; | ||
2126 | ASSERT(0); | 2115 | ASSERT(0); |
2127 | } | 2116 | } |
2128 | if (error) { | 2117 | if (error) |
2129 | return error; | 2118 | return error; |
2130 | } | 2119 | |
2131 | /* | 2120 | /* |
2132 | * If it worked, update the superblock. | 2121 | * If it worked, update the superblock. |
2133 | */ | 2122 | */ |
@@ -2155,7 +2144,6 @@ xfs_rtfree_extent( | |||
2155 | xfs_extlen_t len) /* length of extent freed */ | 2144 | xfs_extlen_t len) /* length of extent freed */ |
2156 | { | 2145 | { |
2157 | int error; /* error value */ | 2146 | int error; /* error value */ |
2158 | xfs_inode_t *ip; /* bitmap file inode */ | ||
2159 | xfs_mount_t *mp; /* file system mount structure */ | 2147 | xfs_mount_t *mp; /* file system mount structure */ |
2160 | xfs_fsblock_t sb; /* summary file block number */ | 2148 | xfs_fsblock_t sb; /* summary file block number */ |
2161 | xfs_buf_t *sumbp; /* summary file block buffer */ | 2149 | xfs_buf_t *sumbp; /* summary file block buffer */ |
@@ -2164,9 +2152,9 @@ xfs_rtfree_extent( | |||
2164 | /* | 2152 | /* |
2165 | * Synchronize by locking the bitmap inode. | 2153 | * Synchronize by locking the bitmap inode. |
2166 | */ | 2154 | */ |
2167 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, | 2155 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); |
2168 | XFS_ILOCK_EXCL, &ip))) | 2156 | xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL); |
2169 | return error; | 2157 | |
2170 | #if defined(__KERNEL__) && defined(DEBUG) | 2158 | #if defined(__KERNEL__) && defined(DEBUG) |
2171 | /* | 2159 | /* |
2172 | * Check to see that this whole range is currently allocated. | 2160 | * Check to see that this whole range is currently allocated. |
@@ -2199,10 +2187,10 @@ xfs_rtfree_extent( | |||
2199 | */ | 2187 | */ |
2200 | if (tp->t_frextents_delta + mp->m_sb.sb_frextents == | 2188 | if (tp->t_frextents_delta + mp->m_sb.sb_frextents == |
2201 | mp->m_sb.sb_rextents) { | 2189 | mp->m_sb.sb_rextents) { |
2202 | if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) | 2190 | if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) |
2203 | ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; | 2191 | mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; |
2204 | *(__uint64_t *)&ip->i_d.di_atime = 0; | 2192 | *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; |
2205 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2193 | xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); |
2206 | } | 2194 | } |
2207 | return 0; | 2195 | return 0; |
2208 | } | 2196 | } |
@@ -2222,8 +2210,8 @@ xfs_rtmount_init( | |||
2222 | if (sbp->sb_rblocks == 0) | 2210 | if (sbp->sb_rblocks == 0) |
2223 | return 0; | 2211 | return 0; |
2224 | if (mp->m_rtdev_targp == NULL) { | 2212 | if (mp->m_rtdev_targp == NULL) { |
2225 | cmn_err(CE_WARN, | 2213 | xfs_warn(mp, |
2226 | "XFS: This filesystem has a realtime volume, use rtdev=device option"); | 2214 | "Filesystem has a realtime volume, use rtdev=device option"); |
2227 | return XFS_ERROR(ENODEV); | 2215 | return XFS_ERROR(ENODEV); |
2228 | } | 2216 | } |
2229 | mp->m_rsumlevels = sbp->sb_rextslog + 1; | 2217 | mp->m_rsumlevels = sbp->sb_rextslog + 1; |
@@ -2237,7 +2225,7 @@ xfs_rtmount_init( | |||
2237 | */ | 2225 | */ |
2238 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); | 2226 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); |
2239 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { | 2227 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { |
2240 | cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", | 2228 | xfs_warn(mp, "realtime mount -- %llu != %llu", |
2241 | (unsigned long long) XFS_BB_TO_FSB(mp, d), | 2229 | (unsigned long long) XFS_BB_TO_FSB(mp, d), |
2242 | (unsigned long long) mp->m_sb.sb_rblocks); | 2230 | (unsigned long long) mp->m_sb.sb_rblocks); |
2243 | return XFS_ERROR(EFBIG); | 2231 | return XFS_ERROR(EFBIG); |
@@ -2246,7 +2234,7 @@ xfs_rtmount_init( | |||
2246 | d - XFS_FSB_TO_BB(mp, 1), | 2234 | d - XFS_FSB_TO_BB(mp, 1), |
2247 | XFS_FSB_TO_B(mp, 1), 0); | 2235 | XFS_FSB_TO_B(mp, 1), 0); |
2248 | if (!bp) { | 2236 | if (!bp) { |
2249 | cmn_err(CE_WARN, "XFS: realtime device size check failed"); | 2237 | xfs_warn(mp, "realtime device size check failed"); |
2250 | return EIO; | 2238 | return EIO; |
2251 | } | 2239 | } |
2252 | xfs_buf_relse(bp); | 2240 | xfs_buf_relse(bp); |
@@ -2306,20 +2294,16 @@ xfs_rtpick_extent( | |||
2306 | xfs_rtblock_t *pick) /* result rt extent */ | 2294 | xfs_rtblock_t *pick) /* result rt extent */ |
2307 | { | 2295 | { |
2308 | xfs_rtblock_t b; /* result block */ | 2296 | xfs_rtblock_t b; /* result block */ |
2309 | int error; /* error return value */ | ||
2310 | xfs_inode_t *ip; /* bitmap incore inode */ | ||
2311 | int log2; /* log of sequence number */ | 2297 | int log2; /* log of sequence number */ |
2312 | __uint64_t resid; /* residual after log removed */ | 2298 | __uint64_t resid; /* residual after log removed */ |
2313 | __uint64_t seq; /* sequence number of file creation */ | 2299 | __uint64_t seq; /* sequence number of file creation */ |
2314 | __uint64_t *seqp; /* pointer to seqno in inode */ | 2300 | __uint64_t *seqp; /* pointer to seqno in inode */ |
2315 | 2301 | ||
2316 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, | 2302 | ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); |
2317 | XFS_ILOCK_EXCL, &ip))) | 2303 | |
2318 | return error; | 2304 | seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime; |
2319 | ASSERT(ip == mp->m_rbmip); | 2305 | if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) { |
2320 | seqp = (__uint64_t *)&ip->i_d.di_atime; | 2306 | mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; |
2321 | if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) { | ||
2322 | ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; | ||
2323 | *seqp = 0; | 2307 | *seqp = 0; |
2324 | } | 2308 | } |
2325 | seq = *seqp; | 2309 | seq = *seqp; |
@@ -2335,7 +2319,7 @@ xfs_rtpick_extent( | |||
2335 | b = mp->m_sb.sb_rextents - len; | 2319 | b = mp->m_sb.sb_rextents - len; |
2336 | } | 2320 | } |
2337 | *seqp = seq + 1; | 2321 | *seqp = seq + 1; |
2338 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2322 | xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); |
2339 | *pick = b; | 2323 | *pick = b; |
2340 | return 0; | 2324 | return 0; |
2341 | } | 2325 | } |
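Throughout xfs_rtalloc.c the pattern "xfs_trans_iget() the bitmap inode by number" becomes "xfs_ilock() the cached mp->m_rbmip/mp->m_rsumip pointer, then xfs_trans_ijoin_ref() it": the lookup was redundant because the realtime inodes are pinned in the mount structure, and joining with a lock reference hands the unlock over to transaction commit. A toy userspace model of that ownership transfer (stand-in names, a pthread mutex instead of the inode lock):

#include <pthread.h>
#include <stdio.h>

struct toy_inode { pthread_mutex_t lock; };
struct toy_trans { struct toy_inode *joined; };

static void ilock(struct toy_inode *ip)
{
	pthread_mutex_lock(&ip->lock);		/* caller takes the lock */
}

static void trans_ijoin_ref(struct toy_trans *tp, struct toy_inode *ip)
{
	tp->joined = ip;	/* the transaction now owes the unlock */
}

static void trans_commit(struct toy_trans *tp)
{
	if (tp->joined)
		pthread_mutex_unlock(&tp->joined->lock);
	tp->joined = NULL;
}

int main(void)
{
	struct toy_inode rbmip = { PTHREAD_MUTEX_INITIALIZER };
	struct toy_trans tp = { NULL };

	ilock(&rbmip);
	trans_ijoin_ref(&tp, &rbmip);
	/* ... modify the bitmap inode under the lock ... */
	trans_commit(&tp);			/* unlock happens here */
	printf("committed\n");
	return 0;
}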
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index ff614c29b441..09e1f4f35e97 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h | |||
@@ -154,7 +154,7 @@ xfs_rtmount_init( | |||
154 | if (mp->m_sb.sb_rblocks == 0) | 154 | if (mp->m_sb.sb_rblocks == 0) |
155 | return 0; | 155 | return 0; |
156 | 156 | ||
157 | cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); | 157 | xfs_warn(mp, "Not built with CONFIG_XFS_RT"); |
158 | return ENOSYS; | 158 | return ENOSYS; |
159 | } | 159 | } |
160 | # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) | 160 | # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 56861d5daaef..d6d6fdfe9422 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
@@ -49,9 +49,9 @@ xfs_do_force_shutdown( | |||
49 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; | 49 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; |
50 | 50 | ||
51 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 51 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
52 | cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " | 52 | xfs_notice(mp, |
53 | "line %d of file %s. Return address = 0x%p", | 53 | "%s(0x%x) called from line %d of file %s. Return address = 0x%p", |
54 | mp->m_fsname, flags, lnnum, fname, __return_address); | 54 | __func__, flags, lnnum, fname, __return_address); |
55 | } | 55 | } |
56 | /* | 56 | /* |
57 | * No need to duplicate efforts. | 57 | * No need to duplicate efforts. |
@@ -69,30 +69,25 @@ xfs_do_force_shutdown( | |||
69 | return; | 69 | return; |
70 | 70 | ||
71 | if (flags & SHUTDOWN_CORRUPT_INCORE) { | 71 | if (flags & SHUTDOWN_CORRUPT_INCORE) { |
72 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, | 72 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, |
73 | "Corruption of in-memory data detected. Shutting down filesystem: %s", | 73 | "Corruption of in-memory data detected. Shutting down filesystem"); |
74 | mp->m_fsname); | 74 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) |
75 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) { | ||
76 | xfs_stack_trace(); | 75 | xfs_stack_trace(); |
77 | } | ||
78 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 76 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
79 | if (logerror) { | 77 | if (logerror) { |
80 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, | 78 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, |
81 | "Log I/O Error Detected. Shutting down filesystem: %s", | 79 | "Log I/O Error Detected. Shutting down filesystem"); |
82 | mp->m_fsname); | ||
83 | } else if (flags & SHUTDOWN_DEVICE_REQ) { | 80 | } else if (flags & SHUTDOWN_DEVICE_REQ) { |
84 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, | 81 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, |
85 | "All device paths lost. Shutting down filesystem: %s", | 82 | "All device paths lost. Shutting down filesystem"); |
86 | mp->m_fsname); | ||
87 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { | 83 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { |
88 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, | 84 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, |
89 | "I/O Error Detected. Shutting down filesystem: %s", | 85 | "I/O Error Detected. Shutting down filesystem"); |
90 | mp->m_fsname); | ||
91 | } | 86 | } |
92 | } | 87 | } |
93 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 88 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
94 | cmn_err(CE_ALERT, "Please umount the filesystem, " | 89 | xfs_alert(mp, |
95 | "and rectify the problem(s)"); | 90 | "Please umount the filesystem and rectify the problem(s)"); |
96 | } | 91 | } |
97 | } | 92 | } |
98 | 93 | ||
@@ -106,10 +101,9 @@ xfs_ioerror_alert( | |||
106 | xfs_buf_t *bp, | 101 | xfs_buf_t *bp, |
107 | xfs_daddr_t blkno) | 102 | xfs_daddr_t blkno) |
108 | { | 103 | { |
109 | cmn_err(CE_ALERT, | 104 | xfs_alert(mp, |
110 | "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" | 105 | "I/O error occurred: meta-data dev %s block 0x%llx" |
111 | " (\"%s\") error %d buf count %zd", | 106 | " (\"%s\") error %d buf count %zd", |
112 | (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname, | ||
113 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 107 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), |
114 | (__uint64_t)blkno, func, | 108 | (__uint64_t)blkno, func, |
115 | XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); | 109 | XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); |
@@ -173,17 +167,9 @@ xfs_extlen_t | |||
173 | xfs_get_extsz_hint( | 167 | xfs_get_extsz_hint( |
174 | struct xfs_inode *ip) | 168 | struct xfs_inode *ip) |
175 | { | 169 | { |
176 | xfs_extlen_t extsz; | 170 | if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) |
177 | 171 | return ip->i_d.di_extsize; | |
178 | if (unlikely(XFS_IS_REALTIME_INODE(ip))) { | 172 | if (XFS_IS_REALTIME_INODE(ip)) |
179 | extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) | 173 | return ip->i_mount->m_sb.sb_rextsize; |
180 | ? ip->i_d.di_extsize | 174 | return 0; |
181 | : ip->i_mount->m_sb.sb_rextsize; | ||
182 | ASSERT(extsz); | ||
183 | } else { | ||
184 | extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) | ||
185 | ? ip->i_d.di_extsize : 0; | ||
186 | } | ||
187 | |||
188 | return extsz; | ||
189 | } | 175 | } |
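Besides being shorter, the xfs_get_extsz_hint() rewrite changes one corner case: a realtime inode with the EXTSIZE flag set but a zero di_extsize used to return 0 (tripping the ASSERT on debug kernels) and now falls back to sb_rextsize. A userspace comparison that prints the single differing input (the flag bit values are assumed for the model, not taken from the headers):

#include <stdio.h>
#include <stdint.h>

/* assumed bit values for the model only */
#define DIFLAG_REALTIME	0x1
#define DIFLAG_EXTSIZE	0x2

static uint32_t hint_old(uint32_t fl, uint32_t extsize, uint32_t rextsize)
{
	if (fl & DIFLAG_REALTIME)
		return (fl & DIFLAG_EXTSIZE) ? extsize : rextsize;
	return (fl & DIFLAG_EXTSIZE) ? extsize : 0;
}

static uint32_t hint_new(uint32_t fl, uint32_t extsize, uint32_t rextsize)
{
	if ((fl & DIFLAG_EXTSIZE) && extsize)
		return extsize;
	if (fl & DIFLAG_REALTIME)
		return rextsize;
	return 0;
}

int main(void)
{
	uint32_t fl, ext;

	for (fl = 0; fl <= 3; fl++)
		for (ext = 0; ext <= 8; ext += 8)	/* extsize 0 or 8 */
			if (hint_old(fl, ext, 16) != hint_new(fl, ext, 16))
				printf("flags=%x extsize=%u: old=%u new=%u\n",
				       fl, ext, hint_old(fl, ext, 16),
				       hint_new(fl, ext, 16));
	return 0;
}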
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f6d956b7711e..76922793f64f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -1137,7 +1137,7 @@ out_undo_fdblocks: | |||
1137 | if (blkdelta) | 1137 | if (blkdelta) |
1138 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); | 1138 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); |
1139 | out: | 1139 | out: |
1140 | ASSERT(error = 0); | 1140 | ASSERT(error == 0); |
1141 | return; | 1141 | return; |
1142 | } | 1142 | } |
1143 | 1143 | ||
@@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs( | |||
1350 | * they could be immediately flushed and we'd have to race with the flusher | 1350 | * they could be immediately flushed and we'd have to race with the flusher |
1351 | * trying to pull the item from the AIL as we add it. | 1351 | * trying to pull the item from the AIL as we add it. |
1352 | */ | 1352 | */ |
1353 | void | 1353 | static void |
1354 | xfs_trans_item_committed( | 1354 | xfs_trans_item_committed( |
1355 | struct xfs_log_item *lip, | 1355 | struct xfs_log_item *lip, |
1356 | xfs_lsn_t commit_lsn, | 1356 | xfs_lsn_t commit_lsn, |
@@ -1425,21 +1425,120 @@ xfs_trans_committed( | |||
1425 | xfs_trans_free(tp); | 1425 | xfs_trans_free(tp); |
1426 | } | 1426 | } |
1427 | 1427 | ||
1428 | static inline void | ||
1429 | xfs_log_item_batch_insert( | ||
1430 | struct xfs_ail *ailp, | ||
1431 | struct xfs_log_item **log_items, | ||
1432 | int nr_items, | ||
1433 | xfs_lsn_t commit_lsn) | ||
1434 | { | ||
1435 | int i; | ||
1436 | |||
1437 | spin_lock(&ailp->xa_lock); | ||
1438 | /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ | ||
1439 | xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); | ||
1440 | |||
1441 | for (i = 0; i < nr_items; i++) | ||
1442 | IOP_UNPIN(log_items[i], 0); | ||
1443 | } | ||
1444 | |||
1428 | /* | 1445 | /* |
1429 | * Called from the trans_commit code when we notice that | 1446 | * Bulk operation version of xfs_trans_committed that takes a log vector of |
1430 | * the filesystem is in the middle of a forced shutdown. | 1447 | * items to insert into the AIL. This uses bulk AIL insertion techniques to |
1448 | * minimise lock traffic. | ||
1449 | * | ||
1450 | * If we are called with the aborted flag set, it is because a log write during | ||
1451 | * a CIL checkpoint commit has failed. In this case, all the items in the | ||
1452 | * checkpoint have already gone through IOP_COMMITTED and IOP_UNLOCK, which | ||
1453 | * means that checkpoint commit abort handling is treated exactly the same | ||
1454 | * as an iclog write error even though we haven't started any IO yet. Hence in | ||
1455 | * this case all we need to do is IOP_COMMITTED processing, followed by an | ||
1456 | * IOP_UNPIN(aborted) call. | ||
1457 | */ | ||
1458 | void | ||
1459 | xfs_trans_committed_bulk( | ||
1460 | struct xfs_ail *ailp, | ||
1461 | struct xfs_log_vec *log_vector, | ||
1462 | xfs_lsn_t commit_lsn, | ||
1463 | int aborted) | ||
1464 | { | ||
1465 | #define LOG_ITEM_BATCH_SIZE 32 | ||
1466 | struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; | ||
1467 | struct xfs_log_vec *lv; | ||
1468 | int i = 0; | ||
1469 | |||
1470 | /* unpin all the log items */ | ||
1471 | for (lv = log_vector; lv; lv = lv->lv_next) { | ||
1472 | struct xfs_log_item *lip = lv->lv_item; | ||
1473 | xfs_lsn_t item_lsn; | ||
1474 | |||
1475 | if (aborted) | ||
1476 | lip->li_flags |= XFS_LI_ABORTED; | ||
1477 | item_lsn = IOP_COMMITTED(lip, commit_lsn); | ||
1478 | |||
1479 | /* item_lsn of -1 means the item was freed */ | ||
1480 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) | ||
1481 | continue; | ||
1482 | |||
1483 | /* | ||
1484 | * if we are aborting the operation, no point in inserting the | ||
1485 | * object into the AIL as we are in a shutdown situation. | ||
1486 | */ | ||
1487 | if (aborted) { | ||
1488 | ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); | ||
1489 | IOP_UNPIN(lip, 1); | ||
1490 | continue; | ||
1491 | } | ||
1492 | |||
1493 | if (item_lsn != commit_lsn) { | ||
1494 | |||
1495 | /* | ||
1496 | * Not a bulk update option due to unusual item_lsn. | ||
1497 | * Push into AIL immediately, rechecking the lsn once | ||
1498 | * we have the ail lock. Then unpin the item. | ||
1499 | */ | ||
1500 | spin_lock(&ailp->xa_lock); | ||
1501 | if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) | ||
1502 | xfs_trans_ail_update(ailp, lip, item_lsn); | ||
1503 | else | ||
1504 | spin_unlock(&ailp->xa_lock); | ||
1505 | IOP_UNPIN(lip, 0); | ||
1506 | continue; | ||
1507 | } | ||
1508 | |||
1509 | /* Item is a candidate for bulk AIL insert. */ | ||
1510 | log_items[i++] = lv->lv_item; | ||
1511 | if (i >= LOG_ITEM_BATCH_SIZE) { | ||
1512 | xfs_log_item_batch_insert(ailp, log_items, | ||
1513 | LOG_ITEM_BATCH_SIZE, commit_lsn); | ||
1514 | i = 0; | ||
1515 | } | ||
1516 | } | ||
1517 | |||
1518 | /* make sure we insert the remainder! */ | ||
1519 | if (i) | ||
1520 | xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); | ||
1521 | } | ||
1522 | |||
1523 | /* | ||
1524 | * Called from the trans_commit code when we notice that the filesystem is in | ||
1525 | * the middle of a forced shutdown. | ||
1526 | * | ||
1527 | * When we are called here, we have already pinned all the items in the | ||
1528 | * transaction. However, neither IOP_COMMITTING nor IOP_UNLOCK has been called | ||
1529 | * so we can simply walk the items in the transaction, unpin them with an abort | ||
1530 | * flag and then free the items. Note that unpinning the items can result in | ||
1531 | * them being freed immediately, so we need to use a safe list traversal method | ||
1532 | * here. | ||
1431 | */ | 1533 | */ |
1432 | STATIC void | 1534 | STATIC void |
1433 | xfs_trans_uncommit( | 1535 | xfs_trans_uncommit( |
1434 | struct xfs_trans *tp, | 1536 | struct xfs_trans *tp, |
1435 | uint flags) | 1537 | uint flags) |
1436 | { | 1538 | { |
1437 | struct xfs_log_item_desc *lidp; | 1539 | struct xfs_log_item_desc *lidp, *n; |
1438 | 1540 | ||
1439 | list_for_each_entry(lidp, &tp->t_items, lid_trans) { | 1541 | list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) { |
1440 | /* | ||
1441 | * Unpin all but those that aren't dirty. | ||
1442 | */ | ||
1443 | if (lidp->lid_flags & XFS_LID_DIRTY) | 1542 | if (lidp->lid_flags & XFS_LID_DIRTY) |
1444 | IOP_UNPIN(lidp->lid_item, 1); | 1543 | IOP_UNPIN(lidp->lid_item, 1); |
1445 | } | 1544 | } |
@@ -1656,7 +1755,6 @@ xfs_trans_commit_cil( | |||
1656 | int flags) | 1755 | int flags) |
1657 | { | 1756 | { |
1658 | struct xfs_log_vec *log_vector; | 1757 | struct xfs_log_vec *log_vector; |
1659 | int error; | ||
1660 | 1758 | ||
1661 | /* | 1759 | /* |
1662 | * Get each log item to allocate a vector structure for | 1760 | * Get each log item to allocate a vector structure for |
@@ -1667,9 +1765,7 @@ xfs_trans_commit_cil( | |||
1667 | if (!log_vector) | 1765 | if (!log_vector) |
1668 | return ENOMEM; | 1766 | return ENOMEM; |
1669 | 1767 | ||
1670 | error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); | 1768 | xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); |
1671 | if (error) | ||
1672 | return error; | ||
1673 | 1769 | ||
1674 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1770 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1675 | xfs_trans_free(tp); | 1771 | xfs_trans_free(tp); |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 246286b77a86..06a9759b6352 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -294,8 +294,8 @@ struct xfs_log_item_desc { | |||
294 | #define XFS_ALLOC_BTREE_REF 2 | 294 | #define XFS_ALLOC_BTREE_REF 2 |
295 | #define XFS_BMAP_BTREE_REF 2 | 295 | #define XFS_BMAP_BTREE_REF 2 |
296 | #define XFS_DIR_BTREE_REF 2 | 296 | #define XFS_DIR_BTREE_REF 2 |
297 | #define XFS_INO_REF 2 | ||
297 | #define XFS_ATTR_BTREE_REF 1 | 298 | #define XFS_ATTR_BTREE_REF 1 |
298 | #define XFS_INO_REF 1 | ||
299 | #define XFS_DQUOT_REF 1 | 299 | #define XFS_DQUOT_REF 1 |
300 | 300 | ||
301 | #ifdef __KERNEL__ | 301 | #ifdef __KERNEL__ |
@@ -469,8 +469,6 @@ void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); | |||
469 | void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); | 469 | void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); |
470 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); | 470 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); |
471 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); | 471 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); |
472 | int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, | ||
473 | xfs_ino_t , uint, uint, struct xfs_inode **); | ||
474 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); | 472 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); |
475 | void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); | 473 | void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); |
476 | void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); | 474 | void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index dc9069568ff7..acdb92f14d51 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -28,74 +28,138 @@ | |||
28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
30 | 30 | ||
31 | STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); | 31 | struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ |
32 | STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); | ||
33 | STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); | ||
34 | STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); | ||
35 | 32 | ||
36 | #ifdef DEBUG | 33 | #ifdef DEBUG |
37 | STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); | 34 | /* |
38 | #else | 35 | * Check that the list is sorted as it should be. |
36 | */ | ||
37 | STATIC void | ||
38 | xfs_ail_check( | ||
39 | struct xfs_ail *ailp, | ||
40 | xfs_log_item_t *lip) | ||
41 | { | ||
42 | xfs_log_item_t *prev_lip; | ||
43 | |||
44 | if (list_empty(&ailp->xa_ail)) | ||
45 | return; | ||
46 | |||
47 | /* | ||
48 | * Check the next and previous entries are valid. | ||
49 | */ | ||
50 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
51 | prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); | ||
52 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
53 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
54 | |||
55 | prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); | ||
56 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
57 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); | ||
58 | |||
59 | |||
60 | #ifdef XFS_TRANS_DEBUG | ||
61 | /* | ||
62 | * Walk the list checking lsn ordering, and that every entry has the | ||
63 | * XFS_LI_IN_AIL flag set. This is really expensive, so only do it | ||
64 | * when specifically debugging the transaction subsystem. | ||
65 | */ | ||
66 | prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
67 | list_for_each_entry(lip, &ailp->xa_ail, li_ail) { | ||
68 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
69 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
70 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
71 | prev_lip = lip; | ||
72 | } | ||
73 | #endif /* XFS_TRANS_DEBUG */ | ||
74 | } | ||
75 | #else /* !DEBUG */ | ||
39 | #define xfs_ail_check(a,l) | 76 | #define xfs_ail_check(a,l) |
40 | #endif /* DEBUG */ | 77 | #endif /* DEBUG */ |
41 | 78 | ||
79 | /* | ||
80 | * Return a pointer to the first item in the AIL. If the AIL is empty, then | ||
81 | * return NULL. | ||
82 | */ | ||
83 | static xfs_log_item_t * | ||
84 | xfs_ail_min( | ||
85 | struct xfs_ail *ailp) | ||
86 | { | ||
87 | if (list_empty(&ailp->xa_ail)) | ||
88 | return NULL; | ||
89 | |||
90 | return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Return a pointer to the last item in the AIL. If the AIL is empty, then | ||
95 | * return NULL. | ||
96 | */ | ||
97 | static xfs_log_item_t * | ||
98 | xfs_ail_max( | ||
99 | struct xfs_ail *ailp) | ||
100 | { | ||
101 | if (list_empty(&ailp->xa_ail)) | ||
102 | return NULL; | ||
103 | |||
104 | return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Return a pointer to the item which follows the given item in the AIL. If | ||
109 | * the given item is the last item in the list, then return NULL. | ||
110 | */ | ||
111 | static xfs_log_item_t * | ||
112 | xfs_ail_next( | ||
113 | struct xfs_ail *ailp, | ||
114 | xfs_log_item_t *lip) | ||
115 | { | ||
116 | if (lip->li_ail.next == &ailp->xa_ail) | ||
117 | return NULL; | ||
118 | |||
119 | return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); | ||
120 | } | ||
42 | 121 | ||
43 | /* | 122 | /* |
44 | * This is called by the log manager code to determine the LSN | 123 | * This is called by the log manager code to determine the LSN of the tail of |
45 | * of the tail of the log. This is exactly the LSN of the first | 124 | * the log. This is exactly the LSN of the first item in the AIL. If the AIL |
46 | * item in the AIL. If the AIL is empty, then this function | 125 | * is empty, then this function returns 0. |
47 | * returns 0. | ||
48 | * | 126 | * |
49 | * We need the AIL lock in order to get a coherent read of the | 127 | * We need the AIL lock in order to get a coherent read of the lsn of the last |
50 | * lsn of the last item in the AIL. | 128 | * item in the AIL. |
51 | */ | 129 | */ |
52 | xfs_lsn_t | 130 | xfs_lsn_t |
53 | xfs_trans_ail_tail( | 131 | xfs_ail_min_lsn( |
54 | struct xfs_ail *ailp) | 132 | struct xfs_ail *ailp) |
55 | { | 133 | { |
56 | xfs_lsn_t lsn; | 134 | xfs_lsn_t lsn = 0; |
57 | xfs_log_item_t *lip; | 135 | xfs_log_item_t *lip; |
58 | 136 | ||
59 | spin_lock(&ailp->xa_lock); | 137 | spin_lock(&ailp->xa_lock); |
60 | lip = xfs_ail_min(ailp); | 138 | lip = xfs_ail_min(ailp); |
61 | if (lip == NULL) { | 139 | if (lip) |
62 | lsn = (xfs_lsn_t)0; | ||
63 | } else { | ||
64 | lsn = lip->li_lsn; | 140 | lsn = lip->li_lsn; |
65 | } | ||
66 | spin_unlock(&ailp->xa_lock); | 141 | spin_unlock(&ailp->xa_lock); |
67 | 142 | ||
68 | return lsn; | 143 | return lsn; |
69 | } | 144 | } |
70 | 145 | ||
71 | /* | 146 | /* |
72 | * xfs_trans_push_ail | 147 | * Return the maximum lsn held in the AIL, or zero if the AIL is empty. |
73 | * | ||
74 | * This routine is called to move the tail of the AIL forward. It does this by | ||
75 | * trying to flush items in the AIL whose lsns are below the given | ||
76 | * threshold_lsn. | ||
77 | * | ||
78 | * the push is run asynchronously in a separate thread, so we return the tail | ||
79 | * of the log right now instead of the tail after the push. This means we will | ||
80 | * either continue right away, or we will sleep waiting on the async thread to | ||
81 | * do its work. | ||
82 | * | ||
83 | * We do this unlocked - we only need to know whether there is anything in the | ||
84 | * AIL at the time we are called. We don't need to access the contents of | ||
85 | * any of the objects, so the lock is not needed. | ||
86 | */ | 148 | */ |
87 | void | 149 | static xfs_lsn_t |
88 | xfs_trans_ail_push( | 150 | xfs_ail_max_lsn( |
89 | struct xfs_ail *ailp, | 151 | struct xfs_ail *ailp) |
90 | xfs_lsn_t threshold_lsn) | ||
91 | { | 152 | { |
92 | xfs_log_item_t *lip; | 153 | xfs_lsn_t lsn = 0; |
154 | xfs_log_item_t *lip; | ||
93 | 155 | ||
94 | lip = xfs_ail_min(ailp); | 156 | spin_lock(&ailp->xa_lock); |
95 | if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { | 157 | lip = xfs_ail_max(ailp); |
96 | if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) | 158 | if (lip) |
97 | xfsaild_wakeup(ailp, threshold_lsn); | 159 | lsn = lip->li_lsn; |
98 | } | 160 | spin_unlock(&ailp->xa_lock); |
161 | |||
162 | return lsn; | ||
99 | } | 163 | } |
100 | 164 | ||
101 | /* | 165 | /* |
@@ -236,16 +300,57 @@ out: | |||
236 | } | 300 | } |
237 | 301 | ||
238 | /* | 302 | /* |
239 | * xfsaild_push does the work of pushing on the AIL. Returning a timeout of | 303 | * splice the log item list into the AIL at the given LSN. |
240 | * zero indicates that the caller should sleep until woken. | ||
241 | */ | 304 | */ |
242 | long | 305 | static void |
243 | xfsaild_push( | 306 | xfs_ail_splice( |
244 | struct xfs_ail *ailp, | 307 | struct xfs_ail *ailp, |
245 | xfs_lsn_t *last_lsn) | 308 | struct list_head *list, |
309 | xfs_lsn_t lsn) | ||
310 | { | ||
311 | xfs_log_item_t *next_lip; | ||
312 | |||
313 | /* If the list is empty, just insert the item. */ | ||
314 | if (list_empty(&ailp->xa_ail)) { | ||
315 | list_splice(list, &ailp->xa_ail); | ||
316 | return; | ||
317 | } | ||
318 | |||
319 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { | ||
320 | if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) | ||
321 | break; | ||
322 | } | ||
323 | |||
324 | ASSERT(&next_lip->li_ail == &ailp->xa_ail || | ||
325 | XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); | ||
326 | |||
327 | list_splice_init(list, &next_lip->li_ail); | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * Delete the given item from the AIL. | ||
332 | */ | ||
333 | static void | ||
334 | xfs_ail_delete( | ||
335 | struct xfs_ail *ailp, | ||
336 | xfs_log_item_t *lip) | ||
246 | { | 337 | { |
247 | long tout = 0; | 338 | xfs_ail_check(ailp, lip); |
248 | xfs_lsn_t last_pushed_lsn = *last_lsn; | 339 | list_del(&lip->li_ail); |
340 | xfs_trans_ail_cursor_clear(ailp, lip); | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself | ||
345 | * to run at a later time if there is more work to do to complete the push. | ||
346 | */ | ||
347 | STATIC void | ||
348 | xfs_ail_worker( | ||
349 | struct work_struct *work) | ||
350 | { | ||
351 | struct xfs_ail *ailp = container_of(to_delayed_work(work), | ||
352 | struct xfs_ail, xa_work); | ||
353 | long tout; | ||
249 | xfs_lsn_t target = ailp->xa_target; | 354 | xfs_lsn_t target = ailp->xa_target; |
250 | xfs_lsn_t lsn; | 355 | xfs_lsn_t lsn; |
251 | xfs_log_item_t *lip; | 356 | xfs_log_item_t *lip; |
@@ -256,15 +361,15 @@ xfsaild_push( | |||
256 | 361 | ||
257 | spin_lock(&ailp->xa_lock); | 362 | spin_lock(&ailp->xa_lock); |
258 | xfs_trans_ail_cursor_init(ailp, cur); | 363 | xfs_trans_ail_cursor_init(ailp, cur); |
259 | lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); | 364 | lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); |
260 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { | 365 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { |
261 | /* | 366 | /* |
262 | * AIL is empty or our push has reached the end. | 367 | * AIL is empty or our push has reached the end. |
263 | */ | 368 | */ |
264 | xfs_trans_ail_cursor_done(ailp, cur); | 369 | xfs_trans_ail_cursor_done(ailp, cur); |
265 | spin_unlock(&ailp->xa_lock); | 370 | spin_unlock(&ailp->xa_lock); |
266 | *last_lsn = 0; | 371 | ailp->xa_last_pushed_lsn = 0; |
267 | return tout; | 372 | return; |
268 | } | 373 | } |
269 | 374 | ||
270 | XFS_STATS_INC(xs_push_ail); | 375 | XFS_STATS_INC(xs_push_ail); |
@@ -301,13 +406,13 @@ xfsaild_push( | |||
301 | case XFS_ITEM_SUCCESS: | 406 | case XFS_ITEM_SUCCESS: |
302 | XFS_STATS_INC(xs_push_ail_success); | 407 | XFS_STATS_INC(xs_push_ail_success); |
303 | IOP_PUSH(lip); | 408 | IOP_PUSH(lip); |
304 | last_pushed_lsn = lsn; | 409 | ailp->xa_last_pushed_lsn = lsn; |
305 | break; | 410 | break; |
306 | 411 | ||
307 | case XFS_ITEM_PUSHBUF: | 412 | case XFS_ITEM_PUSHBUF: |
308 | XFS_STATS_INC(xs_push_ail_pushbuf); | 413 | XFS_STATS_INC(xs_push_ail_pushbuf); |
309 | IOP_PUSHBUF(lip); | 414 | IOP_PUSHBUF(lip); |
310 | last_pushed_lsn = lsn; | 415 | ailp->xa_last_pushed_lsn = lsn; |
311 | push_xfsbufd = 1; | 416 | push_xfsbufd = 1; |
312 | break; | 417 | break; |
313 | 418 | ||
@@ -319,7 +424,7 @@ xfsaild_push( | |||
319 | 424 | ||
320 | case XFS_ITEM_LOCKED: | 425 | case XFS_ITEM_LOCKED: |
321 | XFS_STATS_INC(xs_push_ail_locked); | 426 | XFS_STATS_INC(xs_push_ail_locked); |
322 | last_pushed_lsn = lsn; | 427 | ailp->xa_last_pushed_lsn = lsn; |
323 | stuck++; | 428 | stuck++; |
324 | break; | 429 | break; |
325 | 430 | ||
@@ -374,9 +479,23 @@ xfsaild_push( | |||
374 | wake_up_process(mp->m_ddev_targp->bt_task); | 479 | wake_up_process(mp->m_ddev_targp->bt_task); |
375 | } | 480 | } |
376 | 481 | ||
482 | /* assume we have more work to do in a short while */ | ||
483 | tout = 10; | ||
377 | if (!count) { | 484 | if (!count) { |
378 | /* We're past our target or empty, so idle */ | 485 | /* We're past our target or empty, so idle */ |
379 | last_pushed_lsn = 0; | 486 | ailp->xa_last_pushed_lsn = 0; |
487 | |||
488 | /* | ||
489 | * Check for an updated push target before clearing the | ||
490 | * XFS_AIL_PUSHING_BIT. If the target changed, we've got more | ||
491 | * work to do. Wait a bit longer before starting that work. | ||
492 | */ | ||
493 | smp_rmb(); | ||
494 | if (ailp->xa_target == target) { | ||
495 | clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); | ||
496 | return; | ||
497 | } | ||
498 | tout = 50; | ||
380 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { | 499 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { |
381 | /* | 500 | /* |
382 | * We reached the target so wait a bit longer for I/O to | 501 | * We reached the target so wait a bit longer for I/O to |
@@ -384,7 +503,7 @@ xfsaild_push( | |||
384 | * start the next scan from the start of the AIL. | 503 | * start the next scan from the start of the AIL. |
385 | */ | 504 | */ |
386 | tout = 50; | 505 | tout = 50; |
387 | last_pushed_lsn = 0; | 506 | ailp->xa_last_pushed_lsn = 0; |
388 | } else if ((stuck * 100) / count > 90) { | 507 | } else if ((stuck * 100) / count > 90) { |
389 | /* | 508 | /* |
390 | * Either there is a lot of contention on the AIL or we | 509 | * Either there is a lot of contention on the AIL or we |
@@ -396,14 +515,61 @@ xfsaild_push( | |||
396 | * continuing from where we were. | 515 | * continuing from where we were. |
397 | */ | 516 | */ |
398 | tout = 20; | 517 | tout = 20; |
399 | } else { | ||
400 | /* more to do, but wait a short while before continuing */ | ||
401 | tout = 10; | ||
402 | } | 518 | } |
403 | *last_lsn = last_pushed_lsn; | 519 | |
404 | return tout; | 520 | /* There is more to do, requeue us. */ |
521 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, | ||
522 | msecs_to_jiffies(tout)); | ||
405 | } | 523 | } |
406 | 524 | ||
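
The worker now drives itself: each pass ends by requeueing with a delay of
10ms (assume more work), 20ms (heavily stuck) or 50ms (target reached), and
only the no-work path clears the pushing bit and stops. A hedged,
module-style sketch of that self-requeueing delayed-work pattern (struct
pusher and push_wq are illustrative names, not from the patch):

    #include <linux/workqueue.h>

    struct pusher {
        struct delayed_work work;
        /* ... state the worker consumes ... */
    };

    static struct workqueue_struct *push_wq;    /* created at init time */

    static void push_worker(struct work_struct *work)
    {
        struct pusher *p = container_of(to_delayed_work(work),
                                        struct pusher, work);
        long tout = 10;         /* assume more work to do shortly */

        /* ... one bounded pass of work; raise tout to 20 or 50 when
         * progress is poor or the target has been reached ... */

        /* requeue ourselves instead of sleeping in a dedicated thread */
        queue_delayed_work(push_wq, &p->work, msecs_to_jiffies(tout));
    }
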
525 | /* | ||
526 | * This routine is called to move the tail of the AIL forward. It does this by | ||
527 | * trying to flush items in the AIL whose lsns are below the given | ||
528 | * threshold_lsn. | ||
529 | * | ||
530 | * The push is run asynchronously in a workqueue, which means the caller needs | ||
531 | * to handle waiting on the async flush for space to become available. | ||
532 | * We don't want to interrupt any push that is in progress, hence we only queue | ||
533 | * work if we set the pushing bit appropriately. | ||
534 | * | ||
535 | * We do this unlocked - we only need to know whether there is anything in the | ||
536 | * AIL at the time we are called. We don't need to access the contents of | ||
537 | * any of the objects, so the lock is not needed. | ||
538 | */ | ||
539 | void | ||
540 | xfs_ail_push( | ||
541 | struct xfs_ail *ailp, | ||
542 | xfs_lsn_t threshold_lsn) | ||
543 | { | ||
544 | xfs_log_item_t *lip; | ||
545 | |||
546 | lip = xfs_ail_min(ailp); | ||
547 | if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) || | ||
548 | XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0) | ||
549 | return; | ||
550 | |||
551 | /* | ||
552 | * Ensure that the new target is noticed in push code before it clears | ||
553 | * the XFS_AIL_PUSHING_BIT. | ||
554 | */ | ||
555 | smp_wmb(); | ||
556 | ailp->xa_target = threshold_lsn; | ||
557 | if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) | ||
558 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); | ||
559 | } | ||
560 | |||
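
The smp_wmb() above pairs with the smp_rmb() in xfs_ail_worker(): the pusher
publishes the new target before testing XFS_AIL_PUSHING_BIT, and the worker
re-reads the target before clearing the bit, so an update posted while the
worker is winding down is never lost. Both sides condensed side by side
(barrier placement follows the patch; this is a restatement, not new code):

    /* pusher side: make the new target visible, then ensure a worker
     * is running to act on it */
    smp_wmb();
    ailp->xa_target = threshold_lsn;
    if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
        queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);

    /* worker side, on the idle path: only clear the bit if the target
     * did not move underneath us */
    smp_rmb();
    if (ailp->xa_target == target) {
        clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
        return;
    }
    /* target changed: fall through and requeue */
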
561 | /* | ||
562 | * Push out all items in the AIL immediately | ||
563 | */ | ||
564 | void | ||
565 | xfs_ail_push_all( | ||
566 | struct xfs_ail *ailp) | ||
567 | { | ||
568 | xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp); | ||
569 | |||
570 | if (threshold_lsn) | ||
571 | xfs_ail_push(ailp, threshold_lsn); | ||
572 | } | ||
407 | 573 | ||
408 | /* | 574 | /* |
409 | * This is to be called when an item is unlocked that may have | 575 | * This is to be called when an item is unlocked that may have |
@@ -449,129 +615,152 @@ xfs_trans_unlocked_item( | |||
449 | xfs_log_move_tail(ailp->xa_mount, 1); | 615 | xfs_log_move_tail(ailp->xa_mount, 1); |
450 | } /* xfs_trans_unlocked_item */ | 616 | } /* xfs_trans_unlocked_item */ |
451 | 617 | ||
452 | |||
453 | /* | 618 | /* |
454 | * Update the position of the item in the AIL with the new | 619 | * xfs_trans_ail_update - bulk AIL insertion operation. |
455 | * lsn. If it is not yet in the AIL, add it. Otherwise, move | 620 | * |
456 | * it to its new position by removing it and re-adding it. | 621 | * @xfs_trans_ail_update takes an array of log items that all need to be |
622 | * positioned at the same LSN in the AIL. If an item is not in the AIL, it will | ||
623 | * be added. Otherwise, it will be repositioned by removing it and re-adding | ||
624 | * it to the AIL. If we move the first item in the AIL, update the log tail to | ||
625 | * match the new minimum LSN in the AIL. | ||
626 | * | ||
627 | * This function takes the AIL lock once to execute the update operations on | ||
628 | * all the items in the array, and as such should not be called with the AIL | ||
629 | * lock held. As a result, once we have the AIL lock, we need to check each log | ||
630 | * item LSN to confirm it needs to be moved forward in the AIL. | ||
457 | * | 631 | * |
458 | * Wakeup anyone with an lsn less than the item's lsn. If the item | 632 | * To optimise the insert operation, we delete all the items from the AIL in |
459 | * we move in the AIL is the minimum one, update the tail lsn in the | 633 | * the first pass, moving them into a temporary list, then splice the temporary |
460 | * log manager. | 634 | * list into the correct position in the AIL. This avoids needing to do an |
635 | * insert operation on every item. | ||
461 | * | 636 | * |
462 | * This function must be called with the AIL lock held. The lock | 637 | * This function must be called with the AIL lock held. The lock is dropped |
463 | * is dropped before returning. | 638 | * before returning. |
464 | */ | 639 | */ |
465 | void | 640 | void |
466 | xfs_trans_ail_update( | 641 | xfs_trans_ail_update_bulk( |
467 | struct xfs_ail *ailp, | 642 | struct xfs_ail *ailp, |
468 | xfs_log_item_t *lip, | 643 | struct xfs_log_item **log_items, |
469 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | 644 | int nr_items, |
645 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | ||
470 | { | 646 | { |
471 | xfs_log_item_t *dlip = NULL; | 647 | xfs_log_item_t *mlip; |
472 | xfs_log_item_t *mlip; /* ptr to minimum lip */ | ||
473 | xfs_lsn_t tail_lsn; | 648 | xfs_lsn_t tail_lsn; |
649 | int mlip_changed = 0; | ||
650 | int i; | ||
651 | LIST_HEAD(tmp); | ||
474 | 652 | ||
475 | mlip = xfs_ail_min(ailp); | 653 | mlip = xfs_ail_min(ailp); |
476 | 654 | ||
477 | if (lip->li_flags & XFS_LI_IN_AIL) { | 655 | for (i = 0; i < nr_items; i++) { |
478 | dlip = xfs_ail_delete(ailp, lip); | 656 | struct xfs_log_item *lip = log_items[i]; |
479 | ASSERT(dlip == lip); | 657 | if (lip->li_flags & XFS_LI_IN_AIL) { |
480 | xfs_trans_ail_cursor_clear(ailp, dlip); | 658 | /* check if we really need to move the item */ |
481 | } else { | 659 | if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0) |
482 | lip->li_flags |= XFS_LI_IN_AIL; | 660 | continue; |
661 | |||
662 | xfs_ail_delete(ailp, lip); | ||
663 | if (mlip == lip) | ||
664 | mlip_changed = 1; | ||
665 | } else { | ||
666 | lip->li_flags |= XFS_LI_IN_AIL; | ||
667 | } | ||
668 | lip->li_lsn = lsn; | ||
669 | list_add(&lip->li_ail, &tmp); | ||
483 | } | 670 | } |
484 | 671 | ||
485 | lip->li_lsn = lsn; | 672 | xfs_ail_splice(ailp, &tmp, lsn); |
486 | xfs_ail_insert(ailp, lip); | ||
487 | 673 | ||
488 | if (mlip == dlip) { | 674 | if (!mlip_changed) { |
489 | mlip = xfs_ail_min(ailp); | ||
490 | /* | ||
491 | * It is not safe to access mlip after the AIL lock is | ||
492 | * dropped, so we must get a copy of li_lsn before we do | ||
493 | * so. This is especially important on 32-bit platforms | ||
494 | * where accessing and updating 64-bit values like li_lsn | ||
495 | * is not atomic. | ||
496 | */ | ||
497 | tail_lsn = mlip->li_lsn; | ||
498 | spin_unlock(&ailp->xa_lock); | ||
499 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
500 | } else { | ||
501 | spin_unlock(&ailp->xa_lock); | 675 | spin_unlock(&ailp->xa_lock); |
676 | return; | ||
502 | } | 677 | } |
503 | 678 | ||
504 | 679 | /* | |
505 | } /* xfs_trans_update_ail */ | 680 | * It is not safe to access mlip after the AIL lock is dropped, so we |
681 | * must get a copy of li_lsn before we do so. This is especially | ||
682 | * important on 32-bit platforms where accessing and updating 64-bit | ||
683 | * values like li_lsn is not atomic. | ||
684 | */ | ||
685 | mlip = xfs_ail_min(ailp); | ||
686 | tail_lsn = mlip->li_lsn; | ||
687 | spin_unlock(&ailp->xa_lock); | ||
688 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
689 | } | ||
506 | 690 | ||
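
A typical caller of the bulk update gathers the items of a just-committed
checkpoint into an array and moves them in one shot, paying for the AIL lock
once instead of once per item. A hedged usage sketch (the caller shape and
names are illustrative):

    /* Move a batch of committed items to their new LSN with a single
     * AIL lock round trip; xfs_trans_ail_update_bulk() drops the lock. */
    static void
    move_committed_items(
        struct xfs_ail          *ailp,
        struct xfs_log_item     **items,
        int                     count,
        xfs_lsn_t               commit_lsn)
    {
        spin_lock(&ailp->xa_lock);
        xfs_trans_ail_update_bulk(ailp, items, count, commit_lsn);
    }
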
507 | /* | 691 | /* |
508 | * Delete the given item from the AIL. It must already be in | 692 | * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL |
509 | * the AIL. | 693 | * |
694 | * @xfs_trans_ail_delete_bulk takes an array of log items that all need to be | ||
695 | * removed from the AIL. The caller is already holding the AIL lock, and has | ||
696 | * done all the checks necessary to ensure the items passed in via @log_items | ||
697 | * are ready for deletion. This includes checking that the items are in the AIL. | ||
510 | * | 698 | * |
511 | * Wakeup anyone with an lsn less than item's lsn. If the item | 699 | * For each log item to be removed, unlink it from the AIL, clear the IN_AIL |
512 | * we delete in the AIL is the minimum one, update the tail lsn in the | 700 | * flag from the item and reset the item's lsn to 0. If we remove the first |
513 | * log manager. | 701 | * item in the AIL, update the log tail to match the new minimum LSN in the |
702 | * AIL. | ||
514 | * | 703 | * |
515 | * Clear the IN_AIL flag from the item, reset its lsn to 0, and | 704 | * This function will not drop the AIL lock until all items are removed from |
516 | * bump the AIL's generation count to indicate that the tree | 705 | * the AIL to minimise the amount of lock traffic on the AIL. This does not |
517 | * has changed. | 706 | * greatly increase the AIL hold time, but does significantly reduce the amount |
707 | * of traffic on the lock, especially during IO completion. | ||
518 | * | 708 | * |
519 | * This function must be called with the AIL lock held. The lock | 709 | * This function must be called with the AIL lock held. The lock is dropped |
520 | * is dropped before returning. | 710 | * before returning. |
521 | */ | 711 | */ |
522 | void | 712 | void |
523 | xfs_trans_ail_delete( | 713 | xfs_trans_ail_delete_bulk( |
524 | struct xfs_ail *ailp, | 714 | struct xfs_ail *ailp, |
525 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | 715 | struct xfs_log_item **log_items, |
716 | int nr_items) __releases(ailp->xa_lock) | ||
526 | { | 717 | { |
527 | xfs_log_item_t *dlip; | ||
528 | xfs_log_item_t *mlip; | 718 | xfs_log_item_t *mlip; |
529 | xfs_lsn_t tail_lsn; | 719 | xfs_lsn_t tail_lsn; |
720 | int mlip_changed = 0; | ||
721 | int i; | ||
530 | 722 | ||
531 | if (lip->li_flags & XFS_LI_IN_AIL) { | 723 | mlip = xfs_ail_min(ailp); |
532 | mlip = xfs_ail_min(ailp); | ||
533 | dlip = xfs_ail_delete(ailp, lip); | ||
534 | ASSERT(dlip == lip); | ||
535 | xfs_trans_ail_cursor_clear(ailp, dlip); | ||
536 | 724 | ||
725 | for (i = 0; i < nr_items; i++) { | ||
726 | struct xfs_log_item *lip = log_items[i]; | ||
727 | if (!(lip->li_flags & XFS_LI_IN_AIL)) { | ||
728 | struct xfs_mount *mp = ailp->xa_mount; | ||
537 | 729 | ||
538 | lip->li_flags &= ~XFS_LI_IN_AIL; | ||
539 | lip->li_lsn = 0; | ||
540 | |||
541 | if (mlip == dlip) { | ||
542 | mlip = xfs_ail_min(ailp); | ||
543 | /* | ||
544 | * It is not safe to access mlip after the AIL lock | ||
545 | * is dropped, so we must get a copy of li_lsn | ||
546 | * before we do so. This is especially important | ||
547 | * on 32-bit platforms where accessing and updating | ||
548 | * 64-bit values like li_lsn is not atomic. | ||
549 | */ | ||
550 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
551 | spin_unlock(&ailp->xa_lock); | ||
552 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
553 | } else { | ||
554 | spin_unlock(&ailp->xa_lock); | 730 | spin_unlock(&ailp->xa_lock); |
731 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
732 | xfs_alert_tag(mp, XFS_PTAG_AILDELETE, | ||
733 | "%s: attempting to delete a log item that is not in the AIL", | ||
734 | __func__); | ||
735 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
736 | } | ||
737 | return; | ||
555 | } | 738 | } |
739 | |||
740 | xfs_ail_delete(ailp, lip); | ||
741 | lip->li_flags &= ~XFS_LI_IN_AIL; | ||
742 | lip->li_lsn = 0; | ||
743 | if (mlip == lip) | ||
744 | mlip_changed = 1; | ||
556 | } | 745 | } |
557 | else { | ||
558 | /* | ||
559 | * If the file system is not being shutdown, we are in | ||
560 | * serious trouble if we get to this stage. | ||
561 | */ | ||
562 | struct xfs_mount *mp = ailp->xa_mount; | ||
563 | 746 | ||
747 | if (!mlip_changed) { | ||
564 | spin_unlock(&ailp->xa_lock); | 748 | spin_unlock(&ailp->xa_lock); |
565 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 749 | return; |
566 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | ||
567 | "%s: attempting to delete a log item that is not in the AIL", | ||
568 | __func__); | ||
569 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
570 | } | ||
571 | } | 750 | } |
572 | } | ||
573 | |||
574 | 751 | ||
752 | /* | ||
753 | * It is not safe to access mlip after the AIL lock is dropped, so we | ||
754 | * must get a copy of li_lsn before we do so. This is especially | ||
755 | * important on 32-bit platforms where accessing and updating 64-bit | ||
756 | * values like li_lsn is not atomic. It is possible we've emptied the | ||
757 | * AIL here, so if that is the case, pass an LSN of 0 to the tail move. | ||
758 | */ | ||
759 | mlip = xfs_ail_min(ailp); | ||
760 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
761 | spin_unlock(&ailp->xa_lock); | ||
762 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
763 | } | ||
575 | 764 | ||
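
The bulk delete is the mirror image, typically driven from I/O completion
where every item attached to a just-written buffer becomes removable at
once. Again a hedged sketch under the same caller conventions:

    /* Remove a batch of completed items; the items must already be
     * known to be in the AIL, and the bulk call drops the lock. */
    static void
    remove_completed_items(
        struct xfs_ail          *ailp,
        struct xfs_log_item     **items,
        int                     count)
    {
        spin_lock(&ailp->xa_lock);
        xfs_trans_ail_delete_bulk(ailp, items, count);
    }
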
576 | /* | 765 | /* |
577 | * The active item list (AIL) is a doubly linked list of log | 766 | * The active item list (AIL) is a doubly linked list of log |
@@ -592,7 +781,6 @@ xfs_trans_ail_init( | |||
592 | xfs_mount_t *mp) | 781 | xfs_mount_t *mp) |
593 | { | 782 | { |
594 | struct xfs_ail *ailp; | 783 | struct xfs_ail *ailp; |
595 | int error; | ||
596 | 784 | ||
597 | ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); | 785 | ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); |
598 | if (!ailp) | 786 | if (!ailp) |
@@ -601,15 +789,9 @@ xfs_trans_ail_init( | |||
601 | ailp->xa_mount = mp; | 789 | ailp->xa_mount = mp; |
602 | INIT_LIST_HEAD(&ailp->xa_ail); | 790 | INIT_LIST_HEAD(&ailp->xa_ail); |
603 | spin_lock_init(&ailp->xa_lock); | 791 | spin_lock_init(&ailp->xa_lock); |
604 | error = xfsaild_start(ailp); | 792 | INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); |
605 | if (error) | ||
606 | goto out_free_ailp; | ||
607 | mp->m_ail = ailp; | 793 | mp->m_ail = ailp; |
608 | return 0; | 794 | return 0; |
609 | |||
610 | out_free_ailp: | ||
611 | kmem_free(ailp); | ||
612 | return error; | ||
613 | } | 795 | } |
614 | 796 | ||
615 | void | 797 | void |
@@ -618,135 +800,6 @@ xfs_trans_ail_destroy( | |||
618 | { | 800 | { |
619 | struct xfs_ail *ailp = mp->m_ail; | 801 | struct xfs_ail *ailp = mp->m_ail; |
620 | 802 | ||
621 | xfsaild_stop(ailp); | 803 | cancel_delayed_work_sync(&ailp->xa_work); |
622 | kmem_free(ailp); | 804 | kmem_free(ailp); |
623 | } | 805 | } |
624 | |||
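
Switching from the xfsaild thread to delayed work also shrinks the
setup/teardown story: INIT_DELAYED_WORK() cannot fail (hence the dropped
error path above), and cancel_delayed_work_sync() both cancels a pending run
and waits out an executing one. In miniature, reusing the struct pusher
sketch from earlier:

    static void pusher_init(struct pusher *p)
    {
        INIT_DELAYED_WORK(&p->work, push_worker);   /* cannot fail */
    }

    static void pusher_destroy(struct pusher *p)
    {
        /* after this returns, no worker can still be touching 'p' */
        cancel_delayed_work_sync(&p->work);
    }
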
625 | /* | ||
626 | * Insert the given log item into the AIL. | ||
627 | * We almost always insert at the end of the list, so on inserts | ||
628 | * we search from the end of the list to find where the | ||
629 | * new item belongs. | ||
630 | */ | ||
631 | STATIC void | ||
632 | xfs_ail_insert( | ||
633 | struct xfs_ail *ailp, | ||
634 | xfs_log_item_t *lip) | ||
635 | /* ARGSUSED */ | ||
636 | { | ||
637 | xfs_log_item_t *next_lip; | ||
638 | |||
639 | /* | ||
640 | * If the list is empty, just insert the item. | ||
641 | */ | ||
642 | if (list_empty(&ailp->xa_ail)) { | ||
643 | list_add(&lip->li_ail, &ailp->xa_ail); | ||
644 | return; | ||
645 | } | ||
646 | |||
647 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { | ||
648 | if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) | ||
649 | break; | ||
650 | } | ||
651 | |||
652 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || | ||
653 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); | ||
654 | |||
655 | list_add(&lip->li_ail, &next_lip->li_ail); | ||
656 | |||
657 | xfs_ail_check(ailp, lip); | ||
658 | return; | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * Delete the given item from the AIL. Return a pointer to the item. | ||
663 | */ | ||
664 | /*ARGSUSED*/ | ||
665 | STATIC xfs_log_item_t * | ||
666 | xfs_ail_delete( | ||
667 | struct xfs_ail *ailp, | ||
668 | xfs_log_item_t *lip) | ||
669 | /* ARGSUSED */ | ||
670 | { | ||
671 | xfs_ail_check(ailp, lip); | ||
672 | |||
673 | list_del(&lip->li_ail); | ||
674 | |||
675 | return lip; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * Return a pointer to the first item in the AIL. | ||
680 | * If the AIL is empty, then return NULL. | ||
681 | */ | ||
682 | STATIC xfs_log_item_t * | ||
683 | xfs_ail_min( | ||
684 | struct xfs_ail *ailp) | ||
685 | /* ARGSUSED */ | ||
686 | { | ||
687 | if (list_empty(&ailp->xa_ail)) | ||
688 | return NULL; | ||
689 | |||
690 | return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
691 | } | ||
692 | |||
693 | /* | ||
694 | * Return a pointer to the item which follows | ||
695 | * the given item in the AIL. If the given item | ||
696 | * is the last item in the list, then return NULL. | ||
697 | */ | ||
698 | STATIC xfs_log_item_t * | ||
699 | xfs_ail_next( | ||
700 | struct xfs_ail *ailp, | ||
701 | xfs_log_item_t *lip) | ||
702 | /* ARGSUSED */ | ||
703 | { | ||
704 | if (lip->li_ail.next == &ailp->xa_ail) | ||
705 | return NULL; | ||
706 | |||
707 | return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); | ||
708 | } | ||
709 | |||
710 | #ifdef DEBUG | ||
711 | /* | ||
712 | * Check that the list is sorted as it should be. | ||
713 | */ | ||
714 | STATIC void | ||
715 | xfs_ail_check( | ||
716 | struct xfs_ail *ailp, | ||
717 | xfs_log_item_t *lip) | ||
718 | { | ||
719 | xfs_log_item_t *prev_lip; | ||
720 | |||
721 | if (list_empty(&ailp->xa_ail)) | ||
722 | return; | ||
723 | |||
724 | /* | ||
725 | * Check the next and previous entries are valid. | ||
726 | */ | ||
727 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
728 | prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); | ||
729 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
730 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
731 | |||
732 | prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); | ||
733 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
734 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); | ||
735 | |||
736 | |||
737 | #ifdef XFS_TRANS_DEBUG | ||
738 | /* | ||
739 | * Walk the list checking lsn ordering, and that every entry has the | ||
740 | * XFS_LI_IN_AIL flag set. This is really expensive, so only do it | ||
741 | * when specifically debugging the transaction subsystem. | ||
742 | */ | ||
743 | prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
744 | list_for_each_entry(lip, &ailp->xa_ail, li_ail) { | ||
745 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
746 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
747 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
748 | prev_lip = lip; | ||
749 | } | ||
750 | #endif /* XFS_TRANS_DEBUG */ | ||
751 | } | ||
752 | #endif /* DEBUG */ | ||
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index c47918c302a5..03b3b7f85a3b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -305,7 +305,7 @@ xfs_trans_read_buf( | |||
305 | if (xfs_error_target == target) { | 305 | if (xfs_error_target == target) { |
306 | if (((xfs_req_num++) % xfs_error_mod) == 0) { | 306 | if (((xfs_req_num++) % xfs_error_mod) == 0) { |
307 | xfs_buf_relse(bp); | 307 | xfs_buf_relse(bp); |
308 | cmn_err(CE_DEBUG, "Returning error!\n"); | 308 | xfs_debug(mp, "Returning error!"); |
309 | return XFS_ERROR(EIO); | 309 | return XFS_ERROR(EIO); |
310 | } | 310 | } |
311 | } | 311 | } |
@@ -383,7 +383,8 @@ xfs_trans_read_buf( | |||
383 | bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); | 383 | bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); |
384 | if (bp == NULL) { | 384 | if (bp == NULL) { |
385 | *bpp = NULL; | 385 | *bpp = NULL; |
386 | return 0; | 386 | return (flags & XBF_TRYLOCK) ? |
387 | 0 : XFS_ERROR(ENOMEM); | ||
387 | } | 388 | } |
388 | if (XFS_BUF_GETERROR(bp) != 0) { | 389 | if (XFS_BUF_GETERROR(bp) != 0) { |
389 | XFS_BUF_SUPER_STALE(bp); | 390 | XFS_BUF_SUPER_STALE(bp); |
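
With this change a NULL buffer is no longer reported as success across the
board: only XBF_TRYLOCK callers see (bp == NULL, error == 0), everyone else
now gets ENOMEM. A hedged sketch of the caller-side distinction (the
surrounding caller is illustrative):

    struct xfs_buf  *bp;
    int             error;

    error = xfs_trans_read_buf(mp, tp, target, blkno, len, flags, &bp);
    if (error)
        return error;   /* hard failure: EIO, or now ENOMEM */
    if (!bp) {
        /* only possible with XBF_TRYLOCK: the buffer lock was
         * contended, so back off and let the caller retry */
        return 0;
    }
    /* ... use 'bp' ... */
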
@@ -403,7 +404,7 @@ xfs_trans_read_buf( | |||
403 | xfs_force_shutdown(tp->t_mountp, | 404 | xfs_force_shutdown(tp->t_mountp, |
404 | SHUTDOWN_META_IO_ERROR); | 405 | SHUTDOWN_META_IO_ERROR); |
405 | xfs_buf_relse(bp); | 406 | xfs_buf_relse(bp); |
406 | cmn_err(CE_DEBUG, "Returning trans error!\n"); | 407 | xfs_debug(mp, "Returning trans error!"); |
407 | return XFS_ERROR(EIO); | 408 | return XFS_ERROR(EIO); |
408 | } | 409 | } |
409 | } | 410 | } |
@@ -427,7 +428,7 @@ shutdown_abort: | |||
427 | */ | 428 | */ |
428 | #if defined(DEBUG) | 429 | #if defined(DEBUG) |
429 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) | 430 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) |
430 | cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); | 431 | xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); |
431 | #endif | 432 | #endif |
432 | ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != | 433 | ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != |
433 | (XBF_STALE|XBF_DELWRI)); | 434 | (XBF_STALE|XBF_DELWRI)); |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index f783d5e9fa70..f7590f5badea 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
@@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp, | |||
69 | tp->t_flags |= XFS_TRANS_DIRTY; | 69 | tp->t_flags |= XFS_TRANS_DIRTY; |
70 | efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; | 70 | efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; |
71 | 71 | ||
72 | next_extent = efip->efi_next_extent; | 72 | /* |
73 | * atomic_inc_return gives us the value after the increment; | ||
74 | * we want to use it as an array index so we need to subtract 1 from | ||
75 | * it. | ||
76 | */ | ||
77 | next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; | ||
73 | ASSERT(next_extent < efip->efi_format.efi_nextents); | 78 | ASSERT(next_extent < efip->efi_format.efi_nextents); |
74 | extp = &(efip->efi_format.efi_extents[next_extent]); | 79 | extp = &(efip->efi_format.efi_extents[next_extent]); |
75 | extp->ext_start = start_block; | 80 | extp->ext_start = start_block; |
76 | extp->ext_len = ext_len; | 81 | extp->ext_len = ext_len; |
77 | efip->efi_next_extent++; | ||
78 | } | 82 | } |
79 | 83 | ||
80 | 84 | ||
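
The atomic_inc_return() conversion is the classic lock-free slot reservation
idiom: each caller gets a unique post-increment value, and subtracting one
turns it into a private array index. A runnable user-space C11 equivalent
(note that C11's atomic_fetch_add returns the value before the increment, so
it needs no "- 1"):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int next_slot;        /* starts at 0 */
    static int slots[8];

    /* Reserve one slot; safe to call concurrently from many threads. */
    static int claim_slot(void)
    {
        return atomic_fetch_add(&next_slot, 1);
    }

    int main(void)
    {
        int i = claim_slot();
        slots[i] = 42;
        printf("claimed slot %d\n", i);
        return 0;
    }
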
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index ccb34532768b..048b0c689d3e 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -44,28 +44,6 @@ xfs_trans_inode_broot_debug( | |||
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | /* | 46 | /* |
47 | * Get an inode and join it to the transaction. | ||
48 | */ | ||
49 | int | ||
50 | xfs_trans_iget( | ||
51 | xfs_mount_t *mp, | ||
52 | xfs_trans_t *tp, | ||
53 | xfs_ino_t ino, | ||
54 | uint flags, | ||
55 | uint lock_flags, | ||
56 | xfs_inode_t **ipp) | ||
57 | { | ||
58 | int error; | ||
59 | |||
60 | error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); | ||
61 | if (!error && tp) { | ||
62 | xfs_trans_ijoin(tp, *ipp); | ||
63 | (*ipp)->i_itemp->ili_lock_flags = lock_flags; | ||
64 | } | ||
65 | return error; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Add a locked inode to the transaction. | 47 | * Add a locked inode to the transaction. |
70 | * | 48 | * |
71 | * The inode must be locked, and it cannot be associated with any transaction. | 49 | * The inode must be locked, and it cannot be associated with any transaction. |
@@ -103,7 +81,7 @@ xfs_trans_ijoin( | |||
103 | * | 81 | * |
104 | * | 82 | * |
105 | * Grabs a reference to the inode which will be dropped when the transaction | 83 | * Grabs a reference to the inode which will be dropped when the transaction |
106 | * is commited. The inode will also be unlocked at that point. The inode | 84 | * is committed. The inode will also be unlocked at that point. The inode |
107 | * must be locked, and it cannot be associated with any transaction. | 85 | * must be locked, and it cannot be associated with any transaction. |
108 | */ | 86 | */ |
109 | void | 87 | void |
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 62da86c90de5..6b164e9e9a1f 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -22,15 +22,17 @@ struct xfs_log_item; | |||
22 | struct xfs_log_item_desc; | 22 | struct xfs_log_item_desc; |
23 | struct xfs_mount; | 23 | struct xfs_mount; |
24 | struct xfs_trans; | 24 | struct xfs_trans; |
25 | struct xfs_ail; | ||
26 | struct xfs_log_vec; | ||
25 | 27 | ||
26 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); | 28 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); |
27 | void xfs_trans_del_item(struct xfs_log_item *); | 29 | void xfs_trans_del_item(struct xfs_log_item *); |
28 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, | 30 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, |
29 | int flags); | 31 | int flags); |
30 | void xfs_trans_item_committed(struct xfs_log_item *lip, | ||
31 | xfs_lsn_t commit_lsn, int aborted); | ||
32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | 32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); |
33 | 33 | ||
34 | void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, | ||
35 | xfs_lsn_t commit_lsn, int aborted); | ||
34 | /* | 36 | /* |
35 | * AIL traversal cursor. | 37 | * AIL traversal cursor. |
36 | * | 38 | * |
@@ -63,28 +65,52 @@ struct xfs_ail_cursor { | |||
63 | struct xfs_ail { | 65 | struct xfs_ail { |
64 | struct xfs_mount *xa_mount; | 66 | struct xfs_mount *xa_mount; |
65 | struct list_head xa_ail; | 67 | struct list_head xa_ail; |
66 | uint xa_gen; | ||
67 | struct task_struct *xa_task; | ||
68 | xfs_lsn_t xa_target; | 68 | xfs_lsn_t xa_target; |
69 | struct xfs_ail_cursor xa_cursors; | 69 | struct xfs_ail_cursor xa_cursors; |
70 | spinlock_t xa_lock; | 70 | spinlock_t xa_lock; |
71 | struct delayed_work xa_work; | ||
72 | xfs_lsn_t xa_last_pushed_lsn; | ||
73 | unsigned long xa_flags; | ||
71 | }; | 74 | }; |
72 | 75 | ||
76 | #define XFS_AIL_PUSHING_BIT 0 | ||
77 | |||
73 | /* | 78 | /* |
74 | * From xfs_trans_ail.c | 79 | * From xfs_trans_ail.c |
75 | */ | 80 | */ |
76 | void xfs_trans_ail_update(struct xfs_ail *ailp, | 81 | |
77 | struct xfs_log_item *lip, xfs_lsn_t lsn) | 82 | extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ |
78 | __releases(ailp->xa_lock); | 83 | |
79 | void xfs_trans_ail_delete(struct xfs_ail *ailp, | 84 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, |
80 | struct xfs_log_item *lip) | 85 | struct xfs_log_item **log_items, int nr_items, |
81 | __releases(ailp->xa_lock); | 86 | xfs_lsn_t lsn) __releases(ailp->xa_lock); |
82 | void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); | 87 | static inline void |
88 | xfs_trans_ail_update( | ||
89 | struct xfs_ail *ailp, | ||
90 | struct xfs_log_item *lip, | ||
91 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | ||
92 | { | ||
93 | xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); | ||
94 | } | ||
95 | |||
96 | void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, | ||
97 | struct xfs_log_item **log_items, int nr_items) | ||
98 | __releases(ailp->xa_lock); | ||
99 | static inline void | ||
100 | xfs_trans_ail_delete( | ||
101 | struct xfs_ail *ailp, | ||
102 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | ||
103 | { | ||
104 | xfs_trans_ail_delete_bulk(ailp, &lip, 1); | ||
105 | } | ||
106 | |||
107 | void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); | ||
108 | void xfs_ail_push_all(struct xfs_ail *); | ||
109 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); | ||
110 | |||
83 | void xfs_trans_unlocked_item(struct xfs_ail *, | 111 | void xfs_trans_unlocked_item(struct xfs_ail *, |
84 | xfs_log_item_t *); | 112 | xfs_log_item_t *); |
85 | 113 | ||
86 | xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); | ||
87 | |||
88 | struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, | 114 | struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, |
89 | struct xfs_ail_cursor *cur, | 115 | struct xfs_ail_cursor *cur, |
90 | xfs_lsn_t lsn); | 116 | xfs_lsn_t lsn); |
@@ -93,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, | |||
93 | void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, | 119 | void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, |
94 | struct xfs_ail_cursor *cur); | 120 | struct xfs_ail_cursor *cur); |
95 | 121 | ||
96 | long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); | ||
97 | void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); | ||
98 | int xfsaild_start(struct xfs_ail *); | ||
99 | void xfsaild_stop(struct xfs_ail *); | ||
100 | |||
101 | #if BITS_PER_LONG != 64 | 122 | #if BITS_PER_LONG != 64 |
102 | static inline void | 123 | static inline void |
103 | xfs_trans_ail_copy_lsn( | 124 | xfs_trans_ail_copy_lsn( |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8e4a63c4151a..b7a5fe7c52c8 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -953,7 +953,7 @@ xfs_release( | |||
953 | * If we previously truncated this file and removed old data | 953 | * If we previously truncated this file and removed old data |
954 | * in the process, we want to initiate "early" writeout on | 954 | * in the process, we want to initiate "early" writeout on |
955 | * the last close. This is an attempt to combat the notorious | 955 | * the last close. This is an attempt to combat the notorious |
956 | * NULL files problem which is particularly noticable from a | 956 | * NULL files problem which is particularly noticeable from a |
957 | * truncate down, buffered (re-)write (delalloc), followed by | 957 | * truncate down, buffered (re-)write (delalloc), followed by |
958 | * a crash. What we are effectively doing here is | 958 | * a crash. What we are effectively doing here is |
959 | * significantly reducing the time window where we'd otherwise | 959 | * significantly reducing the time window where we'd otherwise |
@@ -964,29 +964,48 @@ xfs_release( | |||
964 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); | 964 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); |
965 | } | 965 | } |
966 | 966 | ||
967 | if (ip->i_d.di_nlink != 0) { | 967 | if (ip->i_d.di_nlink == 0) |
968 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && | 968 | return 0; |
969 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || | ||
970 | ip->i_delayed_blks > 0)) && | ||
971 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && | ||
972 | (!(ip->i_d.di_flags & | ||
973 | (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { | ||
974 | 969 | ||
975 | /* | 970 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && |
976 | * If we can't get the iolock just skip truncating | 971 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || |
977 | * the blocks past EOF because we could deadlock | 972 | ip->i_delayed_blks > 0)) && |
978 | * with the mmap_sem otherwise. We'll get another | 973 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && |
979 | * chance to drop them once the last reference to | 974 | (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { |
980 | * the inode is dropped, so we'll never leak blocks | 975 | |
981 | * permanently. | 976 | /* |
982 | */ | 977 | * If we can't get the iolock just skip truncating the blocks |
983 | error = xfs_free_eofblocks(mp, ip, | 978 | * past EOF because we could deadlock with the mmap_sem |
984 | XFS_FREE_EOF_TRYLOCK); | 979 | * otherwise. We'll get another chance to drop them once the |
985 | if (error) | 980 | * last reference to the inode is dropped, so we'll never leak |
986 | return error; | 981 | * blocks permanently. |
987 | } | 982 | * |
988 | } | 983 | * Further, check if the inode is being opened, written and |
984 | * closed frequently and we have delayed allocation blocks | ||
985 | * outstanding (e.g. streaming writes from the NFS server), | ||
986 | * truncating the blocks past EOF will cause fragmentation to | ||
987 | * occur. | ||
988 | * | ||
989 | * In this case don't do the truncation, either, but we have to | ||
990 | * be careful how we detect this case. Blocks beyond EOF show | ||
991 | * up as i_delayed_blks even when the inode is clean, so we | ||
992 | * need to truncate them away first before checking for a dirty | ||
993 | * release. Hence on the first dirty close we will still remove | ||
994 | * the speculative allocation, but after that we will leave it | ||
995 | * in place. | ||
996 | */ | ||
997 | if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | ||
998 | return 0; | ||
999 | |||
1000 | error = xfs_free_eofblocks(mp, ip, | ||
1001 | XFS_FREE_EOF_TRYLOCK); | ||
1002 | if (error) | ||
1003 | return error; | ||
989 | 1004 | ||
1005 | /* delalloc blocks after truncation means it really is dirty */ | ||
1006 | if (ip->i_delayed_blks) | ||
1007 | xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); | ||
1008 | } | ||
990 | return 0; | 1009 | return 0; |
991 | } | 1010 | } |
992 | 1011 | ||
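
The net behaviour of the new XFS_IDIRTY_RELEASE hint across repeated
open/write/close cycles, summarized (a reading of the code above, not text
from the patch):

    /*
     * close #1 after a dirty write: flag clear
     *   -> trim blocks past EOF; if delalloc blocks remain, set
     *      XFS_IDIRTY_RELEASE
     * close #2..n in the same pattern: flag set
     *   -> leave the speculative preallocation alone, avoiding the
     *      fragmentation a trim-per-close would cause
     * final reference dropped / inode reclaimed
     *   -> leftover blocks past EOF are freed for good
     */
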
@@ -1170,9 +1189,8 @@ xfs_inactive( | |||
1170 | * inode might be lost for a long time or forever. | 1189 | * inode might be lost for a long time or forever. |
1171 | */ | 1190 | */ |
1172 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 1191 | if (!XFS_FORCED_SHUTDOWN(mp)) { |
1173 | cmn_err(CE_NOTE, | 1192 | xfs_notice(mp, "%s: xfs_ifree returned error %d", |
1174 | "xfs_inactive: xfs_ifree() returned an error = %d on %s", | 1193 | __func__, error); |
1175 | error, mp->m_fsname); | ||
1176 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 1194 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); |
1177 | } | 1195 | } |
1178 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | 1196 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); |
@@ -1189,12 +1207,12 @@ xfs_inactive( | |||
1189 | */ | 1207 | */ |
1190 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 1208 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1191 | if (error) | 1209 | if (error) |
1192 | xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " | 1210 | xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", |
1193 | "xfs_bmap_finish() returned error %d", error); | 1211 | __func__, error); |
1194 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1212 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
1195 | if (error) | 1213 | if (error) |
1196 | xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " | 1214 | xfs_notice(mp, "%s: xfs_trans_commit returned error %d", |
1197 | "xfs_trans_commit() returned error %d", error); | 1215 | __func__, error); |
1198 | } | 1216 | } |
1199 | 1217 | ||
1200 | /* | 1218 | /* |
@@ -1291,7 +1309,7 @@ xfs_create( | |||
1291 | error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, | 1309 | error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, |
1292 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); | 1310 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); |
1293 | if (error) | 1311 | if (error) |
1294 | goto std_return; | 1312 | return error; |
1295 | 1313 | ||
1296 | if (is_dir) { | 1314 | if (is_dir) { |
1297 | rdev = 0; | 1315 | rdev = 0; |
@@ -1371,12 +1389,6 @@ xfs_create( | |||
1371 | } | 1389 | } |
1372 | 1390 | ||
1373 | /* | 1391 | /* |
1374 | * At this point, we've gotten a newly allocated inode. | ||
1375 | * It is locked (and joined to the transaction). | ||
1376 | */ | ||
1377 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
1378 | |||
1379 | /* | ||
1380 | * Now we join the directory inode to the transaction. We do not do it | 1392 | * Now we join the directory inode to the transaction. We do not do it |
1381 | * earlier because xfs_dir_ialloc might commit the previous transaction | 1393 | * earlier because xfs_dir_ialloc might commit the previous transaction |
1382 | * (and release all the locks). An error from here on will result in | 1394 | * (and release all the locks). An error from here on will result in |
@@ -1421,22 +1433,13 @@ xfs_create( | |||
1421 | */ | 1433 | */ |
1422 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); | 1434 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); |
1423 | 1435 | ||
1424 | /* | ||
1425 | * xfs_trans_commit normally decrements the vnode ref count | ||
1426 | * when it unlocks the inode. Since we want to return the | ||
1427 | * vnode to the caller, we bump the vnode ref count now. | ||
1428 | */ | ||
1429 | IHOLD(ip); | ||
1430 | |||
1431 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 1436 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1432 | if (error) | 1437 | if (error) |
1433 | goto out_abort_rele; | 1438 | goto out_bmap_cancel; |
1434 | 1439 | ||
1435 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1440 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
1436 | if (error) { | 1441 | if (error) |
1437 | IRELE(ip); | 1442 | goto out_release_inode; |
1438 | goto out_dqrele; | ||
1439 | } | ||
1440 | 1443 | ||
1441 | xfs_qm_dqrele(udqp); | 1444 | xfs_qm_dqrele(udqp); |
1442 | xfs_qm_dqrele(gdqp); | 1445 | xfs_qm_dqrele(gdqp); |
@@ -1450,27 +1453,21 @@ xfs_create( | |||
1450 | cancel_flags |= XFS_TRANS_ABORT; | 1453 | cancel_flags |= XFS_TRANS_ABORT; |
1451 | out_trans_cancel: | 1454 | out_trans_cancel: |
1452 | xfs_trans_cancel(tp, cancel_flags); | 1455 | xfs_trans_cancel(tp, cancel_flags); |
1453 | out_dqrele: | 1456 | out_release_inode: |
1457 | /* | ||
1458 | * Wait until after the current transaction is aborted to | ||
1459 | * release the inode. This prevents recursive transactions | ||
1460 | * and deadlocks from xfs_inactive. | ||
1461 | */ | ||
1462 | if (ip) | ||
1463 | IRELE(ip); | ||
1464 | |||
1454 | xfs_qm_dqrele(udqp); | 1465 | xfs_qm_dqrele(udqp); |
1455 | xfs_qm_dqrele(gdqp); | 1466 | xfs_qm_dqrele(gdqp); |
1456 | 1467 | ||
1457 | if (unlock_dp_on_error) | 1468 | if (unlock_dp_on_error) |
1458 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 1469 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
1459 | std_return: | ||
1460 | return error; | 1470 | return error; |
1461 | |||
1462 | out_abort_rele: | ||
1463 | /* | ||
1464 | * Wait until after the current transaction is aborted to | ||
1465 | * release the inode. This prevents recursive transactions | ||
1466 | * and deadlocks from xfs_inactive. | ||
1467 | */ | ||
1468 | xfs_bmap_cancel(&free_list); | ||
1469 | cancel_flags |= XFS_TRANS_ABORT; | ||
1470 | xfs_trans_cancel(tp, cancel_flags); | ||
1471 | IRELE(ip); | ||
1472 | unlock_dp_on_error = B_FALSE; | ||
1473 | goto out_dqrele; | ||
1474 | } | 1471 | } |
1475 | 1472 | ||
1476 | #ifdef DEBUG | 1473 | #ifdef DEBUG |
@@ -2095,9 +2092,8 @@ xfs_symlink( | |||
2095 | XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, | 2092 | XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, |
2096 | &first_block, resblks, mval, &nmaps, | 2093 | &first_block, resblks, mval, &nmaps, |
2097 | &free_list); | 2094 | &free_list); |
2098 | if (error) { | 2095 | if (error) |
2099 | goto error1; | 2096 | goto error2; |
2100 | } | ||
2101 | 2097 | ||
2102 | if (resblks) | 2098 | if (resblks) |
2103 | resblks -= fs_blocks; | 2099 | resblks -= fs_blocks; |
@@ -2129,7 +2125,7 @@ xfs_symlink( | |||
2129 | error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, | 2125 | error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, |
2130 | &first_block, &free_list, resblks); | 2126 | &first_block, &free_list, resblks); |
2131 | if (error) | 2127 | if (error) |
2132 | goto error1; | 2128 | goto error2; |
2133 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 2129 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
2134 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | 2130 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); |
2135 | 2131 | ||
@@ -2142,13 +2138,6 @@ xfs_symlink( | |||
2142 | xfs_trans_set_sync(tp); | 2138 | xfs_trans_set_sync(tp); |
2143 | } | 2139 | } |
2144 | 2140 | ||
2145 | /* | ||
2146 | * xfs_trans_commit normally decrements the vnode ref count | ||
2147 | * when it unlocks the inode. Since we want to return the | ||
2148 | * vnode to the caller, we bump the vnode ref count now. | ||
2149 | */ | ||
2150 | IHOLD(ip); | ||
2151 | |||
2152 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 2141 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
2153 | if (error) { | 2142 | if (error) { |
2154 | goto error2; | 2143 | goto error2; |
@@ -2842,7 +2831,8 @@ xfs_change_file_space( | |||
2842 | ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; | 2831 | ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; |
2843 | 2832 | ||
2844 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2833 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
2845 | xfs_trans_set_sync(tp); | 2834 | if (attr_flags & XFS_ATTR_SYNC) |
2835 | xfs_trans_set_sync(tp); | ||
2846 | 2836 | ||
2847 | error = xfs_trans_commit(tp, 0); | 2837 | error = xfs_trans_commit(tp, 0); |
2848 | 2838 | ||
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index f6702927eee4..3bcd23353d6c 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); | |||
18 | #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ | 18 | #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ |
19 | #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ | 19 | #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ |
20 | #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ | 20 | #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ |
21 | #define XFS_ATTR_SYNC 0x10 /* synchronous operation required */ | ||
21 | 22 | ||
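
The new XFS_ATTR_SYNC flag makes a preallocation transaction synchronous
only when the caller asks for it, instead of unconditionally. A plausible
caller, hedged since the ioctl plumbing is outside this hunk (the O_DSYNC
test is an assumption, not shown in the patch):

    int attr_flags = 0;

    if (filp->f_flags & O_DSYNC)            /* assumed trigger */
        attr_flags |= XFS_ATTR_SYNC;

    error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags);
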
22 | int xfs_readlink(struct xfs_inode *ip, char *link); | 23 | int xfs_readlink(struct xfs_inode *ip, char *link); |
23 | int xfs_release(struct xfs_inode *ip); | 24 | int xfs_release(struct xfs_inode *ip); |