author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/xfs
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'fs/xfs')
103 files changed, 6926 insertions, 6521 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 480f28127f09..6100ec0fa1d4 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,7 @@ config XFS_FS
 config XFS_QUOTA
 	bool "XFS Quota support"
 	depends on XFS_FS
+	select QUOTACTL
 	help
 	  If you say Y here, you will be able to set limits for disk usage on
 	  a per user and/or a per group basis under XFS.  XFS considers quota
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 0dce969d6cad..284a7c89697e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,14 +16,11 @@
 # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
-EXTRA_CFLAGS +=	 -I$(src) -I$(src)/linux-2.6
+ccflags-y := -I$(src) -I$(src)/linux-2.6
+ccflags-$(CONFIG_XFS_DEBUG) += -g
 
 XFS_LINUX := linux-2.6
 
-ifeq ($(CONFIG_XFS_DEBUG),y)
-	EXTRA_CFLAGS += -g
-endif
-
 obj-$(CONFIG_XFS_FS)		+= xfs.o
 
 xfs-y				+= linux-2.6/xfs_trace.o
@@ -98,17 +95,17 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
 				   kmem.o \
 				   xfs_aops.o \
 				   xfs_buf.o \
+				   xfs_discard.o \
 				   xfs_export.o \
 				   xfs_file.o \
 				   xfs_fs_subr.o \
 				   xfs_globals.o \
 				   xfs_ioctl.o \
 				   xfs_iops.o \
+				   xfs_message.o \
 				   xfs_super.o \
 				   xfs_sync.o \
 				   xfs_xattr.o)
 
 # Objects in support/
-xfs-y				+= $(addprefix support/, \
-				   debug.o \
-				   uuid.o)
+xfs-y				+= support/uuid.o
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 666c9db48eb6..a907de565db3 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -23,6 +23,7 @@
 #include <linux/backing-dev.h>
 #include "time.h"
 #include "kmem.h"
+#include "xfs_message.h"
 
 /*
  * Greedy allocation. May fail and may return vmalloced memory.
@@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
-			printk(KERN_ERR "XFS: possible memory allocation "
-					"deadlock in %s (mode:0x%x)\n",
+			xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
 					__func__, lflags);
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
@@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
-			printk(KERN_ERR "XFS: possible memory allocation "
-					"deadlock in %s (mode:0x%x)\n",
+			xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
 					__func__, lflags);
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
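
Note on the pattern above: kmem_alloc() and kmem_zone_alloc() loop forever for callers that are not allowed to fail, backing off with congestion_wait() and warning every 100 attempts; the patch only swaps the raw printk for the xfs_err() helper. A minimal user-space sketch of the same retry-with-backoff shape (alloc_nofail and the usleep() stand-in for congestion_wait() are illustrative, not part of the patch):

	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	/* Retry an allocation that is not allowed to fail, warning
	 * periodically so a stuck caller shows up in the logs. Mirrors
	 * the loop shape in kmem_alloc() above. */
	static void *alloc_nofail(size_t size)
	{
		unsigned int retries = 0;
		void *ptr;

		do {
			ptr = malloc(size);
			if (ptr)
				return ptr;
			if (!(++retries % 100))
				fprintf(stderr,
					"possible memory allocation deadlock in %s\n",
					__func__);
			/* rough stand-in for congestion_wait(BLK_RW_ASYNC, HZ/50) */
			usleep(20000);
		} while (1);
	}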
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c370819..000000000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_SV_H__
-#define __XFS_SUPPORT_SV_H__
-
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-
-/*
- * Synchronisation variables.
- *
- * (Parameters "pri", "svf" and "rts" are not implemented)
- */
-
-typedef struct sv_s {
-	wait_queue_head_t waiters;
-} sv_t;
-
-static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue_exclusive(&sv->waiters, &wait);
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	spin_unlock(lock);
-
-	schedule();
-
-	remove_wait_queue(&sv->waiters, &wait);
-}
-
-#define sv_init(sv,flag,name) \
-	init_waitqueue_head(&(sv)->waiters)
-#define sv_destroy(sv) \
-	/*NOTHING*/
-#define sv_wait(sv, pri, lock, s) \
-	_sv_wait(sv, lock)
-#define sv_signal(sv) \
-	wake_up(&(sv)->waiters)
-#define sv_broadcast(sv) \
-	wake_up_all(&(sv)->waiters)
-
-#endif /* __XFS_SUPPORT_SV_H__ */
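
The header removed above wrapped a bare wait_queue_head_t in a condition-variable style API: _sv_wait() queues the caller exclusively, drops the spinlock it was handed, and sleeps until sv_signal() or sv_broadcast() wakes it. A rough user-space analogue using pthreads (hedged: pthread_cond_wait() re-acquires the mutex before returning, which _sv_wait() deliberately does not do for its spinlock):

	#include <pthread.h>

	/* Approximate pthreads rendering of the removed sv_t API. */
	typedef struct {
		pthread_cond_t	waiters;
	} sv_t;

	static void sv_init(sv_t *sv)
	{
		pthread_cond_init(&sv->waiters, NULL);
	}

	/* Releases *lock and sleeps; unlike _sv_wait(), the mutex is
	 * re-taken before this returns. */
	static void sv_wait(sv_t *sv, pthread_mutex_t *lock)
	{
		pthread_cond_wait(&sv->waiters, lock);
	}

	static void sv_signal(sv_t *sv)
	{
		pthread_cond_signal(&sv->waiters);
	}

	static void sv_broadcast(sv_t *sv)
	{
		pthread_cond_broadcast(&sv->waiters);
	}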
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3d..39f4f809bb68 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 }
 
 int
-xfs_check_acl(struct inode *inode, int mask)
+xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_inode	*ip;
 	struct posix_acl	*acl;
 	int			error = -EAGAIN;
 
+	ip = XFS_I(inode);
 	trace_xfs_check_acl(ip);
 
 	/*
@@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask)
 	if (!XFS_IFORK_Q(ip))
 		return -EAGAIN;
 
+	if (flags & IPERM_FLAG_RCU) {
+		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+			return -ECHILD;
+		return -EAGAIN;
+	}
+
 	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b552f816de15..79ce38be15a1 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,15 +38,6 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
-	IO_READ,	/* mapping for a read */
-	IO_DELAY,	/* mapping covers delalloc region */
-	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
-	IO_NEW		/* just allocated */
-};
 
 /*
  * Prime number of hash buckets since address is used as the key.
@@ -182,9 +173,6 @@ xfs_setfilesize(
 	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
 	xfs_fsize_t		isize;
 
-	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-	ASSERT(ioend->io_type != IO_READ);
-
 	if (unlikely(ioend->io_error))
 		return 0;
 
@@ -244,10 +232,8 @@ xfs_end_io(
 	 * We might have to update the on-disk file size after extending
 	 * writes.
 	 */
-	if (ioend->io_type != IO_READ) {
-		error = xfs_setfilesize(ioend);
-		ASSERT(!error || error == EAGAIN);
-	}
+	error = xfs_setfilesize(ioend);
+	ASSERT(!error || error == EAGAIN);
 
 	/*
 	 * If we didn't complete processing of the ioend, requeue it to the
@@ -318,14 +304,63 @@ STATIC int
 xfs_map_blocks(
 	struct inode		*inode,
 	loff_t			offset,
-	ssize_t			count,
 	struct xfs_bmbt_irec	*imap,
-	int			flags)
+	int			type,
+	int			nonblocking)
 {
-	int			nmaps = 1;
-	int			new = 0;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	ssize_t			count = 1 << inode->i_blkbits;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			error = 0;
+	int			bmapi_flags = XFS_BMAPI_ENTIRE;
+	int			nimaps = 1;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	if (type == IO_UNWRITTEN)
+		bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+		if (nonblocking)
+			return -XFS_ERROR(EAGAIN);
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+	}
+
+	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+	       (ip->i_df.if_flags & XFS_IFEXTENTS));
+	ASSERT(offset <= mp->m_maxioffset);
+
+	if (offset + count > mp->m_maxioffset)
+		count = mp->m_maxioffset - offset;
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+			  bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-	return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
+	if (error)
+		return -XFS_ERROR(error);
+
+	if (type == IO_DELALLOC &&
+	    (!nimaps || isnullstartblock(imap->br_startblock))) {
+		error = xfs_iomap_write_allocate(ip, offset, count, imap);
+		if (!error)
+			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+		return -XFS_ERROR(error);
+	}
+
+#ifdef DEBUG
+	if (type == IO_UNWRITTEN) {
+		ASSERT(nimaps);
+		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+	}
+#endif
+	if (nimaps)
+		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+	return 0;
 }
 
 STATIC int
@@ -378,28 +413,19 @@ xfs_submit_ioend_bio(
 	if (xfs_ioend_new_eof(ioend))
 		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
 
-	submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
-		   WRITE_SYNC_PLUG : WRITE, bio);
-	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
-	bio_put(bio);
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
 
 STATIC struct bio *
 xfs_alloc_ioend_bio(
 	struct buffer_head	*bh)
 {
-	struct bio		*bio;
 	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
-
-	do {
-		bio = bio_alloc(GFP_NOIO, nvecs);
-		nvecs >>= 1;
-	} while (!bio);
+	struct bio		*bio = bio_alloc(GFP_NOIO, nvecs);
 
 	ASSERT(bio->bi_private == NULL);
 	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
-	bio_get(bio);
 	return bio;
 }
 
@@ -470,9 +496,8 @@ xfs_submit_ioend(
 	/* Pass 1 - start writeback */
 	do {
 		next = ioend->io_list;
-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
 			xfs_start_buffer_writeback(bh);
-		}
 	} while ((ioend = next) != NULL);
 
 	/* Pass 2 - submit I/O */
@@ -600,117 +625,13 @@ xfs_map_at_offset(
 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
-	lock_buffer(bh);
 	xfs_map_buffer(inode, bh, imap, offset);
-	bh->b_bdev = xfs_find_bdev_for_inode(inode);
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
 	clear_buffer_unwritten(bh);
 }
 
 /*
- * Look for a page at index that is suitable for clustering.
- */
-STATIC unsigned int
-xfs_probe_page(
-	struct page		*page,
-	unsigned int		pg_offset)
-{
-	struct buffer_head	*bh, *head;
-	int			ret = 0;
-
-	if (PageWriteback(page))
-		return 0;
-	if (!PageDirty(page))
-		return 0;
-	if (!page->mapping)
-		return 0;
-	if (!page_has_buffers(page))
-		return 0;
-
-	bh = head = page_buffers(page);
-	do {
-		if (!buffer_uptodate(bh))
-			break;
-		if (!buffer_mapped(bh))
-			break;
-		ret += bh->b_size;
-		if (ret >= pg_offset)
-			break;
-	} while ((bh = bh->b_this_page) != head);
-
-	return ret;
-}
-
-STATIC size_t
-xfs_probe_cluster(
-	struct inode		*inode,
-	struct page		*startpage,
-	struct buffer_head	*bh,
-	struct buffer_head	*head)
-{
-	struct pagevec		pvec;
-	pgoff_t			tindex, tlast, tloff;
-	size_t			total = 0;
-	int			done = 0, i;
-
-	/* First sum forwards in this page */
-	do {
-		if (!buffer_uptodate(bh) || !buffer_mapped(bh))
-			return total;
-		total += bh->b_size;
-	} while ((bh = bh->b_this_page) != head);
-
-	/* if we reached the end of the page, sum forwards in following pages */
-	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-	tindex = startpage->index + 1;
-
-	/* Prune this back to avoid pathological behavior */
-	tloff = min(tlast, startpage->index + 64);
-
-	pagevec_init(&pvec, 0);
-	while (!done && tindex <= tloff) {
-		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-			break;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			size_t pg_offset, pg_len = 0;
-
-			if (tindex == tlast) {
-				pg_offset =
-				    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
-				if (!pg_offset) {
-					done = 1;
-					break;
-				}
-			} else
-				pg_offset = PAGE_CACHE_SIZE;
-
-			if (page->index == tindex && trylock_page(page)) {
-				pg_len = xfs_probe_page(page, pg_offset);
-				unlock_page(page);
-			}
-
-			if (!pg_len) {
-				done = 1;
-				break;
-			}
-
-			total += pg_len;
-			tindex++;
-		}
-
-		pagevec_release(&pvec);
-		cond_resched();
-	}
-
-	return total;
-}
-
-/*
  * Test if a given page is suitable for writing as part of an unwritten
  * or delayed allocate extent.
  */
@@ -731,9 +652,9 @@ xfs_is_delayed_page(
 		if (buffer_unwritten(bh))
 			acceptable = (type == IO_UNWRITTEN);
 		else if (buffer_delay(bh))
-			acceptable = (type == IO_DELAY);
+			acceptable = (type == IO_DELALLOC);
 		else if (buffer_dirty(bh) && buffer_mapped(bh))
-			acceptable = (type == IO_NEW);
+			acceptable = (type == IO_OVERWRITE);
 		else
 			break;
 	} while ((bh = bh->b_this_page) != head);
@@ -758,8 +679,7 @@ xfs_convert_page(
 	loff_t			tindex,
 	struct xfs_bmbt_irec	*imap,
 	xfs_ioend_t		**ioendp,
-	struct writeback_control *wbc,
-	int			all_bh)
+	struct writeback_control *wbc)
 {
 	struct buffer_head	*bh, *head;
 	xfs_off_t		end_offset;
@@ -814,37 +734,30 @@ xfs_convert_page(
 			continue;
 		}
 
-		if (buffer_unwritten(bh) || buffer_delay(bh)) {
+		if (buffer_unwritten(bh) || buffer_delay(bh) ||
+		    buffer_mapped(bh)) {
 			if (buffer_unwritten(bh))
 				type = IO_UNWRITTEN;
+			else if (buffer_delay(bh))
+				type = IO_DELALLOC;
 			else
-				type = IO_DELAY;
+				type = IO_OVERWRITE;
 
 			if (!xfs_imap_valid(inode, imap, offset)) {
 				done = 1;
 				continue;
 			}
 
-			ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-			ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-			xfs_map_at_offset(inode, bh, imap, offset);
+			lock_buffer(bh);
+			if (type != IO_OVERWRITE)
+				xfs_map_at_offset(inode, bh, imap, offset);
 			xfs_add_to_ioend(inode, bh, offset, type,
 					 ioendp, done);
 
 			page_dirty--;
 			count++;
 		} else {
-			type = IO_NEW;
-			if (buffer_mapped(bh) && all_bh) {
-				lock_buffer(bh);
-				xfs_add_to_ioend(inode, bh, offset,
-						type, ioendp, done);
-				count++;
-				page_dirty--;
-			} else {
-				done = 1;
-			}
+			done = 1;
 		}
 	} while (offset += len, (bh = bh->b_this_page) != head);
 
@@ -876,7 +789,6 @@ xfs_cluster_write(
 	struct xfs_bmbt_irec	*imap,
 	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
-	int			all_bh,
 	pgoff_t			tlast)
 {
 	struct pagevec		pvec;
@@ -891,7 +803,7 @@
 
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-					imap, ioendp, wbc, all_bh);
+					imap, ioendp, wbc);
 			if (done)
 				break;
 		}
@@ -934,83 +846,38 @@ xfs_aops_discard_page(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct buffer_head	*bh, *head;
 	loff_t			offset = page_offset(page);
-	ssize_t			len = 1 << inode->i_blkbits;
 
-	if (!xfs_is_delayed_page(page, IO_DELAY))
+	if (!xfs_is_delayed_page(page, IO_DELALLOC))
 		goto out_invalidate;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		goto out_invalidate;
 
-	xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+	xfs_alert(ip->i_mount,
 		"page discard on page %p, inode 0x%llx, offset %llu.",
 			page, ip->i_ino, offset);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	bh = head = page_buffers(page);
 	do {
-		int		done;
-		xfs_fileoff_t	offset_fsb;
-		xfs_bmbt_irec_t	imap;
-		int		nimaps = 1;
 		int		error;
-		xfs_fsblock_t	firstblock;
-		xfs_bmap_free_t flist;
+		xfs_fileoff_t	start_fsb;
 
 		if (!buffer_delay(bh))
 			goto next_buffer;
 
-		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-
-		/*
-		 * Map the range first and check that it is a delalloc extent
-		 * before trying to unmap the range. Otherwise we will be
-		 * trying to remove a real extent (which requires a
-		 * transaction) or a hole, which is probably a bad idea...
-		 */
-		error = xfs_bmapi(NULL, ip, offset_fsb, 1,
-				XFS_BMAPI_ENTIRE,  NULL, 0, &imap,
-				&nimaps, NULL);
-
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
-			"page discard failed delalloc mapping lookup.");
-			}
-			break;
-		}
-		if (!nimaps) {
-			/* nothing there */
-			goto next_buffer;
-		}
-		if (imap.br_startblock != DELAYSTARTBLOCK) {
-			/* been converted, ignore */
-			goto next_buffer;
-		}
-		WARN_ON(imap.br_blockcount == 0);
-
-		/*
-		 * Note: while we initialise the firstblock/flist pair, they
-		 * should never be used because blocks should never be
-		 * allocated or freed for a delalloc extent and hence we need
-		 * don't cancel or finish them after the xfs_bunmapi() call.
-		 */
-		xfs_bmap_init(&flist, &firstblock);
-		error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
-					&flist, &done);
-
-		ASSERT(!flist.xbf_count && !flist.xbf_first);
+		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
 		if (error) {
 			/* something screwed, just bail */
 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+				xfs_alert(ip->i_mount,
 			"page discard unable to remove delalloc mapping.");
 			}
 			break;
 		}
 next_buffer:
-		offset += len;
+		offset += 1 << inode->i_blkbits;
 
 	} while ((bh = bh->b_this_page) != head);
 
@@ -1047,10 +914,10 @@ xfs_vm_writepage(
 	unsigned int		type;
 	__uint64_t              end_offset;
 	pgoff_t                 end_index, last_index;
-	ssize_t			size, len;
-	int			flags, err, imap_valid = 0, uptodate = 1;
+	ssize_t			len;
+	int			err, imap_valid = 0, uptodate = 1;
 	int			count = 0;
-	int			all_bh = 0;
+	int			nonblocking = 0;
 
 	trace_xfs_writepage(inode, page, 0);
 
@@ -1101,110 +968,78 @@
 
 	bh = head = page_buffers(page);
 	offset = page_offset(page);
-	flags = BMAPI_READ;
-	type = IO_NEW;
+	type = IO_OVERWRITE;
+
+	if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
+		nonblocking = 1;
 
 	do {
+		int new_ioend = 0;
+
 		if (offset >= end_offset)
 			break;
 		if (!buffer_uptodate(bh))
 			uptodate = 0;
 
 		/*
-		 * A hole may still be marked uptodate because discard_buffer
-		 * leaves the flag set.
+		 * set_page_dirty dirties all buffers in a page, independent
+		 * of their state.  The dirty state however is entirely
+		 * meaningless for holes (!mapped && uptodate), so skip
+		 * buffers covering holes here.
 		 */
 		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-			ASSERT(!buffer_dirty(bh));
 			imap_valid = 0;
 			continue;
 		}
 
-		if (imap_valid)
-			imap_valid = xfs_imap_valid(inode, &imap, offset);
-
-		if (buffer_unwritten(bh) || buffer_delay(bh)) {
-			int new_ioend = 0;
-
-			/*
-			 * Make sure we don't use a read-only iomap
-			 */
-			if (flags == BMAPI_READ)
-				imap_valid = 0;
-
-			if (buffer_unwritten(bh)) {
+		if (buffer_unwritten(bh)) {
+			if (type != IO_UNWRITTEN) {
 				type = IO_UNWRITTEN;
-				flags = BMAPI_WRITE | BMAPI_IGNSTATE;
-			} else if (buffer_delay(bh)) {
-				type = IO_DELAY;
-				flags = BMAPI_ALLOCATE;
-
-				if (wbc->sync_mode == WB_SYNC_NONE &&
-				    wbc->nonblocking)
-					flags |= BMAPI_TRYLOCK;
-			}
-
-			if (!imap_valid) {
-				/*
-				 * If we didn't have a valid mapping then we
-				 * need to ensure that we put the new mapping
-				 * in a new ioend structure. This needs to be
-				 * done to ensure that the ioends correctly
-				 * reflect the block mappings at io completion
-				 * for unwritten extent conversion.
-				 */
-				new_ioend = 1;
-				err = xfs_map_blocks(inode, offset, len,
-						&imap, flags);
-				if (err)
-					goto error;
-				imap_valid = xfs_imap_valid(inode, &imap,
-							offset);
+				imap_valid = 0;
 			}
-			if (imap_valid) {
-				xfs_map_at_offset(inode, bh, &imap, offset);
-				xfs_add_to_ioend(inode, bh, offset, type,
-						 &ioend, new_ioend);
-				count++;
+		} else if (buffer_delay(bh)) {
+			if (type != IO_DELALLOC) {
+				type = IO_DELALLOC;
+				imap_valid = 0;
 			}
 		} else if (buffer_uptodate(bh)) {
-			/*
-			 * we got here because the buffer is already mapped.
-			 * That means it must already have extents allocated
-			 * underneath it. Map the extent by reading it.
-			 */
-			if (!imap_valid || flags != BMAPI_READ) {
-				flags = BMAPI_READ;
-				size = xfs_probe_cluster(inode, page, bh, head);
-				err = xfs_map_blocks(inode, offset, size,
-						&imap, flags);
-				if (err)
-					goto error;
-				imap_valid = xfs_imap_valid(inode, &imap,
-							offset);
+			if (type != IO_OVERWRITE) {
+				type = IO_OVERWRITE;
+				imap_valid = 0;
+			}
+		} else {
+			if (PageUptodate(page)) {
+				ASSERT(buffer_mapped(bh));
+				imap_valid = 0;
 			}
+			continue;
+		}
 
+		if (imap_valid)
+			imap_valid = xfs_imap_valid(inode, &imap, offset);
+		if (!imap_valid) {
 			/*
-			 * We set the type to IO_NEW in case we are doing a
-			 * small write at EOF that is extending the file but
-			 * without needing an allocation. We need to update the
-			 * file size on I/O completion in this case so it is
-			 * the same case as having just allocated a new extent
-			 * that we are writing into for the first time.
+			 * If we didn't have a valid mapping then we need to
+			 * put the new mapping into a separate ioend structure.
+			 * This ensures non-contiguous extents always have
+			 * separate ioends, which is particularly important
+			 * for unwritten extent conversion at I/O completion
+			 * time.
 			 */
-			type = IO_NEW;
-			if (trylock_buffer(bh)) {
-				if (imap_valid)
-					all_bh = 1;
-				xfs_add_to_ioend(inode, bh, offset, type,
-						&ioend, !imap_valid);
-				count++;
-			} else {
-				imap_valid = 0;
-			}
-		} else if (PageUptodate(page)) {
-			ASSERT(buffer_mapped(bh));
-			imap_valid = 0;
+			new_ioend = 1;
+			err = xfs_map_blocks(inode, offset, &imap, type,
+					     nonblocking);
+			if (err)
+				goto error;
+			imap_valid = xfs_imap_valid(inode, &imap, offset);
+		}
+		if (imap_valid) {
+			lock_buffer(bh);
+			if (type != IO_OVERWRITE)
+				xfs_map_at_offset(inode, bh, &imap, offset);
+			xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+					 new_ioend);
+			count++;
 		}
 
 		if (!iohead)
@@ -1233,7 +1068,7 @@ xfs_vm_writepage(
 			end_index = last_index;
 
 		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-					wbc, all_bh, end_index);
+					wbc, end_index);
 	}
 
 	if (iohead)
@@ -1302,13 +1137,19 @@ __xfs_get_blocks(
 	int			create,
 	int			direct)
 {
-	int			flags = create ? BMAPI_WRITE : BMAPI_READ;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			error = 0;
+	int			lockmode = 0;
 	struct xfs_bmbt_irec	imap;
+	int			nimaps = 1;
 	xfs_off_t		offset;
 	ssize_t			size;
-	int			nimap = 1;
 	int			new = 0;
-	int			error;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
 
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1317,15 +1158,45 @@
 	if (!create && direct && offset >= i_size_read(inode))
 		return 0;
 
-	if (direct && create)
-		flags |= BMAPI_DIRECT;
+	if (create) {
+		lockmode = XFS_ILOCK_EXCL;
+		xfs_ilock(ip, lockmode);
+	} else {
+		lockmode = xfs_ilock_map_shared(ip);
+	}
 
-	error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
-			  &new);
+	ASSERT(offset <= mp->m_maxioffset);
+	if (offset + size > mp->m_maxioffset)
+		size = mp->m_maxioffset - offset;
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
+	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+			  XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
 	if (error)
-		return -error;
-	if (nimap == 0)
-		return 0;
+		goto out_unlock;
+
+	if (create &&
+	    (!nimaps ||
+	     (imap.br_startblock == HOLESTARTBLOCK ||
+	      imap.br_startblock == DELAYSTARTBLOCK))) {
+		if (direct) {
+			error = xfs_iomap_write_direct(ip, offset, size,
+						       &imap, nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, size, &imap);
+		}
+		if (error)
+			goto out_unlock;
+
+		trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+	} else if (nimaps) {
+		trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+	} else {
+		trace_xfs_get_blocks_notfound(ip, offset, size);
+		goto out_unlock;
+	}
+	xfs_iunlock(ip, lockmode);
 
 	if (imap.br_startblock != HOLESTARTBLOCK &&
 	    imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1392,6 +1263,10 @@ __xfs_get_blocks(
 	}
 
 	return 0;
+
+out_unlock:
+	xfs_iunlock(ip, lockmode);
+	return -error;
 }
 
 int
@@ -1420,7 +1295,7 @@ xfs_get_blocks_direct(
  * If the private argument is non-NULL __xfs_get_blocks signals us that we
  * need to issue a transaction to convert the range from unwritten to written
  * extents.  In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done.  But in case this was a successfull AIO
+ * to do this and we are done.  But in case this was a successful AIO
  * request this handler is called from interrupt context, from which we
  * can't start transactions.  In that case offload the I/O completion to
  * the workqueues we also use for buffered I/O completion.
@@ -1479,7 +1354,7 @@ xfs_vm_direct_IO(
 	ssize_t			ret;
 
 	if (rw & WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IO_NEW);
+		iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
 
 		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
 					   offset, nr_segs,
@@ -1505,11 +1380,42 @@ xfs_vm_write_failed(
 	struct inode	*inode = mapping->host;
 
 	if (to > inode->i_size) {
-		struct iattr ia = {
-			.ia_valid	= ATTR_SIZE | ATTR_FORCE,
-			.ia_size	= inode->i_size,
-		};
-		xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+		/*
+		 * punch out the delalloc blocks we have already allocated. We
+		 * don't call xfs_setattr() to do this as we may be in the
+		 * middle of a multi-iovec write and so the vfs inode->i_size
+		 * will not match the xfs ip->i_size and so it will zero too
+		 * much. Hence we just truncate the page cache to zero what is
+		 * necessary and punch the delalloc blocks directly.
+		 */
+		struct xfs_inode	*ip = XFS_I(inode);
+		xfs_fileoff_t		start_fsb;
+		xfs_fileoff_t		end_fsb;
+		int			error;
+
+		truncate_pagecache(inode, to, inode->i_size);
+
+		/*
+		 * Check if there are any blocks that are outside of i_size
+		 * that need to be trimmed back.
+		 */
+		start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+		end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+		if (end_fsb <= start_fsb)
+			return;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+							end_fsb - start_fsb);
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_alert(ip->i_mount,
+			"xfs_vm_write_failed: unable to clean up ino %lld",
+						ip->i_ino);
+			}
+		}
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 }
 
@@ -1588,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
-	.sync_page		= block_sync_page,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.write_begin		= xfs_vm_write_begin,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index c5057fb6237a..71f721e1a71f 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue;
 extern mempool_t *xfs_ioend_pool;
 
 /*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+	IO_DIRECT = 0,	/* special case for direct I/O ioends */
+	IO_DELALLOC,	/* mapping covers delalloc region */
+	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
+	IO_OVERWRITE,	/* mapping covers already allocated extent */
+};
+
+#define XFS_IO_TYPES \
+	{ 0,			"" }, \
+	{ IO_DELALLOC,		"delalloc" }, \
+	{ IO_UNWRITTEN,		"unwritten" }, \
+	{ IO_OVERWRITE,		"overwrite" }
+
+/*
  * xfs_ioend struct manages large extent writes for XFS.
  * It can manage several multi-page bio's at once.
  */
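
The XFS_IO_TYPES list added above pairs each ioend type with a printable name; in the kernel it feeds the tracepoint formatter (__print_symbolic()). A self-contained sketch of the same value-to-name lookup (the io_type_name() helper is illustrative only, not something the patch adds):

	#include <stdio.h>

	enum { IO_DIRECT = 0, IO_DELALLOC, IO_UNWRITTEN, IO_OVERWRITE };

	/* Table mirroring the XFS_IO_TYPES pairs above. */
	static const struct { int type; const char *name; } xfs_io_types[] = {
		{ IO_DIRECT,	"" },
		{ IO_DELALLOC,	"delalloc" },
		{ IO_UNWRITTEN,	"unwritten" },
		{ IO_OVERWRITE,	"overwrite" },
	};

	static const char *io_type_name(int type)
	{
		size_t i;

		for (i = 0; i < sizeof(xfs_io_types) / sizeof(xfs_io_types[0]); i++)
			if (xfs_io_types[i].type == type)
				return xfs_io_types[i].name;
		return "?";
	}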
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 286e36e21dae..5e68099db2a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
 #include <linux/freezer.h>
-#include <linux/list_sort.h>
 
 #include "xfs_sb.h"
 #include "xfs_inum.h"
@@ -44,12 +43,7 @@
 
 static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-static struct shrinker xfs_buf_shake = {
-	.shrink = xfsbufd_wakeup,
-	.seeks = DEFAULT_SEEKS,
-};
 
 static struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
@@ -99,77 +93,79 @@ xfs_buf_vmap_len(
 }
 
 /*
- * Page Region interfaces.
+ * xfs_buf_lru_add - add a buffer to the LRU.
  *
- * For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
- *
- * Each such region is "bytes per page / bits per long" bytes long.
- *
- * NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
  */
-#if (BITS_PER_LONG == 32)
-#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
-#elif (BITS_PER_LONG == 64)
-#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
-#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
-#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))
-
-STATIC unsigned long
-page_region_mask(
-	size_t		offset,
-	size_t		length)
+STATIC void
+xfs_buf_lru_add(
+	struct xfs_buf	*bp)
 {
-	unsigned long	mask;
-	int		first, final;
-
-	first = BTOPR(offset);
-	final = BTOPRT(offset + length - 1);
-	first = min(first, final);
-
-	mask = ~0UL;
-	mask <<= BITS_PER_LONG - (final - first);
-	mask >>= BITS_PER_LONG - (final);
-
-	ASSERT(offset + length <= PAGE_CACHE_SIZE);
-	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
+	struct xfs_buftarg *btp = bp->b_target;
 
-	return mask;
+	spin_lock(&btp->bt_lru_lock);
+	if (list_empty(&bp->b_lru)) {
+		atomic_inc(&bp->b_hold);
+		list_add_tail(&bp->b_lru, &btp->bt_lru);
+		btp->bt_lru_nr++;
+	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
 STATIC void
-set_page_region(
-	struct page	*page,
-	size_t		offset,
-	size_t		length)
+xfs_buf_lru_del(
+	struct xfs_buf	*bp)
 {
-	set_page_private(page,
-		page_private(page) | page_region_mask(offset, length));
-	if (page_private(page) == ~0UL)
-		SetPageUptodate(page);
-}
+	struct xfs_buftarg *btp = bp->b_target;
 
-STATIC int
-test_page_region(
-	struct page	*page,
-	size_t		offset,
-	size_t		length)
-{
-	unsigned long	mask = page_region_mask(offset, length);
+	if (list_empty(&bp->b_lru))
+		return;
 
-	return (mask && (page_private(page) & mask) == mask);
+	spin_lock(&btp->bt_lru_lock);
+	if (!list_empty(&bp->b_lru)) {
+		list_del_init(&bp->b_lru);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
 /*
- * Internal xfs_buf_t object manipulation
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
  */
+void
+xfs_buf_stale(
+	struct xfs_buf	*bp)
+{
+	bp->b_flags |= XBF_STALE;
+	atomic_set(&(bp)->b_lru_ref, 0);
+	if (!list_empty(&bp->b_lru)) {
+		struct xfs_buftarg *btp = bp->b_target;
+
+		spin_lock(&btp->bt_lru_lock);
+		if (!list_empty(&bp->b_lru)) {
+			list_del_init(&bp->b_lru);
+			btp->bt_lru_nr--;
+			atomic_dec(&bp->b_hold);
+		}
+		spin_unlock(&btp->bt_lru_lock);
+	}
+	ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
 
 STATIC void
 _xfs_buf_initialize(
@@ -186,10 +182,12 @@ _xfs_buf_initialize(
 
 	memset(bp, 0, sizeof(xfs_buf_t));
 	atomic_set(&bp->b_hold, 1);
+	atomic_set(&bp->b_lru_ref, 1);
 	init_completion(&bp->b_iowait);
+	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
-	INIT_LIST_HEAD(&bp->b_hash_list);
-	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+	RB_CLEAR_NODE(&bp->b_rbnode);
+	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
 	bp->b_file_offset = range_base;
@@ -262,9 +260,9 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
-	ASSERT(list_empty(&bp->b_hash_list));
+	ASSERT(list_empty(&bp->b_lru));
 
-	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
+	if (bp->b_flags & _XBF_PAGES) {
 		uint		i;
 
 		if (xfs_buf_is_vmapped(bp))
@@ -274,56 +272,77 @@ xfs_buf_free(
 		for (i = 0; i < bp->b_page_count; i++) {
 			struct page	*page = bp->b_pages[i];
 
-			if (bp->b_flags & _XBF_PAGE_CACHE)
-				ASSERT(!PagePrivate(page));
-			page_cache_release(page);
+			__free_page(page);
 		}
-	}
+	} else if (bp->b_flags & _XBF_KMEM)
+		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
 	xfs_buf_deallocate(bp);
 }
 
 /*
- * Finds all pages for buffer in question and builds it's page list.
+ * Allocates all the pages for buffer in question and builds it's page list.
  */
 STATIC int
-_xfs_buf_lookup_pages(
+xfs_buf_allocate_memory(
 	xfs_buf_t		*bp,
 	uint			flags)
 {
-	struct address_space	*mapping = bp->b_target->bt_mapping;
-	size_t			blocksize = bp->b_target->bt_bsize;
 	size_t			size = bp->b_count_desired;
 	size_t			nbytes, offset;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
-	pgoff_t			first;
 	xfs_off_t		end;
 	int			error;
 
+	/*
+	 * for buffers that are contained within a single page, just allocate
+	 * the memory from the heap - there's no need for the complexity of
+	 * page arrays to keep allocation down to order 0.
+	 */
+	if (bp->b_buffer_length < PAGE_SIZE) {
+		bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+		if (!bp->b_addr) {
+			/* low memory - use alloc_page loop instead */
+			goto use_alloc_page;
+		}
+
+		if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+								PAGE_MASK) !=
+		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
+			/* b_addr spans two pages - use alloc_page instead */
+			kmem_free(bp->b_addr);
+			bp->b_addr = NULL;
+			goto use_alloc_page;
+		}
+		bp->b_offset = offset_in_page(bp->b_addr);
+		bp->b_pages = bp->b_page_array;
+		bp->b_pages[0] = virt_to_page(bp->b_addr);
+		bp->b_page_count = 1;
+		bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+		return 0;
+	}
+
+use_alloc_page:
 	end = bp->b_file_offset + bp->b_buffer_length;
 	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-
 	error = _xfs_buf_get_pages(bp, page_count, flags);
 	if (unlikely(error))
 		return error;
-	bp->b_flags |= _XBF_PAGE_CACHE;
 
 	offset = bp->b_offset;
-	first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
+	bp->b_flags |= _XBF_PAGES;
 
 	for (i = 0; i < bp->b_page_count; i++) {
 		struct page	*page;
 		uint		retries = 0;
-
-	retry:
-		page = find_or_create_page(mapping, first + i, gfp_mask);
+retry:
+		page = alloc_page(gfp_mask);
 		if (unlikely(page == NULL)) {
 			if (flags & XBF_READ_AHEAD) {
 				bp->b_page_count = i;
-				for (i = 0; i < bp->b_page_count; i++)
-					unlock_page(bp->b_pages[i]);
-				return -ENOMEM;
+				error = ENOMEM;
+				goto out_free_pages;
 			}
 
 			/*
@@ -333,65 +352,55 @@ _xfs_buf_lookup_pages( | |||
333 | * handle buffer allocation failures we can't do much. | 352 | * handle buffer allocation failures we can't do much. |
334 | */ | 353 | */ |
335 | if (!(++retries % 100)) | 354 | if (!(++retries % 100)) |
336 | printk(KERN_ERR | 355 | xfs_err(NULL, |
337 | "XFS: possible memory allocation " | 356 | "possible memory allocation deadlock in %s (mode:0x%x)", |
338 | "deadlock in %s (mode:0x%x)\n", | ||
339 | __func__, gfp_mask); | 357 | __func__, gfp_mask); |
340 | 358 | ||
341 | XFS_STATS_INC(xb_page_retries); | 359 | XFS_STATS_INC(xb_page_retries); |
342 | xfsbufd_wakeup(NULL, 0, gfp_mask); | ||
343 | congestion_wait(BLK_RW_ASYNC, HZ/50); | 360 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
344 | goto retry; | 361 | goto retry; |
345 | } | 362 | } |
346 | 363 | ||
347 | XFS_STATS_INC(xb_page_found); | 364 | XFS_STATS_INC(xb_page_found); |
348 | 365 | ||
349 | nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); | 366 | nbytes = min_t(size_t, size, PAGE_SIZE - offset); |
350 | size -= nbytes; | 367 | size -= nbytes; |
351 | |||
352 | ASSERT(!PagePrivate(page)); | ||
353 | if (!PageUptodate(page)) { | ||
354 | page_count--; | ||
355 | if (blocksize >= PAGE_CACHE_SIZE) { | ||
356 | if (flags & XBF_READ) | ||
357 | bp->b_flags |= _XBF_PAGE_LOCKED; | ||
358 | } else if (!PagePrivate(page)) { | ||
359 | if (test_page_region(page, offset, nbytes)) | ||
360 | page_count++; | ||
361 | } | ||
362 | } | ||
363 | |||
364 | bp->b_pages[i] = page; | 368 | bp->b_pages[i] = page; |
365 | offset = 0; | 369 | offset = 0; |
366 | } | 370 | } |
371 | return 0; | ||
367 | 372 | ||
368 | if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { | 373 | out_free_pages: |
369 | for (i = 0; i < bp->b_page_count; i++) | 374 | for (i = 0; i < bp->b_page_count; i++) |
370 | unlock_page(bp->b_pages[i]); | 375 | __free_page(bp->b_pages[i]); |
371 | } | ||
372 | |||
373 | if (page_count == bp->b_page_count) | ||
374 | bp->b_flags |= XBF_DONE; | ||
375 | |||
376 | return error; | 376 | return error; |
377 | } | 377 | } |
378 | 378 | ||
379 | /* | 379 | /* |
380 | * Map buffer into kernel address-space if nessecary. | 380 | * Map buffer into kernel address-space if necessary. |
381 | */ | 381 | */ |
382 | STATIC int | 382 | STATIC int |
383 | _xfs_buf_map_pages( | 383 | _xfs_buf_map_pages( |
384 | xfs_buf_t *bp, | 384 | xfs_buf_t *bp, |
385 | uint flags) | 385 | uint flags) |
386 | { | 386 | { |
387 | /* A single page buffer is always mappable */ | 387 | ASSERT(bp->b_flags & _XBF_PAGES); |
388 | if (bp->b_page_count == 1) { | 388 | if (bp->b_page_count == 1) { |
389 | /* A single page buffer is always mappable */ | ||
389 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; | 390 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; |
390 | bp->b_flags |= XBF_MAPPED; | 391 | bp->b_flags |= XBF_MAPPED; |
391 | } else if (flags & XBF_MAPPED) { | 392 | } else if (flags & XBF_MAPPED) { |
392 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, | 393 | int retried = 0; |
393 | -1, PAGE_KERNEL); | 394 | |
394 | if (unlikely(bp->b_addr == NULL)) | 395 | do { |
396 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, | ||
397 | -1, PAGE_KERNEL); | ||
398 | if (bp->b_addr) | ||
399 | break; | ||
400 | vm_unmap_aliases(); | ||
401 | } while (retried++ <= 1); | ||
402 | |||
403 | if (!bp->b_addr) | ||
395 | return -ENOMEM; | 404 | return -ENOMEM; |
396 | bp->b_addr += bp->b_offset; | 405 | bp->b_addr += bp->b_offset; |
397 | bp->b_flags |= XBF_MAPPED; | 406 | bp->b_flags |= XBF_MAPPED; |
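The retry around vm_map_ram() exists because the call can fail transiently: lazily-freed vmap areas still hold virtual address space until they are purged. vm_unmap_aliases() forces that purge, so one retry after it usually succeeds. The idiom in isolation, as a sketch (helper name illustrative; assumes <linux/vmalloc.h>):

	static void *
	map_pages_retry(struct page **pages, int page_count)
	{
		void	*addr;
		int	retried = 0;

		do {
			/* -1 == allocate vmap space from any NUMA node */
			addr = vm_map_ram(pages, page_count, -1, PAGE_KERNEL);
			if (addr)
				return addr;
			/* purge lazily-freed vmap areas, then try once more */
			vm_unmap_aliases();
		} while (retried++ <= 1);
		return NULL;
	}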
@@ -422,8 +431,10 @@ _xfs_buf_find( | |||
422 | { | 431 | { |
423 | xfs_off_t range_base; | 432 | xfs_off_t range_base; |
424 | size_t range_length; | 433 | size_t range_length; |
425 | xfs_bufhash_t *hash; | 434 | struct xfs_perag *pag; |
426 | xfs_buf_t *bp, *n; | 435 | struct rb_node **rbp; |
436 | struct rb_node *parent; | ||
437 | xfs_buf_t *bp; | ||
427 | 438 | ||
428 | range_base = (ioff << BBSHIFT); | 439 | range_base = (ioff << BBSHIFT); |
429 | range_length = (isize << BBSHIFT); | 440 | range_length = (isize << BBSHIFT); |
@@ -432,14 +443,37 @@ _xfs_buf_find( | |||
432 | ASSERT(!(range_length < (1 << btp->bt_sshift))); | 443 | ASSERT(!(range_length < (1 << btp->bt_sshift))); |
433 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); | 444 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); |
434 | 445 | ||
435 | hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; | 446 | /* get tree root */ |
436 | 447 | pag = xfs_perag_get(btp->bt_mount, | |
437 | spin_lock(&hash->bh_lock); | 448 | xfs_daddr_to_agno(btp->bt_mount, ioff)); |
438 | 449 | ||
439 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | 450 | /* walk tree */ |
440 | ASSERT(btp == bp->b_target); | 451 | spin_lock(&pag->pag_buf_lock); |
441 | if (bp->b_file_offset == range_base && | 452 | rbp = &pag->pag_buf_tree.rb_node; |
442 | bp->b_buffer_length == range_length) { | 453 | parent = NULL; |
454 | bp = NULL; | ||
455 | while (*rbp) { | ||
456 | parent = *rbp; | ||
457 | bp = rb_entry(parent, struct xfs_buf, b_rbnode); | ||
458 | |||
459 | if (range_base < bp->b_file_offset) | ||
460 | rbp = &(*rbp)->rb_left; | ||
461 | else if (range_base > bp->b_file_offset) | ||
462 | rbp = &(*rbp)->rb_right; | ||
463 | else { | ||
464 | /* | ||
465 | * found a block offset match. If the range doesn't | ||
466 | * match, the only way this is allowed is if the buffer | ||
467 | * in the cache is stale and the transaction that made | ||
468 | * it stale has not yet committed. i.e. we are | ||
469 | * reallocating a busy extent. Skip this buffer and | ||
470 | * continue searching to the right for an exact match. | ||
471 | */ | ||
472 | if (bp->b_buffer_length != range_length) { | ||
473 | ASSERT(bp->b_flags & XBF_STALE); | ||
474 | rbp = &(*rbp)->rb_right; | ||
475 | continue; | ||
476 | } | ||
443 | atomic_inc(&bp->b_hold); | 477 | atomic_inc(&bp->b_hold); |
444 | goto found; | 478 | goto found; |
445 | } | 479 | } |
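The walk above is the canonical rbtree search-and-insert idiom: remember the parent and the child slot while descending, so a miss can link the new node at exactly the point where the search fell off the tree, without a second traversal. Stripped of the stale-buffer special case, the shape is roughly:

	struct rb_root	*root = &pag->pag_buf_tree;
	struct rb_node	**rbp = &root->rb_node;
	struct rb_node	*parent = NULL;
	struct xfs_buf	*bp;

	while (*rbp) {
		parent = *rbp;
		bp = rb_entry(parent, struct xfs_buf, b_rbnode);
		if (range_base < bp->b_file_offset)
			rbp = &(*rbp)->rb_left;
		else if (range_base > bp->b_file_offset)
			rbp = &(*rbp)->rb_right;
		else
			return bp;			/* cache hit */
	}
	/* cache miss: link the new node into the empty slot found */
	rb_link_node(&new_bp->b_rbnode, parent, rbp);
	rb_insert_color(&new_bp->b_rbnode, root);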
@@ -449,46 +483,42 @@ _xfs_buf_find( | |||
449 | if (new_bp) { | 483 | if (new_bp) { |
450 | _xfs_buf_initialize(new_bp, btp, range_base, | 484 | _xfs_buf_initialize(new_bp, btp, range_base, |
451 | range_length, flags); | 485 | range_length, flags); |
452 | new_bp->b_hash = hash; | 486 | rb_link_node(&new_bp->b_rbnode, parent, rbp); |
453 | list_add(&new_bp->b_hash_list, &hash->bh_list); | 487 | rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); |
488 | /* the buffer keeps the perag reference until it is freed */ | ||
489 | new_bp->b_pag = pag; | ||
490 | spin_unlock(&pag->pag_buf_lock); | ||
454 | } else { | 491 | } else { |
455 | XFS_STATS_INC(xb_miss_locked); | 492 | XFS_STATS_INC(xb_miss_locked); |
493 | spin_unlock(&pag->pag_buf_lock); | ||
494 | xfs_perag_put(pag); | ||
456 | } | 495 | } |
457 | |||
458 | spin_unlock(&hash->bh_lock); | ||
459 | return new_bp; | 496 | return new_bp; |
460 | 497 | ||
461 | found: | 498 | found: |
462 | spin_unlock(&hash->bh_lock); | 499 | spin_unlock(&pag->pag_buf_lock); |
500 | xfs_perag_put(pag); | ||
463 | 501 | ||
464 | /* Attempt to get the semaphore without sleeping, | 502 | if (xfs_buf_cond_lock(bp)) { |
465 | * if this does not work then we need to drop the | 503 | /* failed, so wait for the lock if requested. */ |
466 | * spinlock and do a hard attempt on the semaphore. | ||
467 | */ | ||
468 | if (down_trylock(&bp->b_sema)) { | ||
469 | if (!(flags & XBF_TRYLOCK)) { | 504 | if (!(flags & XBF_TRYLOCK)) { |
470 | /* wait for buffer ownership */ | ||
471 | xfs_buf_lock(bp); | 505 | xfs_buf_lock(bp); |
472 | XFS_STATS_INC(xb_get_locked_waited); | 506 | XFS_STATS_INC(xb_get_locked_waited); |
473 | } else { | 507 | } else { |
474 | /* We asked for a trylock and failed, no need | ||
475 | * to look at file offset and length here, we | ||
476 | * know that this buffer at least overlaps our | ||
477 | * buffer and is locked, therefore our buffer | ||
478 | * either does not exist, or is this buffer. | ||
479 | */ | ||
480 | xfs_buf_rele(bp); | 508 | xfs_buf_rele(bp); |
481 | XFS_STATS_INC(xb_busy_locked); | 509 | XFS_STATS_INC(xb_busy_locked); |
482 | return NULL; | 510 | return NULL; |
483 | } | 511 | } |
484 | } else { | ||
485 | /* trylock worked */ | ||
486 | XB_SET_OWNER(bp); | ||
487 | } | 512 | } |
488 | 513 | ||
514 | /* | ||
515 | * if the buffer is stale, clear all the external state associated with | ||
516 | * it. We need to keep flags such as how we allocated the buffer memory | ||
517 | * intact here. | ||
518 | */ | ||
489 | if (bp->b_flags & XBF_STALE) { | 519 | if (bp->b_flags & XBF_STALE) { |
490 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); | 520 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); |
491 | bp->b_flags &= XBF_MAPPED; | 521 | bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; |
492 | } | 522 | } |
493 | 523 | ||
494 | trace_xfs_buf_find(bp, flags, _RET_IP_); | 524 | trace_xfs_buf_find(bp, flags, _RET_IP_); |
@@ -509,7 +539,7 @@ xfs_buf_get( | |||
509 | xfs_buf_flags_t flags) | 539 | xfs_buf_flags_t flags) |
510 | { | 540 | { |
511 | xfs_buf_t *bp, *new_bp; | 541 | xfs_buf_t *bp, *new_bp; |
512 | int error = 0, i; | 542 | int error = 0; |
513 | 543 | ||
514 | new_bp = xfs_buf_allocate(flags); | 544 | new_bp = xfs_buf_allocate(flags); |
515 | if (unlikely(!new_bp)) | 545 | if (unlikely(!new_bp)) |
@@ -517,7 +547,7 @@ xfs_buf_get( | |||
517 | 547 | ||
518 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); | 548 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); |
519 | if (bp == new_bp) { | 549 | if (bp == new_bp) { |
520 | error = _xfs_buf_lookup_pages(bp, flags); | 550 | error = xfs_buf_allocate_memory(bp, flags); |
521 | if (error) | 551 | if (error) |
522 | goto no_buffer; | 552 | goto no_buffer; |
523 | } else { | 553 | } else { |
@@ -526,14 +556,11 @@ xfs_buf_get( | |||
526 | return NULL; | 556 | return NULL; |
527 | } | 557 | } |
528 | 558 | ||
529 | for (i = 0; i < bp->b_page_count; i++) | ||
530 | mark_page_accessed(bp->b_pages[i]); | ||
531 | |||
532 | if (!(bp->b_flags & XBF_MAPPED)) { | 559 | if (!(bp->b_flags & XBF_MAPPED)) { |
533 | error = _xfs_buf_map_pages(bp, flags); | 560 | error = _xfs_buf_map_pages(bp, flags); |
534 | if (unlikely(error)) { | 561 | if (unlikely(error)) { |
535 | printk(KERN_WARNING "%s: failed to map pages\n", | 562 | xfs_warn(target->bt_mount, |
536 | __func__); | 563 | "%s: failed to map pages\n", __func__); |
537 | goto no_buffer; | 564 | goto no_buffer; |
538 | } | 565 | } |
539 | } | 566 | } |
@@ -625,17 +652,47 @@ void | |||
625 | xfs_buf_readahead( | 652 | xfs_buf_readahead( |
626 | xfs_buftarg_t *target, | 653 | xfs_buftarg_t *target, |
627 | xfs_off_t ioff, | 654 | xfs_off_t ioff, |
628 | size_t isize, | 655 | size_t isize) |
629 | xfs_buf_flags_t flags) | ||
630 | { | 656 | { |
631 | struct backing_dev_info *bdi; | 657 | if (bdi_read_congested(target->bt_bdi)) |
632 | |||
633 | bdi = target->bt_mapping->backing_dev_info; | ||
634 | if (bdi_read_congested(bdi)) | ||
635 | return; | 658 | return; |
636 | 659 | ||
637 | flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); | 660 | xfs_buf_read(target, ioff, isize, |
638 | xfs_buf_read(target, ioff, isize, flags); | 661 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); |
662 | } | ||
663 | |||
664 | /* | ||
665 | * Read an uncached buffer from disk. Allocates and returns a locked | ||
666 | * buffer containing the disk contents or nothing. | ||
667 | */ | ||
668 | struct xfs_buf * | ||
669 | xfs_buf_read_uncached( | ||
670 | struct xfs_mount *mp, | ||
671 | struct xfs_buftarg *target, | ||
672 | xfs_daddr_t daddr, | ||
673 | size_t length, | ||
674 | int flags) | ||
675 | { | ||
676 | xfs_buf_t *bp; | ||
677 | int error; | ||
678 | |||
679 | bp = xfs_buf_get_uncached(target, length, flags); | ||
680 | if (!bp) | ||
681 | return NULL; | ||
682 | |||
683 | /* set up the buffer for a read IO */ | ||
684 | xfs_buf_lock(bp); | ||
685 | XFS_BUF_SET_ADDR(bp, daddr); | ||
686 | XFS_BUF_READ(bp); | ||
687 | XFS_BUF_BUSY(bp); | ||
688 | |||
689 | xfsbdstrat(mp, bp); | ||
690 | error = xfs_buf_iowait(bp); | ||
691 | if (error || bp->b_error) { | ||
692 | xfs_buf_relse(bp); | ||
693 | return NULL; | ||
694 | } | ||
695 | return bp; | ||
639 | } | 696 | } |
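A hypothetical call site, reading a single filesystem sector from the start of the data device. XFS_SB_DADDR, BBTOB() and XFS_FSS_TO_BB() are existing XFS helpers, but this exact usage is illustrative rather than taken from the commit:

	struct xfs_buf	*bp;

	bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, XFS_SB_DADDR,
				   BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
	if (!bp)
		return EIO;
	/* ... inspect bp->b_addr ... */
	xfs_buf_relse(bp);		/* unlocks and drops the reference */

Because the buffer is uncached it never enters the per-AG rbtree (b_pag stays NULL), so the caller owns its whole lifetime and the final release frees it immediately.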
640 | 697 | ||
641 | xfs_buf_t * | 698 | xfs_buf_t * |
@@ -651,6 +708,27 @@ xfs_buf_get_empty( | |||
651 | return bp; | 708 | return bp; |
652 | } | 709 | } |
653 | 710 | ||
711 | /* | ||
712 | * Return a buffer allocated as an empty buffer and associated with external | ||
713 | * memory via xfs_buf_associate_memory() back to its empty state. | ||
714 | */ | ||
715 | void | ||
716 | xfs_buf_set_empty( | ||
717 | struct xfs_buf *bp, | ||
718 | size_t len) | ||
719 | { | ||
720 | if (bp->b_pages) | ||
721 | _xfs_buf_free_pages(bp); | ||
722 | |||
723 | bp->b_pages = NULL; | ||
724 | bp->b_page_count = 0; | ||
725 | bp->b_addr = NULL; | ||
726 | bp->b_file_offset = 0; | ||
727 | bp->b_buffer_length = bp->b_count_desired = len; | ||
728 | bp->b_bn = XFS_BUF_DADDR_NULL; | ||
729 | bp->b_flags &= ~XBF_MAPPED; | ||
730 | } | ||
731 | |||
654 | static inline struct page * | 732 | static inline struct page * |
655 | mem_to_page( | 733 | mem_to_page( |
656 | void *addr) | 734 | void *addr) |
@@ -675,10 +753,10 @@ xfs_buf_associate_memory( | |||
675 | size_t buflen; | 753 | size_t buflen; |
676 | int page_count; | 754 | int page_count; |
677 | 755 | ||
678 | pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; | 756 | pageaddr = (unsigned long)mem & PAGE_MASK; |
679 | offset = (unsigned long)mem - pageaddr; | 757 | offset = (unsigned long)mem - pageaddr; |
680 | buflen = PAGE_CACHE_ALIGN(len + offset); | 758 | buflen = PAGE_ALIGN(len + offset); |
681 | page_count = buflen >> PAGE_CACHE_SHIFT; | 759 | page_count = buflen >> PAGE_SHIFT; |
682 | 760 | ||
683 | /* Free any previous set of page pointers */ | 761 | /* Free any previous set of page pointers */ |
684 | if (bp->b_pages) | 762 | if (bp->b_pages) |
@@ -695,21 +773,21 @@ xfs_buf_associate_memory( | |||
695 | 773 | ||
696 | for (i = 0; i < bp->b_page_count; i++) { | 774 | for (i = 0; i < bp->b_page_count; i++) { |
697 | bp->b_pages[i] = mem_to_page((void *)pageaddr); | 775 | bp->b_pages[i] = mem_to_page((void *)pageaddr); |
698 | pageaddr += PAGE_CACHE_SIZE; | 776 | pageaddr += PAGE_SIZE; |
699 | } | 777 | } |
700 | 778 | ||
701 | bp->b_count_desired = len; | 779 | bp->b_count_desired = len; |
702 | bp->b_buffer_length = buflen; | 780 | bp->b_buffer_length = buflen; |
703 | bp->b_flags |= XBF_MAPPED; | 781 | bp->b_flags |= XBF_MAPPED; |
704 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
705 | 782 | ||
706 | return 0; | 783 | return 0; |
707 | } | 784 | } |
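A quick worked example of the rounding above, assuming PAGE_SIZE is 4096 (the values are made up for illustration):

	/*
	 * mem = 0x10f80, len = 9000:
	 *   pageaddr   = mem & PAGE_MASK          = 0x10000
	 *   offset     = mem - pageaddr           = 0xf80 (3968)
	 *   buflen     = PAGE_ALIGN(9000 + 3968)  = 16384
	 *   page_count = buflen >> PAGE_SHIFT     = 4
	 */

So a 9000-byte region that starts 3968 bytes into a page spans four pages, and b_pages[] gets one mem_to_page() entry per page.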
708 | 785 | ||
709 | xfs_buf_t * | 786 | xfs_buf_t * |
710 | xfs_buf_get_noaddr( | 787 | xfs_buf_get_uncached( |
788 | struct xfs_buftarg *target, | ||
711 | size_t len, | 789 | size_t len, |
712 | xfs_buftarg_t *target) | 790 | int flags) |
713 | { | 791 | { |
714 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; | 792 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; |
715 | int error, i; | 793 | int error, i; |
@@ -725,7 +803,7 @@ xfs_buf_get_noaddr( | |||
725 | goto fail_free_buf; | 803 | goto fail_free_buf; |
726 | 804 | ||
727 | for (i = 0; i < page_count; i++) { | 805 | for (i = 0; i < page_count; i++) { |
728 | bp->b_pages[i] = alloc_page(GFP_KERNEL); | 806 | bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); |
729 | if (!bp->b_pages[i]) | 807 | if (!bp->b_pages[i]) |
730 | goto fail_free_mem; | 808 | goto fail_free_mem; |
731 | } | 809 | } |
@@ -733,14 +811,14 @@ xfs_buf_get_noaddr( | |||
733 | 811 | ||
734 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); | 812 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); |
735 | if (unlikely(error)) { | 813 | if (unlikely(error)) { |
736 | printk(KERN_WARNING "%s: failed to map pages\n", | 814 | xfs_warn(target->bt_mount, |
737 | __func__); | 815 | "%s: failed to map pages\n", __func__); |
738 | goto fail_free_mem; | 816 | goto fail_free_mem; |
739 | } | 817 | } |
740 | 818 | ||
741 | xfs_buf_unlock(bp); | 819 | xfs_buf_unlock(bp); |
742 | 820 | ||
743 | trace_xfs_buf_get_noaddr(bp, _RET_IP_); | 821 | trace_xfs_buf_get_uncached(bp, _RET_IP_); |
744 | return bp; | 822 | return bp; |
745 | 823 | ||
746 | fail_free_mem: | 824 | fail_free_mem: |
@@ -774,29 +852,32 @@ void | |||
774 | xfs_buf_rele( | 852 | xfs_buf_rele( |
775 | xfs_buf_t *bp) | 853 | xfs_buf_t *bp) |
776 | { | 854 | { |
777 | xfs_bufhash_t *hash = bp->b_hash; | 855 | struct xfs_perag *pag = bp->b_pag; |
778 | 856 | ||
779 | trace_xfs_buf_rele(bp, _RET_IP_); | 857 | trace_xfs_buf_rele(bp, _RET_IP_); |
780 | 858 | ||
781 | if (unlikely(!hash)) { | 859 | if (!pag) { |
782 | ASSERT(!bp->b_relse); | 860 | ASSERT(list_empty(&bp->b_lru)); |
861 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | ||
783 | if (atomic_dec_and_test(&bp->b_hold)) | 862 | if (atomic_dec_and_test(&bp->b_hold)) |
784 | xfs_buf_free(bp); | 863 | xfs_buf_free(bp); |
785 | return; | 864 | return; |
786 | } | 865 | } |
787 | 866 | ||
867 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); | ||
868 | |||
788 | ASSERT(atomic_read(&bp->b_hold) > 0); | 869 | ASSERT(atomic_read(&bp->b_hold) > 0); |
789 | if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { | 870 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
790 | if (bp->b_relse) { | 871 | if (!(bp->b_flags & XBF_STALE) && |
791 | atomic_inc(&bp->b_hold); | 872 | atomic_read(&bp->b_lru_ref)) { |
792 | spin_unlock(&hash->bh_lock); | 873 | xfs_buf_lru_add(bp); |
793 | (*(bp->b_relse)) (bp); | 874 | spin_unlock(&pag->pag_buf_lock); |
794 | } else if (bp->b_flags & XBF_FS_MANAGED) { | ||
795 | spin_unlock(&hash->bh_lock); | ||
796 | } else { | 875 | } else { |
876 | xfs_buf_lru_del(bp); | ||
797 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 877 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); |
798 | list_del_init(&bp->b_hash_list); | 878 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
799 | spin_unlock(&hash->bh_lock); | 879 | spin_unlock(&pag->pag_buf_lock); |
880 | xfs_perag_put(pag); | ||
800 | xfs_buf_free(bp); | 881 | xfs_buf_free(bp); |
801 | } | 882 | } |
802 | } | 883 | } |
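The release path pivots on atomic_dec_and_lock(), which takes pag_buf_lock only when the decrement hits zero, so dropping one of several references stays lock-free. Condensed (the real code above also handles LRU removal and the perag reference), the last-reference paths look like:

	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
		/* last reference gone; pag_buf_lock is now held */
		if (!(bp->b_flags & XBF_STALE) &&
		    atomic_read(&bp->b_lru_ref)) {
			xfs_buf_lru_add(bp);	/* keep cached for reuse */
			spin_unlock(&pag->pag_buf_lock);
		} else {
			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
			spin_unlock(&pag->pag_buf_lock);
			xfs_buf_free(bp);	/* free outside the lock */
		}
	}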
@@ -804,20 +885,15 @@ xfs_buf_rele( | |||
804 | 885 | ||
805 | 886 | ||
806 | /* | 887 | /* |
807 | * Mutual exclusion on buffers. Locking model: | 888 | * Lock a buffer object, if it is not already locked. |
808 | * | 889 | * |
809 | * Buffers associated with inodes for which buffer locking | 890 | * If we come across a stale, pinned, locked buffer, we know that we are |
810 | * is not enabled are not protected by semaphores, and are | 891 | * being asked to lock a buffer that has been reallocated. Because it is |
811 | * assumed to be exclusively owned by the caller. There is a | 892 | * pinned, we know that the log has not been pushed to disk and hence it |
812 | * spinlock in the buffer, used by the caller when concurrent | 893 | * will still be locked. Rather than continuing to have trylock attempts |
813 | * access is possible. | 894 | * fail until someone else pushes the log, push it ourselves before |
814 | */ | 895 | * returning. This means that the xfsaild will not get stuck trying |
815 | 896 | * to push on stale inode buffers. | |
816 | /* | ||
817 | * Locks a buffer object, if it is not already locked. | ||
818 | * Note that this in no way locks the underlying pages, so it is only | ||
819 | * useful for synchronizing concurrent use of buffer objects, not for | ||
820 | * synchronizing independent access to the underlying pages. | ||
821 | */ | 897 | */ |
822 | int | 898 | int |
823 | xfs_buf_cond_lock( | 899 | xfs_buf_cond_lock( |
@@ -828,6 +904,8 @@ xfs_buf_cond_lock( | |||
828 | locked = down_trylock(&bp->b_sema) == 0; | 904 | locked = down_trylock(&bp->b_sema) == 0; |
829 | if (locked) | 905 | if (locked) |
830 | XB_SET_OWNER(bp); | 906 | XB_SET_OWNER(bp); |
907 | else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | ||
908 | xfs_log_force(bp->b_target->bt_mount, 0); | ||
831 | 909 | ||
832 | trace_xfs_buf_cond_lock(bp, _RET_IP_); | 910 | trace_xfs_buf_cond_lock(bp, _RET_IP_); |
833 | return locked ? 0 : -EBUSY; | 911 | return locked ? 0 : -EBUSY; |
@@ -841,10 +919,7 @@ xfs_buf_lock_value( | |||
841 | } | 919 | } |
842 | 920 | ||
843 | /* | 921 | /* |
844 | * Locks a buffer object. | 922 | * Lock a buffer object. |
845 | * Note that this in no way locks the underlying pages, so it is only | ||
846 | * useful for synchronizing concurrent use of buffer objects, not for | ||
847 | * synchronizing independent access to the underlying pages. | ||
848 | * | 923 | * |
849 | * If we come across a stale, pinned, locked buffer, we know that we | 924 | * If we come across a stale, pinned, locked buffer, we know that we |
850 | * are being asked to lock a buffer that has been reallocated. Because | 925 | * are being asked to lock a buffer that has been reallocated. Because |
@@ -859,9 +934,7 @@ xfs_buf_lock( | |||
859 | trace_xfs_buf_lock(bp, _RET_IP_); | 934 | trace_xfs_buf_lock(bp, _RET_IP_); |
860 | 935 | ||
861 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | 936 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) |
862 | xfs_log_force(bp->b_mount, 0); | 937 | xfs_log_force(bp->b_target->bt_mount, 0); |
863 | if (atomic_read(&bp->b_io_remaining)) | ||
864 | blk_run_address_space(bp->b_target->bt_mapping); | ||
865 | down(&bp->b_sema); | 938 | down(&bp->b_sema); |
866 | XB_SET_OWNER(bp); | 939 | XB_SET_OWNER(bp); |
867 | 940 | ||
@@ -905,9 +978,7 @@ xfs_buf_wait_unpin( | |||
905 | set_current_state(TASK_UNINTERRUPTIBLE); | 978 | set_current_state(TASK_UNINTERRUPTIBLE); |
906 | if (atomic_read(&bp->b_pin_count) == 0) | 979 | if (atomic_read(&bp->b_pin_count) == 0) |
907 | break; | 980 | break; |
908 | if (atomic_read(&bp->b_io_remaining)) | 981 | io_schedule(); |
909 | blk_run_address_space(bp->b_target->bt_mapping); | ||
910 | schedule(); | ||
911 | } | 982 | } |
912 | remove_wait_queue(&bp->b_waiters, &wait); | 983 | remove_wait_queue(&bp->b_waiters, &wait); |
913 | set_current_state(TASK_RUNNING); | 984 | set_current_state(TASK_RUNNING); |
@@ -924,19 +995,7 @@ xfs_buf_iodone_work( | |||
924 | xfs_buf_t *bp = | 995 | xfs_buf_t *bp = |
925 | container_of(work, xfs_buf_t, b_iodone_work); | 996 | container_of(work, xfs_buf_t, b_iodone_work); |
926 | 997 | ||
927 | /* | 998 | if (bp->b_iodone) |
928 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | ||
929 | * ordered flag and reissue them. Because we can't tell the higher | ||
930 | * layers directly that they should not issue ordered I/O anymore, they | ||
931 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. | ||
932 | */ | ||
933 | if ((bp->b_error == EOPNOTSUPP) && | ||
934 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | ||
935 | trace_xfs_buf_ordered_retry(bp, _RET_IP_); | ||
936 | bp->b_flags &= ~XBF_ORDERED; | ||
937 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
938 | xfs_buf_iorequest(bp); | ||
939 | } else if (bp->b_iodone) | ||
940 | (*(bp->b_iodone))(bp); | 999 | (*(bp->b_iodone))(bp); |
941 | else if (bp->b_flags & XBF_ASYNC) | 1000 | else if (bp->b_flags & XBF_ASYNC) |
942 | xfs_buf_relse(bp); | 1001 | xfs_buf_relse(bp); |
@@ -982,7 +1041,6 @@ xfs_bwrite( | |||
982 | { | 1041 | { |
983 | int error; | 1042 | int error; |
984 | 1043 | ||
985 | bp->b_mount = mp; | ||
986 | bp->b_flags |= XBF_WRITE; | 1044 | bp->b_flags |= XBF_WRITE; |
987 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ); | 1045 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ); |
988 | 1046 | ||
@@ -1003,8 +1061,6 @@ xfs_bdwrite( | |||
1003 | { | 1061 | { |
1004 | trace_xfs_buf_bdwrite(bp, _RET_IP_); | 1062 | trace_xfs_buf_bdwrite(bp, _RET_IP_); |
1005 | 1063 | ||
1006 | bp->b_mount = mp; | ||
1007 | |||
1008 | bp->b_flags &= ~XBF_READ; | 1064 | bp->b_flags &= ~XBF_READ; |
1009 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); | 1065 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); |
1010 | 1066 | ||
@@ -1013,7 +1069,7 @@ xfs_bdwrite( | |||
1013 | 1069 | ||
1014 | /* | 1070 | /* |
1015 | * Called when we want to stop a buffer from getting written or read. | 1071 | * Called when we want to stop a buffer from getting written or read. |
1016 | * We attach the EIO error, muck with its flags, and call biodone | 1072 | * We attach the EIO error, muck with its flags, and call xfs_buf_ioend |
1017 | * so that the proper iodone callbacks get called. | 1073 | * so that the proper iodone callbacks get called. |
1018 | */ | 1074 | */ |
1019 | STATIC int | 1075 | STATIC int |
@@ -1030,21 +1086,21 @@ xfs_bioerror( | |||
1030 | XFS_BUF_ERROR(bp, EIO); | 1086 | XFS_BUF_ERROR(bp, EIO); |
1031 | 1087 | ||
1032 | /* | 1088 | /* |
1033 | * We're calling biodone, so delete XBF_DONE flag. | 1089 | * We're calling xfs_buf_ioend, so delete XBF_DONE flag. |
1034 | */ | 1090 | */ |
1035 | XFS_BUF_UNREAD(bp); | 1091 | XFS_BUF_UNREAD(bp); |
1036 | XFS_BUF_UNDELAYWRITE(bp); | 1092 | XFS_BUF_UNDELAYWRITE(bp); |
1037 | XFS_BUF_UNDONE(bp); | 1093 | XFS_BUF_UNDONE(bp); |
1038 | XFS_BUF_STALE(bp); | 1094 | XFS_BUF_STALE(bp); |
1039 | 1095 | ||
1040 | xfs_biodone(bp); | 1096 | xfs_buf_ioend(bp, 0); |
1041 | 1097 | ||
1042 | return EIO; | 1098 | return EIO; |
1043 | } | 1099 | } |
1044 | 1100 | ||
1045 | /* | 1101 | /* |
1046 | * Same as xfs_bioerror, except that we are releasing the buffer | 1102 | * Same as xfs_bioerror, except that we are releasing the buffer |
1047 | * here ourselves, and avoiding the biodone call. | 1103 | * here ourselves, and avoiding the xfs_buf_ioend call. |
1048 | * This is meant for userdata errors; metadata bufs come with | 1104 | * This is meant for userdata errors; metadata bufs come with |
1049 | * iodone functions attached, so that we can track down errors. | 1105 | * iodone functions attached, so that we can track down errors. |
1050 | */ | 1106 | */ |
@@ -1093,7 +1149,7 @@ int | |||
1093 | xfs_bdstrat_cb( | 1149 | xfs_bdstrat_cb( |
1094 | struct xfs_buf *bp) | 1150 | struct xfs_buf *bp) |
1095 | { | 1151 | { |
1096 | if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { | 1152 | if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { |
1097 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | 1153 | trace_xfs_bdstrat_shut(bp, _RET_IP_); |
1098 | /* | 1154 | /* |
1099 | * Metadata write that didn't get logged but | 1155 | * Metadata write that didn't get logged but |
@@ -1134,10 +1190,8 @@ _xfs_buf_ioend( | |||
1134 | xfs_buf_t *bp, | 1190 | xfs_buf_t *bp, |
1135 | int schedule) | 1191 | int schedule) |
1136 | { | 1192 | { |
1137 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { | 1193 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) |
1138 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
1139 | xfs_buf_ioend(bp, schedule); | 1194 | xfs_buf_ioend(bp, schedule); |
1140 | } | ||
1141 | } | 1195 | } |
1142 | 1196 | ||
1143 | STATIC void | 1197 | STATIC void |
@@ -1146,35 +1200,12 @@ xfs_buf_bio_end_io( | |||
1146 | int error) | 1200 | int error) |
1147 | { | 1201 | { |
1148 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; | 1202 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; |
1149 | unsigned int blocksize = bp->b_target->bt_bsize; | ||
1150 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
1151 | 1203 | ||
1152 | xfs_buf_ioerror(bp, -error); | 1204 | xfs_buf_ioerror(bp, -error); |
1153 | 1205 | ||
1154 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) | 1206 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) |
1155 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); | 1207 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); |
1156 | 1208 | ||
1157 | do { | ||
1158 | struct page *page = bvec->bv_page; | ||
1159 | |||
1160 | ASSERT(!PagePrivate(page)); | ||
1161 | if (unlikely(bp->b_error)) { | ||
1162 | if (bp->b_flags & XBF_READ) | ||
1163 | ClearPageUptodate(page); | ||
1164 | } else if (blocksize >= PAGE_CACHE_SIZE) { | ||
1165 | SetPageUptodate(page); | ||
1166 | } else if (!PagePrivate(page) && | ||
1167 | (bp->b_flags & _XBF_PAGE_CACHE)) { | ||
1168 | set_page_region(page, bvec->bv_offset, bvec->bv_len); | ||
1169 | } | ||
1170 | |||
1171 | if (--bvec >= bio->bi_io_vec) | ||
1172 | prefetchw(&bvec->bv_page->flags); | ||
1173 | |||
1174 | if (bp->b_flags & _XBF_PAGE_LOCKED) | ||
1175 | unlock_page(page); | ||
1176 | } while (bvec >= bio->bi_io_vec); | ||
1177 | |||
1178 | _xfs_buf_ioend(bp, 1); | 1209 | _xfs_buf_ioend(bp, 1); |
1179 | bio_put(bio); | 1210 | bio_put(bio); |
1180 | } | 1211 | } |
@@ -1188,14 +1219,13 @@ _xfs_buf_ioapply( | |||
1188 | int offset = bp->b_offset; | 1219 | int offset = bp->b_offset; |
1189 | int size = bp->b_count_desired; | 1220 | int size = bp->b_count_desired; |
1190 | sector_t sector = bp->b_bn; | 1221 | sector_t sector = bp->b_bn; |
1191 | unsigned int blocksize = bp->b_target->bt_bsize; | ||
1192 | 1222 | ||
1193 | total_nr_pages = bp->b_page_count; | 1223 | total_nr_pages = bp->b_page_count; |
1194 | map_i = 0; | 1224 | map_i = 0; |
1195 | 1225 | ||
1196 | if (bp->b_flags & XBF_ORDERED) { | 1226 | if (bp->b_flags & XBF_ORDERED) { |
1197 | ASSERT(!(bp->b_flags & XBF_READ)); | 1227 | ASSERT(!(bp->b_flags & XBF_READ)); |
1198 | rw = WRITE_BARRIER; | 1228 | rw = WRITE_FLUSH_FUA; |
1199 | } else if (bp->b_flags & XBF_LOG_BUFFER) { | 1229 | } else if (bp->b_flags & XBF_LOG_BUFFER) { |
1200 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); | 1230 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); |
1201 | bp->b_flags &= ~_XBF_RUN_QUEUES; | 1231 | bp->b_flags &= ~_XBF_RUN_QUEUES; |
@@ -1209,29 +1239,6 @@ _xfs_buf_ioapply( | |||
1209 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; | 1239 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; |
1210 | } | 1240 | } |
1211 | 1241 | ||
1212 | /* Special code path for reading a sub page size buffer in -- | ||
1213 | * we populate up the whole page, and hence the other metadata | ||
1214 | * in the same page. This optimization is only valid when the | ||
1215 | * filesystem block size is not smaller than the page size. | ||
1216 | */ | ||
1217 | if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && | ||
1218 | ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == | ||
1219 | (XBF_READ|_XBF_PAGE_LOCKED)) && | ||
1220 | (blocksize >= PAGE_CACHE_SIZE)) { | ||
1221 | bio = bio_alloc(GFP_NOIO, 1); | ||
1222 | |||
1223 | bio->bi_bdev = bp->b_target->bt_bdev; | ||
1224 | bio->bi_sector = sector - (offset >> BBSHIFT); | ||
1225 | bio->bi_end_io = xfs_buf_bio_end_io; | ||
1226 | bio->bi_private = bp; | ||
1227 | |||
1228 | bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0); | ||
1229 | size = 0; | ||
1230 | |||
1231 | atomic_inc(&bp->b_io_remaining); | ||
1232 | |||
1233 | goto submit_io; | ||
1234 | } | ||
1235 | 1242 | ||
1236 | next_chunk: | 1243 | next_chunk: |
1237 | atomic_inc(&bp->b_io_remaining); | 1244 | atomic_inc(&bp->b_io_remaining); |
@@ -1245,8 +1252,9 @@ next_chunk: | |||
1245 | bio->bi_end_io = xfs_buf_bio_end_io; | 1252 | bio->bi_end_io = xfs_buf_bio_end_io; |
1246 | bio->bi_private = bp; | 1253 | bio->bi_private = bp; |
1247 | 1254 | ||
1255 | |||
1248 | for (; size && nr_pages; nr_pages--, map_i++) { | 1256 | for (; size && nr_pages; nr_pages--, map_i++) { |
1249 | int rbytes, nbytes = PAGE_CACHE_SIZE - offset; | 1257 | int rbytes, nbytes = PAGE_SIZE - offset; |
1250 | 1258 | ||
1251 | if (nbytes > size) | 1259 | if (nbytes > size) |
1252 | nbytes = size; | 1260 | nbytes = size; |
@@ -1261,7 +1269,6 @@ next_chunk: | |||
1261 | total_nr_pages--; | 1269 | total_nr_pages--; |
1262 | } | 1270 | } |
1263 | 1271 | ||
1264 | submit_io: | ||
1265 | if (likely(bio->bi_size)) { | 1272 | if (likely(bio->bi_size)) { |
1266 | if (xfs_buf_is_vmapped(bp)) { | 1273 | if (xfs_buf_is_vmapped(bp)) { |
1267 | flush_kernel_vmap_range(bp->b_addr, | 1274 | flush_kernel_vmap_range(bp->b_addr, |
@@ -1271,18 +1278,7 @@ submit_io: | |||
1271 | if (size) | 1278 | if (size) |
1272 | goto next_chunk; | 1279 | goto next_chunk; |
1273 | } else { | 1280 | } else { |
1274 | /* | ||
1275 | * if we get here, no pages were added to the bio. However, | ||
1276 | * we can't just error out here - if the pages are locked then | ||
1277 | * we have to unlock them otherwise we can hang on a later | ||
1278 | * access to the page. | ||
1279 | */ | ||
1280 | xfs_buf_ioerror(bp, EIO); | 1281 | xfs_buf_ioerror(bp, EIO); |
1281 | if (bp->b_flags & _XBF_PAGE_LOCKED) { | ||
1282 | int i; | ||
1283 | for (i = 0; i < bp->b_page_count; i++) | ||
1284 | unlock_page(bp->b_pages[i]); | ||
1285 | } | ||
1286 | bio_put(bio); | 1282 | bio_put(bio); |
1287 | } | 1283 | } |
1288 | } | 1284 | } |
@@ -1327,8 +1323,6 @@ xfs_buf_iowait( | |||
1327 | { | 1323 | { |
1328 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1324 | trace_xfs_buf_iowait(bp, _RET_IP_); |
1329 | 1325 | ||
1330 | if (atomic_read(&bp->b_io_remaining)) | ||
1331 | blk_run_address_space(bp->b_target->bt_mapping); | ||
1332 | wait_for_completion(&bp->b_iowait); | 1326 | wait_for_completion(&bp->b_iowait); |
1333 | 1327 | ||
1334 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1328 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
@@ -1346,8 +1340,8 @@ xfs_buf_offset( | |||
1346 | return XFS_BUF_PTR(bp) + offset; | 1340 | return XFS_BUF_PTR(bp) + offset; |
1347 | 1341 | ||
1348 | offset += bp->b_offset; | 1342 | offset += bp->b_offset; |
1349 | page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; | 1343 | page = bp->b_pages[offset >> PAGE_SHIFT]; |
1350 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); | 1344 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); |
1351 | } | 1345 | } |
1352 | 1346 | ||
1353 | /* | 1347 | /* |
@@ -1369,9 +1363,9 @@ xfs_buf_iomove( | |||
1369 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; | 1363 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; |
1370 | cpoff = xfs_buf_poff(boff + bp->b_offset); | 1364 | cpoff = xfs_buf_poff(boff + bp->b_offset); |
1371 | csize = min_t(size_t, | 1365 | csize = min_t(size_t, |
1372 | PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); | 1366 | PAGE_SIZE-cpoff, bp->b_count_desired-boff); |
1373 | 1367 | ||
1374 | ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); | 1368 | ASSERT(((csize + cpoff) <= PAGE_SIZE)); |
1375 | 1369 | ||
1376 | switch (mode) { | 1370 | switch (mode) { |
1377 | case XBRW_ZERO: | 1371 | case XBRW_ZERO: |
@@ -1394,89 +1388,84 @@ xfs_buf_iomove( | |||
1394 | */ | 1388 | */ |
1395 | 1389 | ||
1396 | /* | 1390 | /* |
1397 | * Wait for any bufs with callbacks that have been submitted but | 1391 | * Wait for any bufs with callbacks that have been submitted but have not yet |
1398 | * have not yet returned... walk the hash list for the target. | 1392 | * returned. These buffers will have an elevated hold count, so wait on those |
1393 | * while freeing all the buffers only held by the LRU. | ||
1399 | */ | 1394 | */ |
1400 | void | 1395 | void |
1401 | xfs_wait_buftarg( | 1396 | xfs_wait_buftarg( |
1402 | xfs_buftarg_t *btp) | 1397 | struct xfs_buftarg *btp) |
1403 | { | 1398 | { |
1404 | xfs_buf_t *bp, *n; | 1399 | struct xfs_buf *bp; |
1405 | xfs_bufhash_t *hash; | 1400 | |
1406 | uint i; | 1401 | restart: |
1407 | 1402 | spin_lock(&btp->bt_lru_lock); | |
1408 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1403 | while (!list_empty(&btp->bt_lru)) { |
1409 | hash = &btp->bt_hash[i]; | 1404 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
1410 | again: | 1405 | if (atomic_read(&bp->b_hold) > 1) { |
1411 | spin_lock(&hash->bh_lock); | 1406 | spin_unlock(&btp->bt_lru_lock); |
1412 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | 1407 | delay(100); |
1413 | ASSERT(btp == bp->b_target); | 1408 | goto restart; |
1414 | if (!(bp->b_flags & XBF_FS_MANAGED)) { | ||
1415 | spin_unlock(&hash->bh_lock); | ||
1416 | /* | ||
1417 | * Catch superblock reference count leaks | ||
1418 | * immediately | ||
1419 | */ | ||
1420 | BUG_ON(bp->b_bn == 0); | ||
1421 | delay(100); | ||
1422 | goto again; | ||
1423 | } | ||
1424 | } | 1409 | } |
1425 | spin_unlock(&hash->bh_lock); | 1410 | /* |
1411 | * clear the LRU reference count so the buffer doesn't get | ||
1412 | * ignored in xfs_buf_rele(). | ||
1413 | */ | ||
1414 | atomic_set(&bp->b_lru_ref, 0); | ||
1415 | spin_unlock(&btp->bt_lru_lock); | ||
1416 | xfs_buf_rele(bp); | ||
1417 | spin_lock(&btp->bt_lru_lock); | ||
1426 | } | 1418 | } |
1419 | spin_unlock(&btp->bt_lru_lock); | ||
1427 | } | 1420 | } |
1428 | 1421 | ||
1429 | /* | 1422 | int |
1430 | * Allocate buffer hash table for a given target. | 1423 | xfs_buftarg_shrink( |
1431 | * For devices containing metadata (i.e. not the log/realtime devices) | 1424 | struct shrinker *shrink, |
1432 | * we need to allocate a much larger hash table. | 1425 | struct shrink_control *sc) |
1433 | */ | ||
1434 | STATIC void | ||
1435 | xfs_alloc_bufhash( | ||
1436 | xfs_buftarg_t *btp, | ||
1437 | int external) | ||
1438 | { | 1426 | { |
1439 | unsigned int i; | 1427 | struct xfs_buftarg *btp = container_of(shrink, |
1428 | struct xfs_buftarg, bt_shrinker); | ||
1429 | struct xfs_buf *bp; | ||
1430 | int nr_to_scan = sc->nr_to_scan; | ||
1431 | LIST_HEAD(dispose); | ||
1440 | 1432 | ||
1441 | btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ | 1433 | if (!nr_to_scan) |
1442 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * | 1434 | return btp->bt_lru_nr; |
1443 | sizeof(xfs_bufhash_t)); | ||
1444 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | ||
1445 | spin_lock_init(&btp->bt_hash[i].bh_lock); | ||
1446 | INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); | ||
1447 | } | ||
1448 | } | ||
1449 | 1435 | ||
1450 | STATIC void | 1436 | spin_lock(&btp->bt_lru_lock); |
1451 | xfs_free_bufhash( | 1437 | while (!list_empty(&btp->bt_lru)) { |
1452 | xfs_buftarg_t *btp) | 1438 | if (nr_to_scan-- <= 0) |
1453 | { | 1439 | break; |
1454 | kmem_free_large(btp->bt_hash); | ||
1455 | btp->bt_hash = NULL; | ||
1456 | } | ||
1457 | 1440 | ||
1458 | /* | 1441 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
1459 | * buftarg list for delwrite queue processing | ||
1460 | */ | ||
1461 | static LIST_HEAD(xfs_buftarg_list); | ||
1462 | static DEFINE_SPINLOCK(xfs_buftarg_lock); | ||
1463 | 1442 | ||
1464 | STATIC void | 1443 | /* |
1465 | xfs_register_buftarg( | 1444 | * Decrement the b_lru_ref count unless the value is already |
1466 | xfs_buftarg_t *btp) | 1445 | * zero. If the value is already zero, we need to reclaim the |
1467 | { | 1446 | * buffer, otherwise it gets another trip through the LRU. |
1468 | spin_lock(&xfs_buftarg_lock); | 1447 | */ |
1469 | list_add(&btp->bt_list, &xfs_buftarg_list); | 1448 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { |
1470 | spin_unlock(&xfs_buftarg_lock); | 1449 | list_move_tail(&bp->b_lru, &btp->bt_lru); |
1471 | } | 1450 | continue; |
1451 | } | ||
1472 | 1452 | ||
1473 | STATIC void | 1453 | /* |
1474 | xfs_unregister_buftarg( | 1454 | * remove the buffer from the LRU now to avoid needing another |
1475 | xfs_buftarg_t *btp) | 1455 | * lock round trip inside xfs_buf_rele(). |
1476 | { | 1456 | */ |
1477 | spin_lock(&xfs_buftarg_lock); | 1457 | list_move(&bp->b_lru, &dispose); |
1478 | list_del(&btp->bt_list); | 1458 | btp->bt_lru_nr--; |
1479 | spin_unlock(&xfs_buftarg_lock); | 1459 | } |
1460 | spin_unlock(&btp->bt_lru_lock); | ||
1461 | |||
1462 | while (!list_empty(&dispose)) { | ||
1463 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | ||
1464 | list_del_init(&bp->b_lru); | ||
1465 | xfs_buf_rele(bp); | ||
1466 | } | ||
1467 | |||
1468 | return btp->bt_lru_nr; | ||
1480 | } | 1469 | } |
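xfs_buftarg_shrink() is a textbook dispose-list shrinker: victims are unhooked from the LRU under bt_lru_lock and parked on a private list, and the release work, which may sleep or take other locks, happens only after the lock is dropped. The reusable shape, with still_referenced() and release() as hypothetical stand-ins for the b_lru_ref check and xfs_buf_rele():

	LIST_HEAD(dispose);

	spin_lock(&lru_lock);
	while (!list_empty(&lru) && nr_to_scan-- > 0) {
		item = list_first_entry(&lru, struct item, lru);
		if (still_referenced(item)) {
			/* second chance: rotate to the LRU tail */
			list_move_tail(&item->lru, &lru);
			continue;
		}
		list_move(&item->lru, &dispose);
	}
	spin_unlock(&lru_lock);

	/* dispose outside the lock; this may sleep */
	while (!list_empty(&dispose)) {
		item = list_first_entry(&dispose, struct item, lru);
		list_del_init(&item->lru);
		release(item);
	}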
1481 | 1470 | ||
1482 | void | 1471 | void |
@@ -1484,18 +1473,13 @@ xfs_free_buftarg( | |||
1484 | struct xfs_mount *mp, | 1473 | struct xfs_mount *mp, |
1485 | struct xfs_buftarg *btp) | 1474 | struct xfs_buftarg *btp) |
1486 | { | 1475 | { |
1476 | unregister_shrinker(&btp->bt_shrinker); | ||
1477 | |||
1487 | xfs_flush_buftarg(btp, 1); | 1478 | xfs_flush_buftarg(btp, 1); |
1488 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1479 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1489 | xfs_blkdev_issue_flush(btp); | 1480 | xfs_blkdev_issue_flush(btp); |
1490 | xfs_free_bufhash(btp); | ||
1491 | iput(btp->bt_mapping->host); | ||
1492 | 1481 | ||
1493 | /* Unregister the buftarg first so that we don't get a | ||
1494 | * wakeup finding a non-existent task | ||
1495 | */ | ||
1496 | xfs_unregister_buftarg(btp); | ||
1497 | kthread_stop(btp->bt_task); | 1482 | kthread_stop(btp->bt_task); |
1498 | |||
1499 | kmem_free(btp); | 1483 | kmem_free(btp); |
1500 | } | 1484 | } |
1501 | 1485 | ||
@@ -1511,21 +1495,12 @@ xfs_setsize_buftarg_flags( | |||
1511 | btp->bt_smask = sectorsize - 1; | 1495 | btp->bt_smask = sectorsize - 1; |
1512 | 1496 | ||
1513 | if (set_blocksize(btp->bt_bdev, sectorsize)) { | 1497 | if (set_blocksize(btp->bt_bdev, sectorsize)) { |
1514 | printk(KERN_WARNING | 1498 | xfs_warn(btp->bt_mount, |
1515 | "XFS: Cannot set_blocksize to %u on device %s\n", | 1499 | "Cannot set_blocksize to %u on device %s\n", |
1516 | sectorsize, XFS_BUFTARG_NAME(btp)); | 1500 | sectorsize, XFS_BUFTARG_NAME(btp)); |
1517 | return EINVAL; | 1501 | return EINVAL; |
1518 | } | 1502 | } |
1519 | 1503 | ||
1520 | if (verbose && | ||
1521 | (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { | ||
1522 | printk(KERN_WARNING | ||
1523 | "XFS: %u byte sectors in use on device %s. " | ||
1524 | "This is suboptimal; %u or greater is ideal.\n", | ||
1525 | sectorsize, XFS_BUFTARG_NAME(btp), | ||
1526 | (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); | ||
1527 | } | ||
1528 | |||
1529 | return 0; | 1504 | return 0; |
1530 | } | 1505 | } |
1531 | 1506 | ||
@@ -1540,7 +1515,7 @@ xfs_setsize_buftarg_early( | |||
1540 | struct block_device *bdev) | 1515 | struct block_device *bdev) |
1541 | { | 1516 | { |
1542 | return xfs_setsize_buftarg_flags(btp, | 1517 | return xfs_setsize_buftarg_flags(btp, |
1543 | PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); | 1518 | PAGE_SIZE, bdev_logical_block_size(bdev), 0); |
1544 | } | 1519 | } |
1545 | 1520 | ||
1546 | int | 1521 | int |
@@ -1553,62 +1528,22 @@ xfs_setsize_buftarg( | |||
1553 | } | 1528 | } |
1554 | 1529 | ||
1555 | STATIC int | 1530 | STATIC int |
1556 | xfs_mapping_buftarg( | ||
1557 | xfs_buftarg_t *btp, | ||
1558 | struct block_device *bdev) | ||
1559 | { | ||
1560 | struct backing_dev_info *bdi; | ||
1561 | struct inode *inode; | ||
1562 | struct address_space *mapping; | ||
1563 | static const struct address_space_operations mapping_aops = { | ||
1564 | .sync_page = block_sync_page, | ||
1565 | .migratepage = fail_migrate_page, | ||
1566 | }; | ||
1567 | |||
1568 | inode = new_inode(bdev->bd_inode->i_sb); | ||
1569 | if (!inode) { | ||
1570 | printk(KERN_WARNING | ||
1571 | "XFS: Cannot allocate mapping inode for device %s\n", | ||
1572 | XFS_BUFTARG_NAME(btp)); | ||
1573 | return ENOMEM; | ||
1574 | } | ||
1575 | inode->i_mode = S_IFBLK; | ||
1576 | inode->i_bdev = bdev; | ||
1577 | inode->i_rdev = bdev->bd_dev; | ||
1578 | bdi = blk_get_backing_dev_info(bdev); | ||
1579 | if (!bdi) | ||
1580 | bdi = &default_backing_dev_info; | ||
1581 | mapping = &inode->i_data; | ||
1582 | mapping->a_ops = &mapping_aops; | ||
1583 | mapping->backing_dev_info = bdi; | ||
1584 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
1585 | btp->bt_mapping = mapping; | ||
1586 | return 0; | ||
1587 | } | ||
1588 | |||
1589 | STATIC int | ||
1590 | xfs_alloc_delwrite_queue( | 1531 | xfs_alloc_delwrite_queue( |
1591 | xfs_buftarg_t *btp, | 1532 | xfs_buftarg_t *btp, |
1592 | const char *fsname) | 1533 | const char *fsname) |
1593 | { | 1534 | { |
1594 | int error = 0; | ||
1595 | |||
1596 | INIT_LIST_HEAD(&btp->bt_list); | ||
1597 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); | 1535 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); |
1598 | spin_lock_init(&btp->bt_delwrite_lock); | 1536 | spin_lock_init(&btp->bt_delwrite_lock); |
1599 | btp->bt_flags = 0; | 1537 | btp->bt_flags = 0; |
1600 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); | 1538 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); |
1601 | if (IS_ERR(btp->bt_task)) { | 1539 | if (IS_ERR(btp->bt_task)) |
1602 | error = PTR_ERR(btp->bt_task); | 1540 | return PTR_ERR(btp->bt_task); |
1603 | goto out_error; | 1541 | return 0; |
1604 | } | ||
1605 | xfs_register_buftarg(btp); | ||
1606 | out_error: | ||
1607 | return error; | ||
1608 | } | 1542 | } |
1609 | 1543 | ||
1610 | xfs_buftarg_t * | 1544 | xfs_buftarg_t * |
1611 | xfs_alloc_buftarg( | 1545 | xfs_alloc_buftarg( |
1546 | struct xfs_mount *mp, | ||
1612 | struct block_device *bdev, | 1547 | struct block_device *bdev, |
1613 | int external, | 1548 | int external, |
1614 | const char *fsname) | 1549 | const char *fsname) |
@@ -1617,15 +1552,22 @@ xfs_alloc_buftarg( | |||
1617 | 1552 | ||
1618 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); | 1553 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); |
1619 | 1554 | ||
1555 | btp->bt_mount = mp; | ||
1620 | btp->bt_dev = bdev->bd_dev; | 1556 | btp->bt_dev = bdev->bd_dev; |
1621 | btp->bt_bdev = bdev; | 1557 | btp->bt_bdev = bdev; |
1622 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1558 | btp->bt_bdi = blk_get_backing_dev_info(bdev); |
1559 | if (!btp->bt_bdi) | ||
1623 | goto error; | 1560 | goto error; |
1624 | if (xfs_mapping_buftarg(btp, bdev)) | 1561 | |
1562 | INIT_LIST_HEAD(&btp->bt_lru); | ||
1563 | spin_lock_init(&btp->bt_lru_lock); | ||
1564 | if (xfs_setsize_buftarg_early(btp, bdev)) | ||
1625 | goto error; | 1565 | goto error; |
1626 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1566 | if (xfs_alloc_delwrite_queue(btp, fsname)) |
1627 | goto error; | 1567 | goto error; |
1628 | xfs_alloc_bufhash(btp, external); | 1568 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; |
1569 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | ||
1570 | register_shrinker(&btp->bt_shrinker); | ||
1629 | return btp; | 1571 | return btp; |
1630 | 1572 | ||
1631 | error: | 1573 | error: |
@@ -1730,27 +1672,6 @@ xfs_buf_runall_queues( | |||
1730 | flush_workqueue(queue); | 1672 | flush_workqueue(queue); |
1731 | } | 1673 | } |
1732 | 1674 | ||
1733 | STATIC int | ||
1734 | xfsbufd_wakeup( | ||
1735 | struct shrinker *shrink, | ||
1736 | int priority, | ||
1737 | gfp_t mask) | ||
1738 | { | ||
1739 | xfs_buftarg_t *btp; | ||
1740 | |||
1741 | spin_lock(&xfs_buftarg_lock); | ||
1742 | list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { | ||
1743 | if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) | ||
1744 | continue; | ||
1745 | if (list_empty(&btp->bt_delwrite_queue)) | ||
1746 | continue; | ||
1747 | set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); | ||
1748 | wake_up_process(btp->bt_task); | ||
1749 | } | ||
1750 | spin_unlock(&xfs_buftarg_lock); | ||
1751 | return 0; | ||
1752 | } | ||
1753 | |||
1754 | /* | 1675 | /* |
1755 | * Move as many buffers as specified to the supplied list | 1676 | * Move as many buffers as specified to the supplied list |
1756 | * indicating if we skipped any buffers to prevent deadlocks. | 1677 | * indicating if we skipped any buffers to prevent deadlocks. |
@@ -1771,7 +1692,6 @@ xfs_buf_delwri_split( | |||
1771 | INIT_LIST_HEAD(list); | 1692 | INIT_LIST_HEAD(list); |
1772 | spin_lock(dwlk); | 1693 | spin_lock(dwlk); |
1773 | list_for_each_entry_safe(bp, n, dwq, b_list) { | 1694 | list_for_each_entry_safe(bp, n, dwq, b_list) { |
1774 | trace_xfs_buf_delwri_split(bp, _RET_IP_); | ||
1775 | ASSERT(bp->b_flags & XBF_DELWRI); | 1695 | ASSERT(bp->b_flags & XBF_DELWRI); |
1776 | 1696 | ||
1777 | if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { | 1697 | if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { |
@@ -1785,6 +1705,7 @@ xfs_buf_delwri_split( | |||
1785 | _XBF_RUN_QUEUES); | 1705 | _XBF_RUN_QUEUES); |
1786 | bp->b_flags |= XBF_WRITE; | 1706 | bp->b_flags |= XBF_WRITE; |
1787 | list_move_tail(&bp->b_list, list); | 1707 | list_move_tail(&bp->b_list, list); |
1708 | trace_xfs_buf_delwri_split(bp, _RET_IP_); | ||
1788 | } else | 1709 | } else |
1789 | skipped++; | 1710 | skipped++; |
1790 | } | 1711 | } |
@@ -1838,8 +1759,8 @@ xfsbufd( | |||
1838 | do { | 1759 | do { |
1839 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1760 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); |
1840 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); | 1761 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); |
1841 | int count = 0; | ||
1842 | struct list_head tmp; | 1762 | struct list_head tmp; |
1763 | struct blk_plug plug; | ||
1843 | 1764 | ||
1844 | if (unlikely(freezing(current))) { | 1765 | if (unlikely(freezing(current))) { |
1845 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); | 1766 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); |
@@ -1855,16 +1776,15 @@ xfsbufd( | |||
1855 | 1776 | ||
1856 | xfs_buf_delwri_split(target, &tmp, age); | 1777 | xfs_buf_delwri_split(target, &tmp, age); |
1857 | list_sort(NULL, &tmp, xfs_buf_cmp); | 1778 | list_sort(NULL, &tmp, xfs_buf_cmp); |
1779 | |||
1780 | blk_start_plug(&plug); | ||
1858 | while (!list_empty(&tmp)) { | 1781 | while (!list_empty(&tmp)) { |
1859 | struct xfs_buf *bp; | 1782 | struct xfs_buf *bp; |
1860 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); | 1783 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); |
1861 | list_del_init(&bp->b_list); | 1784 | list_del_init(&bp->b_list); |
1862 | xfs_bdstrat_cb(bp); | 1785 | xfs_bdstrat_cb(bp); |
1863 | count++; | ||
1864 | } | 1786 | } |
1865 | if (count) | 1787 | blk_finish_plug(&plug); |
1866 | blk_run_address_space(target->bt_mapping); | ||
1867 | |||
1868 | } while (!kthread_should_stop()); | 1788 | } while (!kthread_should_stop()); |
1869 | 1789 | ||
1870 | return 0; | 1790 | return 0; |
@@ -1884,6 +1804,7 @@ xfs_flush_buftarg( | |||
1884 | int pincount = 0; | 1804 | int pincount = 0; |
1885 | LIST_HEAD(tmp_list); | 1805 | LIST_HEAD(tmp_list); |
1886 | LIST_HEAD(wait_list); | 1806 | LIST_HEAD(wait_list); |
1807 | struct blk_plug plug; | ||
1887 | 1808 | ||
1888 | xfs_buf_runall_queues(xfsconvertd_workqueue); | 1809 | xfs_buf_runall_queues(xfsconvertd_workqueue); |
1889 | xfs_buf_runall_queues(xfsdatad_workqueue); | 1810 | xfs_buf_runall_queues(xfsdatad_workqueue); |
@@ -1898,6 +1819,8 @@ xfs_flush_buftarg( | |||
1898 | * we do that after issuing all the IO. | 1819 | * we do that after issuing all the IO. |
1899 | */ | 1820 | */ |
1900 | list_sort(NULL, &tmp_list, xfs_buf_cmp); | 1821 | list_sort(NULL, &tmp_list, xfs_buf_cmp); |
1822 | |||
1823 | blk_start_plug(&plug); | ||
1901 | while (!list_empty(&tmp_list)) { | 1824 | while (!list_empty(&tmp_list)) { |
1902 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); | 1825 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); |
1903 | ASSERT(target == bp->b_target); | 1826 | ASSERT(target == bp->b_target); |
@@ -1908,15 +1831,15 @@ xfs_flush_buftarg( | |||
1908 | } | 1831 | } |
1909 | xfs_bdstrat_cb(bp); | 1832 | xfs_bdstrat_cb(bp); |
1910 | } | 1833 | } |
1834 | blk_finish_plug(&plug); | ||
1911 | 1835 | ||
1912 | if (wait) { | 1836 | if (wait) { |
1913 | /* Expedite and wait for IO to complete. */ | 1837 | /* Wait for IO to complete. */ |
1914 | blk_run_address_space(target->bt_mapping); | ||
1915 | while (!list_empty(&wait_list)) { | 1838 | while (!list_empty(&wait_list)) { |
1916 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | 1839 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); |
1917 | 1840 | ||
1918 | list_del_init(&bp->b_list); | 1841 | list_del_init(&bp->b_list); |
1919 | xfs_iowait(bp); | 1842 | xfs_buf_iowait(bp); |
1920 | xfs_buf_relse(bp); | 1843 | xfs_buf_relse(bp); |
1921 | } | 1844 | } |
1922 | } | 1845 | } |
@@ -1933,19 +1856,19 @@ xfs_buf_init(void) | |||
1933 | goto out; | 1856 | goto out; |
1934 | 1857 | ||
1935 | xfslogd_workqueue = alloc_workqueue("xfslogd", | 1858 | xfslogd_workqueue = alloc_workqueue("xfslogd", |
1936 | WQ_RESCUER | WQ_HIGHPRI, 1); | 1859 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); |
1937 | if (!xfslogd_workqueue) | 1860 | if (!xfslogd_workqueue) |
1938 | goto out_free_buf_zone; | 1861 | goto out_free_buf_zone; |
1939 | 1862 | ||
1940 | xfsdatad_workqueue = create_workqueue("xfsdatad"); | 1863 | xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); |
1941 | if (!xfsdatad_workqueue) | 1864 | if (!xfsdatad_workqueue) |
1942 | goto out_destroy_xfslogd_workqueue; | 1865 | goto out_destroy_xfslogd_workqueue; |
1943 | 1866 | ||
1944 | xfsconvertd_workqueue = create_workqueue("xfsconvertd"); | 1867 | xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", |
1868 | WQ_MEM_RECLAIM, 1); | ||
1945 | if (!xfsconvertd_workqueue) | 1869 | if (!xfsconvertd_workqueue) |
1946 | goto out_destroy_xfsdatad_workqueue; | 1870 | goto out_destroy_xfsdatad_workqueue; |
1947 | 1871 | ||
1948 | register_shrinker(&xfs_buf_shake); | ||
1949 | return 0; | 1872 | return 0; |
1950 | 1873 | ||
1951 | out_destroy_xfsdatad_workqueue: | 1874 | out_destroy_xfsdatad_workqueue: |
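The switch from create_workqueue() to alloc_workqueue() with WQ_MEM_RECLAIM matters for these queues: the flag guarantees a rescuer thread, so I/O-completion work can still make progress when the allocator is starved. The calling convention, as a generic sketch (names are placeholders, not from the commit):

	struct workqueue_struct	*wq;

	/* args: name, flags, max_active */
	wq = alloc_workqueue("example", WQ_MEM_RECLAIM, 1);
	if (!wq)
		return -ENOMEM;
	queue_work(wq, &my_work);	/* my_work set up via INIT_WORK() */
	flush_workqueue(wq);
	destroy_workqueue(wq);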
@@ -1961,7 +1884,6 @@ xfs_buf_init(void) | |||
1961 | void | 1884 | void |
1962 | xfs_buf_terminate(void) | 1885 | xfs_buf_terminate(void) |
1963 | { | 1886 | { |
1964 | unregister_shrinker(&xfs_buf_shake); | ||
1965 | destroy_workqueue(xfsconvertd_workqueue); | 1887 | destroy_workqueue(xfsconvertd_workqueue); |
1966 | destroy_workqueue(xfsdatad_workqueue); | 1888 | destroy_workqueue(xfsdatad_workqueue); |
1967 | destroy_workqueue(xfslogd_workqueue); | 1889 | destroy_workqueue(xfslogd_workqueue); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 2a05614f0b92..50a7d5fb3b73 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -51,7 +51,6 @@ typedef enum { | |||
51 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 51 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
52 | #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ | 52 | #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ |
53 | #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ | 53 | #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ |
54 | #define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */ | ||
55 | #define XBF_ORDERED (1 << 11)/* use ordered writes */ | 54 | #define XBF_ORDERED (1 << 11)/* use ordered writes */ |
56 | #define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ | 55 | #define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ |
57 | #define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ | 56 | #define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ |
@@ -62,38 +61,11 @@ typedef enum { | |||
62 | #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ | 61 | #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ |
63 | 62 | ||
64 | /* flags used only internally */ | 63 | /* flags used only internally */ |
65 | #define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */ | ||
66 | #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ | 64 | #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ |
67 | #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ | 65 | #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ |
66 | #define _XBF_KMEM (1 << 20)/* backed by heap memory */ | ||
68 | #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ | 67 | #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ |
69 | 68 | ||
70 | /* | ||
71 | * Special flag for supporting metadata blocks smaller than a FSB. | ||
72 | * | ||
73 | * In this case we can have multiple xfs_buf_t on a single page and | ||
74 | * need to lock out concurrent xfs_buf_t readers as they only | ||
75 | * serialise access to the buffer. | ||
76 | * | ||
77 | * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation | ||
78 | * between reads of the page. Hence we can have one thread read the | ||
79 | * page and modify it, but then race with another thread that thinks | ||
80 | * the page is not up-to-date and hence reads it again. | ||
81 | * | ||
82 | * The result is that the first modifcation to the page is lost. | ||
83 | * This sort of AGF/AGI reading race can happen when unlinking inodes | ||
84 | * that require truncation and results in the AGI unlinked list | ||
85 | * modifications being lost. | ||
86 | */ | ||
87 | #define _XBF_PAGE_LOCKED (1 << 22) | ||
88 | |||
89 | /* | ||
90 | * If we try a barrier write, but it fails we have to communicate | ||
91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
92 | * when the buffer is re-issued so we have to add another flag to | ||
93 | * keep this information. | ||
94 | */ | ||
95 | #define _XFS_BARRIER_FAILED (1 << 23) | ||
96 | |||
97 | typedef unsigned int xfs_buf_flags_t; | 69 | typedef unsigned int xfs_buf_flags_t; |
98 | 70 | ||
99 | #define XFS_BUF_FLAGS \ | 71 | #define XFS_BUF_FLAGS \ |
@@ -104,19 +76,15 @@ typedef unsigned int xfs_buf_flags_t; | |||
104 | { XBF_DONE, "DONE" }, \ | 76 | { XBF_DONE, "DONE" }, \ |
105 | { XBF_DELWRI, "DELWRI" }, \ | 77 | { XBF_DELWRI, "DELWRI" }, \ |
106 | { XBF_STALE, "STALE" }, \ | 78 | { XBF_STALE, "STALE" }, \ |
107 | { XBF_FS_MANAGED, "FS_MANAGED" }, \ | ||
108 | { XBF_ORDERED, "ORDERED" }, \ | 79 | { XBF_ORDERED, "ORDERED" }, \ |
109 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ | 80 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ |
110 | { XBF_LOCK, "LOCK" }, /* should never be set */\ | 81 | { XBF_LOCK, "LOCK" }, /* should never be set */\ |
111 | { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ | 82 | { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ |
112 | { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ | 83 | { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ |
113 | { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \ | ||
114 | { _XBF_PAGES, "PAGES" }, \ | 84 | { _XBF_PAGES, "PAGES" }, \ |
115 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ | 85 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ |
116 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 86 | { _XBF_KMEM, "KMEM" }, \ |
117 | { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ | 87 | { _XBF_DELWRI_Q, "DELWRI_Q" } |
118 | { _XFS_BARRIER_FAILED, "BARRIER_FAILED" } | ||
119 | |||
120 | 88 | ||
121 | typedef enum { | 89 | typedef enum { |
122 | XBT_FORCE_SLEEP = 0, | 90 | XBT_FORCE_SLEEP = 0, |
@@ -131,70 +99,67 @@ typedef struct xfs_bufhash { | |||
131 | typedef struct xfs_buftarg { | 99 | typedef struct xfs_buftarg { |
132 | dev_t bt_dev; | 100 | dev_t bt_dev; |
133 | struct block_device *bt_bdev; | 101 | struct block_device *bt_bdev; |
134 | struct address_space *bt_mapping; | 102 | struct backing_dev_info *bt_bdi; |
103 | struct xfs_mount *bt_mount; | ||
135 | unsigned int bt_bsize; | 104 | unsigned int bt_bsize; |
136 | unsigned int bt_sshift; | 105 | unsigned int bt_sshift; |
137 | size_t bt_smask; | 106 | size_t bt_smask; |
138 | 107 | ||
139 | /* per device buffer hash table */ | ||
140 | uint bt_hashshift; | ||
141 | xfs_bufhash_t *bt_hash; | ||
142 | |||
143 | /* per device delwri queue */ | 108 | /* per device delwri queue */ |
144 | struct task_struct *bt_task; | 109 | struct task_struct *bt_task; |
145 | struct list_head bt_list; | ||
146 | struct list_head bt_delwrite_queue; | 110 | struct list_head bt_delwrite_queue; |
147 | spinlock_t bt_delwrite_lock; | 111 | spinlock_t bt_delwrite_lock; |
148 | unsigned long bt_flags; | 112 | unsigned long bt_flags; |
149 | } xfs_buftarg_t; | ||
150 | 113 | ||
151 | /* | 114 | /* LRU control structures */ |
152 | * xfs_buf_t: Buffer structure for pagecache-based buffers | 115 | struct shrinker bt_shrinker; |
153 | * | 116 | struct list_head bt_lru; |
154 | * This buffer structure is used by the pagecache buffer management routines | 117 | spinlock_t bt_lru_lock; |
155 | * to refer to an assembly of pages forming a logical buffer. | 118 | unsigned int bt_lru_nr; |
156 | * | 119 | } xfs_buftarg_t; |
157 | * The buffer structure is used on a temporary basis only, and discarded when | ||
158 | * released. The real data storage is recorded in the pagecache. Buffers are | ||
159 | * hashed to the block device on which the file system resides. | ||
160 | */ | ||
161 | 120 | ||
162 | struct xfs_buf; | 121 | struct xfs_buf; |
163 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); | 122 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); |
164 | typedef void (*xfs_buf_relse_t)(struct xfs_buf *); | ||
165 | typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); | ||
166 | 123 | ||
167 | #define XB_PAGES 2 | 124 | #define XB_PAGES 2 |
168 | 125 | ||
169 | typedef struct xfs_buf { | 126 | typedef struct xfs_buf { |
127 | /* | ||
128 | * first cacheline holds all the fields needed for an uncontended cache | ||
129 | * hit to be fully processed. The semaphore straddles the cacheline | ||
130 | * boundary, but the counter and lock sit on the first cacheline, | ||
131 | * which is the only bit that is touched if we hit the semaphore | ||
132 | * fast-path on locking. | ||
133 | */ | ||
134 | struct rb_node b_rbnode; /* rbtree node */ | ||
135 | xfs_off_t b_file_offset; /* offset in file */ | ||
136 | size_t b_buffer_length;/* size of buffer in bytes */ | ||
137 | atomic_t b_hold; /* reference count */ | ||
138 | atomic_t b_lru_ref; /* lru reclaim ref count */ | ||
139 | xfs_buf_flags_t b_flags; /* status flags */ | ||
170 | struct semaphore b_sema; /* semaphore for lockables */ | 140 | struct semaphore b_sema; /* semaphore for lockables */ |
171 | unsigned long b_queuetime; /* time buffer was queued */ | 141 | |
172 | atomic_t b_pin_count; /* pin count */ | 142 | struct list_head b_lru; /* lru list */ |
173 | wait_queue_head_t b_waiters; /* unpin waiters */ | 143 | wait_queue_head_t b_waiters; /* unpin waiters */ |
174 | struct list_head b_list; | 144 | struct list_head b_list; |
175 | xfs_buf_flags_t b_flags; /* status flags */ | 145 | struct xfs_perag *b_pag; /* contains rbtree root */ |
176 | struct list_head b_hash_list; /* hash table list */ | ||
177 | xfs_bufhash_t *b_hash; /* hash table list start */ | ||
178 | xfs_buftarg_t *b_target; /* buffer target (device) */ | 146 | xfs_buftarg_t *b_target; /* buffer target (device) */ |
179 | atomic_t b_hold; /* reference count */ | ||
180 | xfs_daddr_t b_bn; /* block number for I/O */ | 147 | xfs_daddr_t b_bn; /* block number for I/O */ |
181 | xfs_off_t b_file_offset; /* offset in file */ | ||
182 | size_t b_buffer_length;/* size of buffer in bytes */ | ||
183 | size_t b_count_desired;/* desired transfer size */ | 148 | size_t b_count_desired;/* desired transfer size */ |
184 | void *b_addr; /* virtual address of buffer */ | 149 | void *b_addr; /* virtual address of buffer */ |
185 | struct work_struct b_iodone_work; | 150 | struct work_struct b_iodone_work; |
186 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | ||
187 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 151 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
188 | xfs_buf_relse_t b_relse; /* releasing function */ | ||
189 | struct completion b_iowait; /* queue for I/O waiters */ | 152 | struct completion b_iowait; /* queue for I/O waiters */ |
190 | void *b_fspriv; | 153 | void *b_fspriv; |
191 | void *b_fspriv2; | 154 | void *b_fspriv2; |
192 | struct xfs_mount *b_mount; | ||
193 | unsigned short b_error; /* error code on I/O */ | ||
194 | unsigned int b_page_count; /* size of page array */ | ||
195 | unsigned int b_offset; /* page offset in first page */ | ||
196 | struct page **b_pages; /* array of page pointers */ | 155 | struct page **b_pages; /* array of page pointers */ |
197 | struct page *b_page_array[XB_PAGES]; /* inline pages */ | 156 | struct page *b_page_array[XB_PAGES]; /* inline pages */ |
157 | unsigned long b_queuetime; /* time buffer was queued */ | ||
158 | atomic_t b_pin_count; /* pin count */ | ||
159 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | ||
160 | unsigned int b_page_count; /* size of page array */ | ||
161 | unsigned int b_offset; /* page offset in first page */ | ||
162 | unsigned short b_error; /* error code on I/O */ | ||
198 | #ifdef XFS_BUF_LOCK_TRACKING | 163 | #ifdef XFS_BUF_LOCK_TRACKING |
199 | int b_last_holder; | 164 | int b_last_holder; |
200 | #endif | 165 | #endif |
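The field ordering above is meant to keep the uncontended-lookup fields within the first cacheline. A compile-time assertion along these lines (an assumption, not in the patch) would document the intent:

	/* Assumed sanity check: everything up to b_flags fits in cacheline 0 */
	BUILD_BUG_ON(offsetof(struct xfs_buf, b_flags) >= L1_CACHE_BYTES);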
@@ -213,11 +178,14 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, | |||
213 | xfs_buf_flags_t); | 178 | xfs_buf_flags_t); |
214 | 179 | ||
215 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); | 180 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); |
216 | extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); | 181 | extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); |
182 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); | ||
217 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 183 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); |
218 | extern void xfs_buf_hold(xfs_buf_t *); | 184 | extern void xfs_buf_hold(xfs_buf_t *); |
219 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, | 185 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); |
220 | xfs_buf_flags_t); | 186 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, |
187 | struct xfs_buftarg *target, | ||
188 | xfs_daddr_t daddr, size_t length, int flags); | ||
221 | 189 | ||
222 | /* Releasing Buffers */ | 190 | /* Releasing Buffers */ |
223 | extern void xfs_buf_free(xfs_buf_t *); | 191 | extern void xfs_buf_free(xfs_buf_t *); |
@@ -242,6 +210,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *); | |||
242 | extern int xfs_buf_iowait(xfs_buf_t *); | 210 | extern int xfs_buf_iowait(xfs_buf_t *); |
243 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | 211 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, |
244 | xfs_buf_rw_t); | 212 | xfs_buf_rw_t); |
213 | #define xfs_buf_zero(bp, off, len) \ | ||
214 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | ||
245 | 215 | ||
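The new xfs_buf_zero() wrapper reads naturally at call sites; for example (the offset is illustrative):

	/* Zero the buffer from 'boff' to its end */
	xfs_buf_zero(bp, boff, XFS_BUF_SIZE(bp) - boff);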
246 | static inline int xfs_buf_geterror(xfs_buf_t *bp) | 216 | static inline int xfs_buf_geterror(xfs_buf_t *bp) |
247 | { | 217 | { |
@@ -267,7 +237,8 @@ extern void xfs_buf_terminate(void); | |||
267 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ | 237 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ |
268 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) | 238 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) |
269 | 239 | ||
270 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) | 240 | void xfs_buf_stale(struct xfs_buf *bp); |
241 | #define XFS_BUF_STALE(bp) xfs_buf_stale(bp)
271 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 242 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
272 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) | 243 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) |
273 | #define XFS_BUF_SUPER_STALE(bp) do { \ | 244 | #define XFS_BUF_SUPER_STALE(bp) do { \ |
@@ -276,8 +247,6 @@ extern void xfs_buf_terminate(void); | |||
276 | XFS_BUF_DONE(bp); \ | 247 | XFS_BUF_DONE(bp); \ |
277 | } while (0) | 248 | } while (0) |
278 | 249 | ||
279 | #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) | ||
280 | |||
281 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) | 250 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) |
282 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) | 251 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) |
283 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) | 252 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) |
@@ -320,7 +289,6 @@ extern void xfs_buf_terminate(void); | |||
320 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) | 289 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) |
321 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) | 290 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) |
322 | #define XFS_BUF_SET_START(bp) do { } while (0) | 291 | #define XFS_BUF_SET_START(bp) do { } while (0) |
323 | #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) | ||
324 | 292 | ||
325 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) | 293 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) |
326 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) | 294 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) |
@@ -333,9 +301,15 @@ extern void xfs_buf_terminate(void); | |||
333 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | 301 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) |
334 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | 302 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) |
335 | 303 | ||
336 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) | 304 | static inline void |
305 | xfs_buf_set_ref( | ||
306 | struct xfs_buf *bp, | ||
307 | int lru_ref) | ||
308 | { | ||
309 | atomic_set(&bp->b_lru_ref, lru_ref); | ||
310 | } | ||
311 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) | ||
337 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) | 312 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) |
338 | #define XFS_BUF_SET_REF(bp, ref) do { } while (0) | ||
339 | 313 | ||
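Callers can now bias buffer reclaim by seeding b_lru_ref; for instance (the call site is illustrative, the reference constant is the existing btree hint):

	/* Let inode btree buffers survive more LRU passes than the default */
	xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);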
340 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) | 314 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) |
341 | 315 | ||
@@ -351,30 +325,15 @@ extern void xfs_buf_terminate(void); | |||
351 | 325 | ||
352 | static inline void xfs_buf_relse(xfs_buf_t *bp) | 326 | static inline void xfs_buf_relse(xfs_buf_t *bp) |
353 | { | 327 | { |
354 | if (!bp->b_relse) | 328 | xfs_buf_unlock(bp); |
355 | xfs_buf_unlock(bp); | ||
356 | xfs_buf_rele(bp); | 329 | xfs_buf_rele(bp); |
357 | } | 330 | } |
358 | 331 | ||
359 | #define xfs_biodone(bp) xfs_buf_ioend(bp, 0) | ||
360 | |||
361 | #define xfs_biomove(bp, off, len, data, rw) \ | ||
362 | xfs_buf_iomove((bp), (off), (len), (data), \ | ||
363 | ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ) | ||
364 | |||
365 | #define xfs_biozero(bp, off, len) \ | ||
366 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | ||
367 | |||
368 | #define xfs_iowait(bp) xfs_buf_iowait(bp) | ||
369 | |||
370 | #define xfs_baread(target, rablkno, ralen) \ | ||
371 | xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK) | ||
372 | |||
373 | |||
374 | /* | 332 | /* |
375 | * Handling of buftargs. | 333 | * Handling of buftargs. |
376 | */ | 334 | */ |
377 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *); | 335 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, |
336 | struct block_device *, int, const char *); | ||
378 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); | 337 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); |
379 | extern void xfs_wait_buftarg(xfs_buftarg_t *); | 338 | extern void xfs_wait_buftarg(xfs_buftarg_t *); |
380 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); | 339 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); |
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h deleted file mode 100644 index 55bddf3b6091..000000000000 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ /dev/null | |||
@@ -1,28 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_CRED_H__ | ||
19 | #define __XFS_CRED_H__ | ||
20 | |||
21 | #include <linux/capability.h> | ||
22 | |||
23 | /* | ||
24 | * Credentials | ||
25 | */ | ||
26 | typedef const struct cred cred_t; | ||
27 | |||
28 | #endif /* __XFS_CRED_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c new file mode 100644 index 000000000000..244e797dae32 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.c | |||
@@ -0,0 +1,222 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010 Red Hat, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_sb.h" | ||
20 | #include "xfs_inum.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_ag.h" | ||
23 | #include "xfs_mount.h" | ||
24 | #include "xfs_quota.h" | ||
25 | #include "xfs_trans.h" | ||
26 | #include "xfs_alloc_btree.h" | ||
27 | #include "xfs_bmap_btree.h" | ||
28 | #include "xfs_ialloc_btree.h" | ||
29 | #include "xfs_btree.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_alloc.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_discard.h" | ||
34 | #include "xfs_trace.h" | ||
35 | |||
36 | STATIC int | ||
37 | xfs_trim_extents( | ||
38 | struct xfs_mount *mp, | ||
39 | xfs_agnumber_t agno, | ||
40 | xfs_fsblock_t start, | ||
41 | xfs_fsblock_t len, | ||
42 | xfs_fsblock_t minlen, | ||
43 | __uint64_t *blocks_trimmed) | ||
44 | { | ||
45 | struct block_device *bdev = mp->m_ddev_targp->bt_bdev; | ||
46 | struct xfs_btree_cur *cur; | ||
47 | struct xfs_buf *agbp; | ||
48 | struct xfs_perag *pag; | ||
49 | int error; | ||
50 | int i; | ||
51 | |||
52 | pag = xfs_perag_get(mp, agno); | ||
53 | |||
54 | error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); | ||
55 | if (error || !agbp) | ||
56 | goto out_put_perag; | ||
57 | |||
58 | cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); | ||
59 | |||
60 | /* | ||
61 | * Force out the log. This means any transactions that might have freed | ||
62 | * space before we took the AGF buffer lock are now on disk, and the | ||
63 | * volatile disk cache is flushed. | ||
64 | */ | ||
65 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
66 | |||
67 | /* | ||
68 | * Look up the longest btree in the AGF and start with it. | ||
69 | */ | ||
70 | error = xfs_alloc_lookup_le(cur, 0, | ||
71 | XFS_BUF_TO_AGF(agbp)->agf_longest, &i); | ||
72 | if (error) | ||
73 | goto out_del_cursor; | ||
74 | |||
75 | /* | ||
76 | * Loop until we are done with all extents that are large | ||
77 | * enough to be worth discarding. | ||
78 | */ | ||
79 | while (i) { | ||
80 | xfs_agblock_t fbno; | ||
81 | xfs_extlen_t flen; | ||
82 | |||
83 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); | ||
84 | if (error) | ||
85 | goto out_del_cursor; | ||
86 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); | ||
87 | ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); | ||
88 | |||
89 | /* | ||
90 | * Too small? Give up. | ||
91 | */ | ||
92 | if (flen < minlen) { | ||
93 | trace_xfs_discard_toosmall(mp, agno, fbno, flen); | ||
94 | goto out_del_cursor; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * If the extent is entirely outside of the range we are | ||
99 | * supposed to discard, skip it. Do not bother to trim | ||
100 | * down partially overlapping ranges for now. | ||
101 | */ | ||
102 | if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || | ||
103 | XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { | ||
104 | trace_xfs_discard_exclude(mp, agno, fbno, flen); | ||
105 | goto next_extent; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * If any blocks in the range are still busy, skip the | ||
110 | * discard and try again the next time. | ||
111 | */ | ||
112 | if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { | ||
113 | trace_xfs_discard_busy(mp, agno, fbno, flen); | ||
114 | goto next_extent; | ||
115 | } | ||
116 | |||
117 | trace_xfs_discard_extent(mp, agno, fbno, flen); | ||
118 | error = -blkdev_issue_discard(bdev, | ||
119 | XFS_AGB_TO_DADDR(mp, agno, fbno), | ||
120 | XFS_FSB_TO_BB(mp, flen), | ||
121 | GFP_NOFS, 0); | ||
122 | if (error) | ||
123 | goto out_del_cursor; | ||
124 | *blocks_trimmed += flen; | ||
125 | |||
126 | next_extent: | ||
127 | error = xfs_btree_decrement(cur, 0, &i); | ||
128 | if (error) | ||
129 | goto out_del_cursor; | ||
130 | } | ||
131 | |||
132 | out_del_cursor: | ||
133 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
134 | xfs_buf_relse(agbp); | ||
135 | out_put_perag: | ||
136 | xfs_perag_put(pag); | ||
137 | return error; | ||
138 | } | ||
139 | |||
140 | int | ||
141 | xfs_ioc_trim( | ||
142 | struct xfs_mount *mp, | ||
143 | struct fstrim_range __user *urange) | ||
144 | { | ||
145 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; | ||
146 | unsigned int granularity = q->limits.discard_granularity; | ||
147 | struct fstrim_range range; | ||
148 | xfs_fsblock_t start, len, minlen; | ||
149 | xfs_agnumber_t start_agno, end_agno, agno; | ||
150 | __uint64_t blocks_trimmed = 0; | ||
151 | int error, last_error = 0; | ||
152 | |||
153 | if (!capable(CAP_SYS_ADMIN)) | ||
154 | return -XFS_ERROR(EPERM); | ||
155 | if (!blk_queue_discard(q)) | ||
156 | return -XFS_ERROR(EOPNOTSUPP); | ||
157 | if (copy_from_user(&range, urange, sizeof(range))) | ||
158 | return -XFS_ERROR(EFAULT); | ||
159 | |||
160 | /* | ||
161 | * Truncating down the len isn't actually quite correct, but using | ||
162 | * XFS_B_TO_FSB would mean we trivially get overflows for values | ||
163 | * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default | ||
164 | * used by the fstrim application. In the end it really doesn't | ||
165 | * matter as trimming blocks is an advisory interface. | ||
166 | */ | ||
167 | start = XFS_B_TO_FSBT(mp, range.start); | ||
168 | len = XFS_B_TO_FSBT(mp, range.len); | ||
169 | minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); | ||
170 | |||
171 | start_agno = XFS_FSB_TO_AGNO(mp, start); | ||
172 | if (start_agno >= mp->m_sb.sb_agcount) | ||
173 | return -XFS_ERROR(EINVAL); | ||
174 | |||
175 | end_agno = XFS_FSB_TO_AGNO(mp, start + len); | ||
176 | if (end_agno >= mp->m_sb.sb_agcount) | ||
177 | end_agno = mp->m_sb.sb_agcount - 1; | ||
178 | |||
179 | for (agno = start_agno; agno <= end_agno; agno++) { | ||
180 | error = -xfs_trim_extents(mp, agno, start, len, minlen, | ||
181 | &blocks_trimmed); | ||
182 | if (error) | ||
183 | last_error = error; | ||
184 | } | ||
185 | |||
186 | if (last_error) | ||
187 | return last_error; | ||
188 | |||
189 | range.len = XFS_FSB_TO_B(mp, blocks_trimmed); | ||
190 | if (copy_to_user(urange, &range, sizeof(range))) | ||
191 | return -XFS_ERROR(EFAULT); | ||
192 | return 0; | ||
193 | } | ||
194 | |||
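From userspace, xfs_ioc_trim() is reached through the generic FITRIM ioctl; a minimal caller might look like this (mount point and error handling are illustrative):

	#include <fcntl.h>
	#include <limits.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

	int main(void)
	{
		struct fstrim_range r = { .start = 0, .len = ULLONG_MAX, .minlen = 0 };
		int fd = open("/mnt/xfs", O_RDONLY);

		if (fd >= 0 && ioctl(fd, FITRIM, &r) == 0)
			printf("trimmed %llu bytes\n", (unsigned long long)r.len);
		return 0;
	}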
195 | int | ||
196 | xfs_discard_extents( | ||
197 | struct xfs_mount *mp, | ||
198 | struct list_head *list) | ||
199 | { | ||
200 | struct xfs_busy_extent *busyp; | ||
201 | int error = 0; | ||
202 | |||
203 | list_for_each_entry(busyp, list, list) { | ||
204 | trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, | ||
205 | busyp->length); | ||
206 | |||
207 | error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, | ||
208 | XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), | ||
209 | XFS_FSB_TO_BB(mp, busyp->length), | ||
210 | GFP_NOFS, 0); | ||
211 | if (error && error != EOPNOTSUPP) { | ||
212 | xfs_info(mp, | ||
213 | "discard failed for extent [0x%llu,%u], error %d", | ||
214 | (unsigned long long)busyp->bno, | ||
215 | busyp->length, | ||
216 | error); | ||
217 | return error; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | return 0; | ||
222 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h new file mode 100644 index 000000000000..344879aea646 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.h | |||
@@ -0,0 +1,10 @@ | |||
1 | #ifndef XFS_DISCARD_H | ||
2 | #define XFS_DISCARD_H 1 | ||
3 | |||
4 | struct fstrim_range; | ||
5 | struct list_head; | ||
6 | |||
7 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); | ||
8 | extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); | ||
9 | |||
10 | #endif /* XFS_DISCARD_H */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 3764d74790ec..f4f878fc0083 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -70,8 +70,16 @@ xfs_fs_encode_fh( | |||
70 | else | 70 | else |
71 | fileid_type = FILEID_INO32_GEN_PARENT; | 71 | fileid_type = FILEID_INO32_GEN_PARENT; |
72 | 72 | ||
73 | /* filesystem may contain 64bit inode numbers */ | 73 | /* |
74 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) | 74 | * If the filesystem may contain 64bit inode numbers, we need
75 | * to use larger file handles that can represent them. | ||
76 | * | ||
77 | * While we only allocate inodes that do not fit into 32 bits any | ||
78 | * large enough filesystem may contain them, thus the slightly | ||
79 | * confusing looking conditional below. | ||
80 | */ | ||
81 | if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || | ||
82 | (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) | ||
75 | fileid_type |= XFS_FILEID_TYPE_64FLAG; | 83 | fileid_type |= XFS_FILEID_TYPE_64FLAG; |
76 | 84 | ||
77 | /* | 85 | /* |
@@ -81,8 +89,10 @@ xfs_fs_encode_fh( | |||
81 | * seven combinations work. The real answer is "don't use v2". | 89 | * seven combinations work. The real answer is "don't use v2". |
82 | */ | 90 | */ |
83 | len = xfs_fileid_length(fileid_type); | 91 | len = xfs_fileid_length(fileid_type); |
84 | if (*max_len < len) | 92 | if (*max_len < len) { |
93 | *max_len = len; | ||
85 | return 255; | 94 | return 255; |
95 | } | ||
86 | *max_len = len; | 96 | *max_len = len; |
87 | 97 | ||
88 | switch (fileid_type) { | 98 | switch (fileid_type) { |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ba8ad422a165..7f782af286bf 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -37,10 +37,45 @@ | |||
37 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
38 | 38 | ||
39 | #include <linux/dcache.h> | 39 | #include <linux/dcache.h> |
40 | #include <linux/falloc.h> | ||
40 | 41 | ||
41 | static const struct vm_operations_struct xfs_file_vm_ops; | 42 | static const struct vm_operations_struct xfs_file_vm_ops; |
42 | 43 | ||
43 | /* | 44 | /* |
45 | * Locking primitives for read and write IO paths to ensure we consistently use | ||
46 | * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. | ||
47 | */ | ||
48 | static inline void | ||
49 | xfs_rw_ilock( | ||
50 | struct xfs_inode *ip, | ||
51 | int type) | ||
52 | { | ||
53 | if (type & XFS_IOLOCK_EXCL) | ||
54 | mutex_lock(&VFS_I(ip)->i_mutex); | ||
55 | xfs_ilock(ip, type); | ||
56 | } | ||
57 | |||
58 | static inline void | ||
59 | xfs_rw_iunlock( | ||
60 | struct xfs_inode *ip, | ||
61 | int type) | ||
62 | { | ||
63 | xfs_iunlock(ip, type); | ||
64 | if (type & XFS_IOLOCK_EXCL) | ||
65 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
66 | } | ||
67 | |||
68 | static inline void | ||
69 | xfs_rw_ilock_demote( | ||
70 | struct xfs_inode *ip, | ||
71 | int type) | ||
72 | { | ||
73 | xfs_ilock_demote(ip, type); | ||
74 | if (type & XFS_IOLOCK_EXCL) | ||
75 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
76 | } | ||
77 | |||
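These helpers pin the ordering i_mutex -> i_iolock -> i_lock. A typical write-path sequence (as the later hunks use it) looks like:

	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);	/* i_mutex first */
	/* ... pre-write checks and EOF zeroing ... */
	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);	/* keep i_mutex and the iolock */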
78 | /* | ||
44 | * xfs_iozero | 79 | * xfs_iozero |
45 | * | 80 | * |
46 | * xfs_iozero clears the specified range of buffer supplied, | 81 | * xfs_iozero clears the specified range of buffer supplied, |
@@ -96,19 +131,34 @@ xfs_file_fsync( | |||
96 | { | 131 | { |
97 | struct inode *inode = file->f_mapping->host; | 132 | struct inode *inode = file->f_mapping->host; |
98 | struct xfs_inode *ip = XFS_I(inode); | 133 | struct xfs_inode *ip = XFS_I(inode); |
134 | struct xfs_mount *mp = ip->i_mount; | ||
99 | struct xfs_trans *tp; | 135 | struct xfs_trans *tp; |
100 | int error = 0; | 136 | int error = 0; |
101 | int log_flushed = 0; | 137 | int log_flushed = 0; |
102 | 138 | ||
103 | trace_xfs_file_fsync(ip); | 139 | trace_xfs_file_fsync(ip); |
104 | 140 | ||
105 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 141 | if (XFS_FORCED_SHUTDOWN(mp)) |
106 | return -XFS_ERROR(EIO); | 142 | return -XFS_ERROR(EIO); |
107 | 143 | ||
108 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 144 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
109 | 145 | ||
110 | xfs_ioend_wait(ip); | 146 | xfs_ioend_wait(ip); |
111 | 147 | ||
148 | if (mp->m_flags & XFS_MOUNT_BARRIER) { | ||
149 | /* | ||
150 | * If we have an RT and/or log subvolume we need to make sure | ||
151 | * to flush the write cache the device used for file data | ||
152 | * first. This is to ensure newly written file data make | ||
153 | * it to disk before logging the new inode size in case of | ||
154 | * an extending write. | ||
155 | */ | ||
156 | if (XFS_IS_REALTIME_INODE(ip)) | ||
157 | xfs_blkdev_issue_flush(mp->m_rtdev_targp); | ||
158 | else if (mp->m_logdev_targp != mp->m_ddev_targp) | ||
159 | xfs_blkdev_issue_flush(mp->m_ddev_targp); | ||
160 | } | ||
161 | |||
112 | /* | 162 | /* |
113 | * We always need to make sure that the required inode state is safe on | 163 | * We always need to make sure that the required inode state is safe on |
114 | * disk. The inode might be clean but we still might need to force the | 164 | * disk. The inode might be clean but we still might need to force the |
@@ -140,9 +190,9 @@ xfs_file_fsync( | |||
140 | * updates. The sync transaction will also force the log. | 190 | * updates. The sync transaction will also force the log. |
141 | */ | 191 | */ |
142 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 192 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
143 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); | 193 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); |
144 | error = xfs_trans_reserve(tp, 0, | 194 | error = xfs_trans_reserve(tp, 0, |
145 | XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); | 195 | XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); |
146 | if (error) { | 196 | if (error) { |
147 | xfs_trans_cancel(tp, 0); | 197 | xfs_trans_cancel(tp, 0); |
148 | return -error; | 198 | return -error; |
@@ -174,28 +224,25 @@ xfs_file_fsync( | |||
174 | * force the log. | 224 | * force the log. |
175 | */ | 225 | */ |
176 | if (xfs_ipincount(ip)) { | 226 | if (xfs_ipincount(ip)) { |
177 | error = _xfs_log_force_lsn(ip->i_mount, | 227 | error = _xfs_log_force_lsn(mp, |
178 | ip->i_itemp->ili_last_lsn, | 228 | ip->i_itemp->ili_last_lsn, |
179 | XFS_LOG_SYNC, &log_flushed); | 229 | XFS_LOG_SYNC, &log_flushed); |
180 | } | 230 | } |
181 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 231 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
182 | } | 232 | } |
183 | 233 | ||
184 | if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { | 234 | /* |
185 | /* | 235 | * If we only have a single device, and the log force above was
186 | * If the log write didn't issue an ordered tag we need | 236 | * a no-op, we might have to flush the data device cache here.
187 | * to flush the disk cache for the data device now. | 237 | * This can only happen for fdatasync/O_DSYNC if we were overwriting |
188 | */ | 238 | * an already allocated file and thus do not have any metadata to |
189 | if (!log_flushed) | 239 | * commit. |
190 | xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); | 240 | */ |
191 | 241 | if ((mp->m_flags & XFS_MOUNT_BARRIER) && | |
192 | /* | 242 | mp->m_logdev_targp == mp->m_ddev_targp && |
193 | * If this inode is on the RT dev we need to flush that | 243 | !XFS_IS_REALTIME_INODE(ip) && |
194 | * cache as well. | 244 | !log_flushed) |
195 | */ | 245 | xfs_blkdev_issue_flush(mp->m_ddev_targp); |
196 | if (XFS_IS_REALTIME_INODE(ip)) | ||
197 | xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); | ||
198 | } | ||
199 | 246 | ||
200 | return -error; | 247 | return -error; |
201 | } | 248 | } |
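The !log_flushed case matters for pure overwrites: the log force is a no-op, so only the explicit flush makes the data durable. From userspace the pattern is simply (buffer and size are illustrative):

	/* Overwrite of already-allocated blocks: no metadata to log */
	pwrite(fd, buf, 4096, 0);
	fdatasync(fd);		/* ends up in the explicit cache flush above */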
@@ -262,22 +309,21 @@ xfs_file_aio_read( | |||
262 | if (XFS_FORCED_SHUTDOWN(mp)) | 309 | if (XFS_FORCED_SHUTDOWN(mp)) |
263 | return -EIO; | 310 | return -EIO; |
264 | 311 | ||
265 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
266 | mutex_lock(&inode->i_mutex); | ||
267 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
268 | |||
269 | if (unlikely(ioflags & IO_ISDIRECT)) { | 312 | if (unlikely(ioflags & IO_ISDIRECT)) { |
313 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | ||
314 | |||
270 | if (inode->i_mapping->nrpages) { | 315 | if (inode->i_mapping->nrpages) { |
271 | ret = -xfs_flushinval_pages(ip, | 316 | ret = -xfs_flushinval_pages(ip, |
272 | (iocb->ki_pos & PAGE_CACHE_MASK), | 317 | (iocb->ki_pos & PAGE_CACHE_MASK), |
273 | -1, FI_REMAPF_LOCKED); | 318 | -1, FI_REMAPF_LOCKED); |
319 | if (ret) { | ||
320 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | ||
321 | return ret; | ||
322 | } | ||
274 | } | 323 | } |
275 | mutex_unlock(&inode->i_mutex); | 324 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
276 | if (ret) { | 325 | } else |
277 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 326 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
278 | return ret; | ||
279 | } | ||
280 | } | ||
281 | 327 | ||
282 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 328 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); |
283 | 329 | ||
@@ -285,7 +331,7 @@ xfs_file_aio_read( | |||
285 | if (ret > 0) | 331 | if (ret > 0) |
286 | XFS_STATS_ADD(xs_read_bytes, ret); | 332 | XFS_STATS_ADD(xs_read_bytes, ret); |
287 | 333 | ||
288 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 334 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
289 | return ret; | 335 | return ret; |
290 | } | 336 | } |
291 | 337 | ||
@@ -309,7 +355,7 @@ xfs_file_splice_read( | |||
309 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 355 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
310 | return -EIO; | 356 | return -EIO; |
311 | 357 | ||
312 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 358 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
313 | 359 | ||
314 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); | 360 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); |
315 | 361 | ||
@@ -317,10 +363,61 @@ xfs_file_splice_read( | |||
317 | if (ret > 0) | 363 | if (ret > 0) |
318 | XFS_STATS_ADD(xs_read_bytes, ret); | 364 | XFS_STATS_ADD(xs_read_bytes, ret); |
319 | 365 | ||
320 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 366 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
321 | return ret; | 367 | return ret; |
322 | } | 368 | } |
323 | 369 | ||
370 | STATIC void | ||
371 | xfs_aio_write_isize_update( | ||
372 | struct inode *inode, | ||
373 | loff_t *ppos, | ||
374 | ssize_t bytes_written) | ||
375 | { | ||
376 | struct xfs_inode *ip = XFS_I(inode); | ||
377 | xfs_fsize_t isize = i_size_read(inode); | ||
378 | |||
379 | if (bytes_written > 0) | ||
380 | XFS_STATS_ADD(xs_write_bytes, bytes_written); | ||
381 | |||
382 | if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && | ||
383 | *ppos > isize)) | ||
384 | *ppos = isize; | ||
385 | |||
386 | if (*ppos > ip->i_size) { | ||
387 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
388 | if (*ppos > ip->i_size) | ||
389 | ip->i_size = *ppos; | ||
390 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
391 | } | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | ||
396 | * part of the I/O may have been written to disk before the error occurred. In | ||
397 | * this case the on-disk file size may have been adjusted beyond the in-memory | ||
398 | * file size and now needs to be truncated back. | ||
399 | */ | ||
400 | STATIC void | ||
401 | xfs_aio_write_newsize_update( | ||
402 | struct xfs_inode *ip) | ||
403 | { | ||
404 | if (ip->i_new_size) { | ||
405 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
406 | ip->i_new_size = 0; | ||
407 | if (ip->i_d.di_size > ip->i_size) | ||
408 | ip->i_d.di_size = ip->i_size; | ||
409 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
410 | } | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | ||
415 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | ||
416 | could cause lock inversions between the aio_write path and the splice path | ||
417 | * if someone is doing concurrent splice(2) based writes and write(2) based | ||
418 | * writes to the same inode. The only real way to fix this is to re-implement | ||
419 | * the generic code here with correct locking orders. | ||
420 | */ | ||
324 | STATIC ssize_t | 421 | STATIC ssize_t |
325 | xfs_file_splice_write( | 422 | xfs_file_splice_write( |
326 | struct pipe_inode_info *pipe, | 423 | struct pipe_inode_info *pipe, |
@@ -331,7 +428,7 @@ xfs_file_splice_write( | |||
331 | { | 428 | { |
332 | struct inode *inode = outfilp->f_mapping->host; | 429 | struct inode *inode = outfilp->f_mapping->host; |
333 | struct xfs_inode *ip = XFS_I(inode); | 430 | struct xfs_inode *ip = XFS_I(inode); |
334 | xfs_fsize_t isize, new_size; | 431 | xfs_fsize_t new_size; |
335 | int ioflags = 0; | 432 | int ioflags = 0; |
336 | ssize_t ret; | 433 | ssize_t ret; |
337 | 434 | ||
@@ -355,27 +452,9 @@ xfs_file_splice_write( | |||
355 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | 452 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); |
356 | 453 | ||
357 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 454 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
358 | if (ret > 0) | ||
359 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
360 | 455 | ||
361 | isize = i_size_read(inode); | 456 | xfs_aio_write_isize_update(inode, ppos, ret); |
362 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) | 457 | xfs_aio_write_newsize_update(ip); |
363 | *ppos = isize; | ||
364 | |||
365 | if (*ppos > ip->i_size) { | ||
366 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
367 | if (*ppos > ip->i_size) | ||
368 | ip->i_size = *ppos; | ||
369 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
370 | } | ||
371 | |||
372 | if (ip->i_new_size) { | ||
373 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
374 | ip->i_new_size = 0; | ||
375 | if (ip->i_d.di_size > ip->i_size) | ||
376 | ip->i_d.di_size = ip->i_size; | ||
377 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
378 | } | ||
379 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 458 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
380 | return ret; | 459 | return ret; |
381 | } | 460 | } |
@@ -562,247 +641,318 @@ out_lock: | |||
562 | return error; | 641 | return error; |
563 | } | 642 | } |
564 | 643 | ||
644 | /* | ||
645 | * Common pre-write limit and setup checks. | ||
646 | * | ||
647 | * Returns with iolock held according to @iolock. | ||
648 | */ | ||
565 | STATIC ssize_t | 649 | STATIC ssize_t |
566 | xfs_file_aio_write( | 650 | xfs_file_aio_write_checks( |
567 | struct kiocb *iocb, | 651 | struct file *file, |
568 | const struct iovec *iovp, | 652 | loff_t *pos, |
569 | unsigned long nr_segs, | 653 | size_t *count, |
570 | loff_t pos) | 654 | int *iolock) |
571 | { | 655 | { |
572 | struct file *file = iocb->ki_filp; | 656 | struct inode *inode = file->f_mapping->host; |
573 | struct address_space *mapping = file->f_mapping; | ||
574 | struct inode *inode = mapping->host; | ||
575 | struct xfs_inode *ip = XFS_I(inode); | 657 | struct xfs_inode *ip = XFS_I(inode); |
576 | struct xfs_mount *mp = ip->i_mount; | 658 | xfs_fsize_t new_size; |
577 | ssize_t ret = 0, error = 0; | 659 | int error = 0; |
578 | int ioflags = 0; | ||
579 | xfs_fsize_t isize, new_size; | ||
580 | int iolock; | ||
581 | size_t ocount = 0, count; | ||
582 | int need_i_mutex; | ||
583 | 660 | ||
584 | XFS_STATS_INC(xs_write_calls); | 661 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
662 | if (error) { | ||
663 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | ||
664 | *iolock = 0; | ||
665 | return error; | ||
666 | } | ||
585 | 667 | ||
586 | BUG_ON(iocb->ki_pos != pos); | 668 | new_size = *pos + *count; |
669 | if (new_size > ip->i_size) | ||
670 | ip->i_new_size = new_size; | ||
587 | 671 | ||
588 | if (unlikely(file->f_flags & O_DIRECT)) | 672 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) |
589 | ioflags |= IO_ISDIRECT; | 673 | file_update_time(file); |
590 | if (file->f_mode & FMODE_NOCMTIME) | ||
591 | ioflags |= IO_INVIS; | ||
592 | 674 | ||
593 | error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); | 675 | /* |
676 | * If the offset is beyond the size of the file, we need to zero any | ||
677 | * blocks that fall between the existing EOF and the start of this | ||
678 | * write. | ||
679 | */ | ||
680 | if (*pos > ip->i_size) | ||
681 | error = -xfs_zero_eof(ip, *pos, ip->i_size); | ||
682 | |||
683 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
594 | if (error) | 684 | if (error) |
595 | return error; | 685 | return error; |
596 | 686 | ||
597 | count = ocount; | 687 | /* |
598 | if (count == 0) | 688 | * If we're writing the file then make sure to clear the setuid and |
599 | return 0; | 689 | * setgid bits if the process is not being run by root. This keeps |
600 | 690 | * people from modifying setuid and setgid binaries. | |
601 | xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); | 691 | */ |
692 | return file_remove_suid(file); | ||
602 | 693 | ||
603 | if (XFS_FORCED_SHUTDOWN(mp)) | 694 | } |
604 | return -EIO; | ||
605 | 695 | ||
606 | relock: | 696 | /* |
607 | if (ioflags & IO_ISDIRECT) { | 697 | * xfs_file_dio_aio_write - handle direct IO writes |
608 | iolock = XFS_IOLOCK_SHARED; | 698 | * |
609 | need_i_mutex = 0; | 700 | * By separating it from the buffered write path we remove all the tricky-to-
610 | } else { | 700 | * By separating it from the buffered write path we remove all the tricky to |
611 | iolock = XFS_IOLOCK_EXCL; | 701 | * follow locking changes and looping. |
612 | need_i_mutex = 1; | 702 | * |
613 | mutex_lock(&inode->i_mutex); | 703 | * If there are cached pages or we're extending the file, we need IOLOCK_EXCL |
704 | * until we're sure the bytes at the new EOF have been zeroed and/or the cached | ||
705 | * pages are flushed out. | ||
706 | * | ||
707 | * In most cases the direct IO writes will be done holding IOLOCK_SHARED | ||
708 | * allowing them to be done in parallel with reads and other direct IO writes. | ||
709 | * However, if the IO is not aligned to filesystem blocks, the direct IO layer | ||
710 | * needs to do sub-block zeroing and that requires serialisation against other | ||
711 | * direct IOs to the same block. In this case we need to serialise the | ||
712 | * submission of the unaligned IOs so that we don't get racing block zeroing in | ||
713 | * the dio layer. To avoid the problem with aio, we also need to wait for | ||
714 | * outstanding IOs to complete so that unwritten extent conversion is completed | ||
715 | * before we try to map the overlapping block. This is currently implemented by | ||
716 | * hitting it with a big hammer (i.e. xfs_ioend_wait()). | ||
717 | * | ||
718 | * Returns with locks held indicated by @iolock and errors indicated by | ||
719 | * negative return values. | ||
720 | */ | ||
721 | STATIC ssize_t | ||
722 | xfs_file_dio_aio_write( | ||
723 | struct kiocb *iocb, | ||
724 | const struct iovec *iovp, | ||
725 | unsigned long nr_segs, | ||
726 | loff_t pos, | ||
727 | size_t ocount, | ||
728 | int *iolock) | ||
729 | { | ||
730 | struct file *file = iocb->ki_filp; | ||
731 | struct address_space *mapping = file->f_mapping; | ||
732 | struct inode *inode = mapping->host; | ||
733 | struct xfs_inode *ip = XFS_I(inode); | ||
734 | struct xfs_mount *mp = ip->i_mount; | ||
735 | ssize_t ret = 0; | ||
736 | size_t count = ocount; | ||
737 | int unaligned_io = 0; | ||
738 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | ||
739 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
740 | |||
741 | *iolock = 0; | ||
742 | if ((pos & target->bt_smask) || (count & target->bt_smask)) | ||
743 | return -XFS_ERROR(EINVAL); | ||
744 | |||
745 | if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) | ||
746 | unaligned_io = 1; | ||
747 | |||
748 | if (unaligned_io || mapping->nrpages || pos > ip->i_size) | ||
749 | *iolock = XFS_IOLOCK_EXCL; | ||
750 | else | ||
751 | *iolock = XFS_IOLOCK_SHARED; | ||
752 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | ||
753 | |||
754 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); | ||
755 | if (ret) | ||
756 | return ret; | ||
757 | |||
758 | if (mapping->nrpages) { | ||
759 | WARN_ON(*iolock != XFS_IOLOCK_EXCL); | ||
760 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | ||
761 | FI_REMAPF_LOCKED); | ||
762 | if (ret) | ||
763 | return ret; | ||
614 | } | 764 | } |
615 | 765 | ||
616 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 766 | /* |
617 | 767 | * If we are doing unaligned IO, wait for all other IO to drain, | |
618 | start: | 768 | * otherwise demote the lock if we had to flush cached pages |
619 | error = -generic_write_checks(file, &pos, &count, | 769 | */ |
620 | S_ISBLK(inode->i_mode)); | 770 | if (unaligned_io) |
621 | if (error) { | 771 | xfs_ioend_wait(ip); |
622 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 772 | else if (*iolock == XFS_IOLOCK_EXCL) { |
623 | goto out_unlock_mutex; | 773 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
774 | *iolock = XFS_IOLOCK_SHARED; | ||
624 | } | 775 | } |
625 | 776 | ||
626 | if (ioflags & IO_ISDIRECT) { | 777 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
627 | xfs_buftarg_t *target = | 778 | ret = generic_file_direct_write(iocb, iovp, |
628 | XFS_IS_REALTIME_INODE(ip) ? | 779 | &nr_segs, pos, &iocb->ki_pos, count, ocount); |
629 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
630 | 780 | ||
631 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { | 781 | /* No fallback to buffered IO on errors for XFS. */ |
632 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 782 | ASSERT(ret < 0 || ret == count); |
633 | return XFS_ERROR(-EINVAL); | 783 | return ret; |
634 | } | 784 | } |
635 | 785 | ||
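As an illustration of the unaligned_io path: on a filesystem with 4k blocks, a sector-sized direct write passes the bt_smask check but is sub-block, so it takes XFS_IOLOCK_EXCL and waits for in-flight I/O (sizes below are illustrative):

	/* 512-byte O_DIRECT write on a 4k-block fs: sector-aligned, but
	 * (pos + count) & m_blockmask != 0, hence unaligned_io = 1 */
	void *buf;
	int fd = open("file", O_WRONLY | O_DIRECT);

	posix_memalign(&buf, 512, 512);
	pwrite(fd, buf, 512, 0);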
636 | if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { | 786 | STATIC ssize_t |
637 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 787 | xfs_file_buffered_aio_write( |
638 | iolock = XFS_IOLOCK_EXCL; | 788 | struct kiocb *iocb, |
639 | need_i_mutex = 1; | 789 | const struct iovec *iovp, |
640 | mutex_lock(&inode->i_mutex); | 790 | unsigned long nr_segs, |
641 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 791 | loff_t pos, |
642 | goto start; | 792 | size_t ocount, |
643 | } | 793 | int *iolock) |
644 | } | 794 | { |
795 | struct file *file = iocb->ki_filp; | ||
796 | struct address_space *mapping = file->f_mapping; | ||
797 | struct inode *inode = mapping->host; | ||
798 | struct xfs_inode *ip = XFS_I(inode); | ||
799 | ssize_t ret; | ||
800 | int enospc = 0; | ||
801 | size_t count = ocount; | ||
645 | 802 | ||
646 | new_size = pos + count; | 803 | *iolock = XFS_IOLOCK_EXCL; |
647 | if (new_size > ip->i_size) | 804 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); |
648 | ip->i_new_size = new_size; | ||
649 | 805 | ||
650 | if (likely(!(ioflags & IO_INVIS))) | 806 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); |
651 | file_update_time(file); | 807 | if (ret) |
808 | return ret; | ||
809 | |||
810 | /* We can write back this queue in page reclaim */ | ||
811 | current->backing_dev_info = mapping->backing_dev_info; | ||
652 | 812 | ||
813 | write_retry: | ||
814 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); | ||
815 | ret = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
816 | pos, &iocb->ki_pos, count, ret); | ||
653 | /* | 817 | /* |
654 | * If the offset is beyond the size of the file, we have a couple | 818 | * if we just got an ENOSPC, flush the inode now we aren't holding any |
655 | * of things to do. First, if there is already space allocated | 819 | * page locks and retry *once* |
656 | * we need to either create holes or zero the disk or ... | ||
657 | * | ||
658 | * If there is a page where the previous size lands, we need | ||
659 | * to zero it out up to the new size. | ||
660 | */ | 820 | */ |
661 | 821 | if (ret == -ENOSPC && !enospc) { | |
662 | if (pos > ip->i_size) { | 822 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); |
663 | error = xfs_zero_eof(ip, pos, ip->i_size); | 823 | if (ret) |
664 | if (error) { | 824 | return ret; |
665 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 825 | enospc = 1; |
666 | goto out_unlock_internal; | 826 | goto write_retry; |
667 | } | ||
668 | } | 827 | } |
669 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 828 | current->backing_dev_info = NULL; |
829 | return ret; | ||
830 | } | ||
670 | 831 | ||
671 | /* | 832 | STATIC ssize_t |
672 | * If we're writing the file then make sure to clear the | 833 | xfs_file_aio_write( |
673 | * setuid and setgid bits if the process is not being run | 834 | struct kiocb *iocb, |
674 | * by root. This keeps people from modifying setuid and | 835 | const struct iovec *iovp, |
675 | * setgid binaries. | 836 | unsigned long nr_segs, |
676 | */ | 837 | loff_t pos) |
677 | error = -file_remove_suid(file); | 838 | { |
678 | if (unlikely(error)) | 839 | struct file *file = iocb->ki_filp; |
679 | goto out_unlock_internal; | 840 | struct address_space *mapping = file->f_mapping; |
841 | struct inode *inode = mapping->host; | ||
842 | struct xfs_inode *ip = XFS_I(inode); | ||
843 | ssize_t ret; | ||
844 | int iolock; | ||
845 | size_t ocount = 0; | ||
680 | 846 | ||
681 | /* We can write back this queue in page reclaim */ | 847 | XFS_STATS_INC(xs_write_calls); |
682 | current->backing_dev_info = mapping->backing_dev_info; | ||
683 | 848 | ||
684 | if ((ioflags & IO_ISDIRECT)) { | 849 | BUG_ON(iocb->ki_pos != pos); |
685 | if (mapping->nrpages) { | ||
686 | WARN_ON(need_i_mutex == 0); | ||
687 | error = xfs_flushinval_pages(ip, | ||
688 | (pos & PAGE_CACHE_MASK), | ||
689 | -1, FI_REMAPF_LOCKED); | ||
690 | if (error) | ||
691 | goto out_unlock_internal; | ||
692 | } | ||
693 | 850 | ||
694 | if (need_i_mutex) { | 851 | ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); |
695 | /* demote the lock now the cached pages are gone */ | 852 | if (ret) |
696 | xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); | 853 | return ret; |
697 | mutex_unlock(&inode->i_mutex); | ||
698 | 854 | ||
699 | iolock = XFS_IOLOCK_SHARED; | 855 | if (ocount == 0) |
700 | need_i_mutex = 0; | 856 | return 0; |
701 | } | ||
702 | 857 | ||
703 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); | 858 | xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); |
704 | ret = generic_file_direct_write(iocb, iovp, | ||
705 | &nr_segs, pos, &iocb->ki_pos, count, ocount); | ||
706 | 859 | ||
707 | /* | 860 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
708 | * direct-io write to a hole: fall through to buffered I/O | 861 | return -EIO; |
709 | * for completing the rest of the request. | ||
710 | */ | ||
711 | if (ret >= 0 && ret != count) { | ||
712 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
713 | 862 | ||
714 | pos += ret; | 863 | if (unlikely(file->f_flags & O_DIRECT)) |
715 | count -= ret; | 864 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, |
865 | ocount, &iolock); | ||
866 | else | ||
867 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | ||
868 | ocount, &iolock); | ||
716 | 869 | ||
717 | ioflags &= ~IO_ISDIRECT; | 870 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); |
718 | xfs_iunlock(ip, iolock); | ||
719 | goto relock; | ||
720 | } | ||
721 | } else { | ||
722 | int enospc = 0; | ||
723 | ssize_t ret2 = 0; | ||
724 | 871 | ||
725 | write_retry: | 872 | if (ret <= 0) |
726 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); | 873 | goto out_unlock; |
727 | ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
728 | pos, &iocb->ki_pos, count, ret); | ||
729 | /* | ||
730 | * if we just got an ENOSPC, flush the inode now we | ||
731 | * aren't holding any page locks and retry *once* | ||
732 | */ | ||
733 | if (ret2 == -ENOSPC && !enospc) { | ||
734 | error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | ||
735 | if (error) | ||
736 | goto out_unlock_internal; | ||
737 | enospc = 1; | ||
738 | goto write_retry; | ||
739 | } | ||
740 | ret = ret2; | ||
741 | } | ||
742 | 874 | ||
743 | current->backing_dev_info = NULL; | 875 | /* Handle various SYNC-type writes */ |
876 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
877 | loff_t end = pos + ret - 1; | ||
878 | int error, error2; | ||
744 | 879 | ||
745 | isize = i_size_read(inode); | 880 | xfs_rw_iunlock(ip, iolock); |
746 | if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) | 881 | error = filemap_write_and_wait_range(mapping, pos, end); |
747 | iocb->ki_pos = isize; | 882 | xfs_rw_ilock(ip, iolock); |
748 | 883 | ||
749 | if (iocb->ki_pos > ip->i_size) { | 884 | error2 = -xfs_file_fsync(file, |
750 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 885 | (file->f_flags & __O_SYNC) ? 0 : 1); |
751 | if (iocb->ki_pos > ip->i_size) | 886 | if (error) |
752 | ip->i_size = iocb->ki_pos; | 887 | ret = error; |
753 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 888 | else if (error2) |
889 | ret = error2; | ||
754 | } | 890 | } |
755 | 891 | ||
756 | error = -ret; | 892 | out_unlock: |
757 | if (ret <= 0) | 893 | xfs_aio_write_newsize_update(ip); |
758 | goto out_unlock_internal; | 894 | xfs_rw_iunlock(ip, iolock); |
895 | return ret; | ||
896 | } | ||
759 | 897 | ||
760 | XFS_STATS_ADD(xs_write_bytes, ret); | 898 | STATIC long |
899 | xfs_file_fallocate( | ||
900 | struct file *file, | ||
901 | int mode, | ||
902 | loff_t offset, | ||
903 | loff_t len) | ||
904 | { | ||
905 | struct inode *inode = file->f_path.dentry->d_inode; | ||
906 | long error; | ||
907 | loff_t new_size = 0; | ||
908 | xfs_flock64_t bf; | ||
909 | xfs_inode_t *ip = XFS_I(inode); | ||
910 | int cmd = XFS_IOC_RESVSP; | ||
911 | int attr_flags = XFS_ATTR_NOLOCK; | ||
761 | 912 | ||
762 | /* Handle various SYNC-type writes */ | 913 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
763 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 914 | return -EOPNOTSUPP; |
764 | loff_t end = pos + ret - 1; | ||
765 | int error2; | ||
766 | 915 | ||
767 | xfs_iunlock(ip, iolock); | 916 | bf.l_whence = 0; |
768 | if (need_i_mutex) | 917 | bf.l_start = offset; |
769 | mutex_unlock(&inode->i_mutex); | 918 | bf.l_len = len; |
770 | 919 | ||
771 | error2 = filemap_write_and_wait_range(mapping, pos, end); | 920 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
772 | if (!error) | ||
773 | error = error2; | ||
774 | if (need_i_mutex) | ||
775 | mutex_lock(&inode->i_mutex); | ||
776 | xfs_ilock(ip, iolock); | ||
777 | 921 | ||
778 | error2 = -xfs_file_fsync(file, | 922 | if (mode & FALLOC_FL_PUNCH_HOLE) |
779 | (file->f_flags & __O_SYNC) ? 0 : 1); | 923 | cmd = XFS_IOC_UNRESVSP; |
780 | if (!error) | 924 | |
781 | error = error2; | 925 | /* check the new inode size is valid before allocating */ |
926 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
927 | offset + len > i_size_read(inode)) { | ||
928 | new_size = offset + len; | ||
929 | error = inode_newsize_ok(inode, new_size); | ||
930 | if (error) | ||
931 | goto out_unlock; | ||
782 | } | 932 | } |
783 | 933 | ||
784 | out_unlock_internal: | 934 | if (file->f_flags & O_DSYNC) |
785 | if (ip->i_new_size) { | 935 | attr_flags |= XFS_ATTR_SYNC; |
786 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 936 | |
787 | ip->i_new_size = 0; | 937 | error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); |
788 | /* | 938 | if (error) |
789 | * If this was a direct or synchronous I/O that failed (such | 939 | goto out_unlock; |
790 | * as ENOSPC) then part of the I/O may have been written to | 940 | |
791 | * disk before the error occured. In this case the on-disk | 941 | /* Change file size if needed */ |
792 | * file size may have been adjusted beyond the in-memory file | 942 | if (new_size) { |
793 | * size and now needs to be truncated back. | 943 | struct iattr iattr; |
794 | */ | 944 | |
795 | if (ip->i_d.di_size > ip->i_size) | 945 | iattr.ia_valid = ATTR_SIZE; |
796 | ip->i_d.di_size = ip->i_size; | 946 | iattr.ia_size = new_size; |
797 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 947 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); |
798 | } | 948 | } |
799 | xfs_iunlock(ip, iolock); | 949 | |
800 | out_unlock_mutex: | 950 | out_unlock: |
801 | if (need_i_mutex) | 951 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
802 | mutex_unlock(&inode->i_mutex); | 952 | return error; |
803 | return -error; | ||
804 | } | 953 | } |
805 | 954 | ||
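The new .fallocate handler maps FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE onto the existing XFS_IOC_RESVSP/XFS_IOC_UNRESVSP machinery. A userspace sketch (offset and length are illustrative):

	#include <fcntl.h>
	#include <linux/falloc.h>

	/* Preallocate 1MiB without growing i_size, then punch it out again */
	fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20);
	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 1 << 20);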
955 | |||
806 | STATIC int | 956 | STATIC int |
807 | xfs_file_open( | 957 | xfs_file_open( |
808 | struct inode *inode, | 958 | struct inode *inode, |
@@ -921,6 +1071,7 @@ const struct file_operations xfs_file_operations = { | |||
921 | .open = xfs_file_open, | 1071 | .open = xfs_file_open, |
922 | .release = xfs_file_release, | 1072 | .release = xfs_file_release, |
923 | .fsync = xfs_file_fsync, | 1073 | .fsync = xfs_file_fsync, |
1074 | .fallocate = xfs_file_fallocate, | ||
924 | }; | 1075 | }; |
925 | 1076 | ||
926 | const struct file_operations xfs_dir_file_operations = { | 1077 | const struct file_operations xfs_dir_file_operations = { |
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 1f279b012f94..ed88ed16811c 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c | |||
@@ -32,10 +32,9 @@ xfs_tosspages( | |||
32 | xfs_off_t last, | 32 | xfs_off_t last, |
33 | int fiopt) | 33 | int fiopt) |
34 | { | 34 | { |
35 | struct address_space *mapping = VFS_I(ip)->i_mapping; | 35 | /* can't toss partial tail pages, so mask them out */ |
36 | 36 | last &= ~(PAGE_SIZE - 1); | |
37 | if (mapping->nrpages) | 37 | truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); |
38 | truncate_inode_pages(mapping, first); | ||
39 | } | 38 | } |
40 | 39 | ||
41 | int | 40 | int |
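A quick worked example of the tail-page masking, assuming 4k pages: for last = 10000 the mask yields 8192, so the truncated range is [first, 8191] and the partial page holding byte 10000 is left untouched:

	last = 10000;
	last &= ~(PAGE_SIZE - 1);	/* 10000 & ~4095 == 8192 */
	truncate_inode_pages_range(mapping, first, last - 1);	/* up to 8191 */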
@@ -50,12 +49,11 @@ xfs_flushinval_pages( | |||
50 | 49 | ||
51 | trace_xfs_pagecache_inval(ip, first, last); | 50 | trace_xfs_pagecache_inval(ip, first, last); |
52 | 51 | ||
53 | if (mapping->nrpages) { | 52 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
54 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 53 | ret = filemap_write_and_wait_range(mapping, first, |
55 | ret = filemap_write_and_wait(mapping); | 54 | last == -1 ? LLONG_MAX : last); |
56 | if (!ret) | 55 | if (!ret) |
57 | truncate_inode_pages(mapping, first); | 56 | truncate_inode_pages_range(mapping, first, last); |
58 | } | ||
59 | return -ret; | 57 | return -ret; |
60 | } | 58 | } |
61 | 59 | ||
@@ -71,10 +69,9 @@ xfs_flush_pages( | |||
71 | int ret = 0; | 69 | int ret = 0; |
72 | int ret2; | 70 | int ret2; |
73 | 71 | ||
74 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 72 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
75 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 73 | ret = -filemap_fdatawrite_range(mapping, first, |
76 | ret = -filemap_fdatawrite(mapping); | 74 | last == -1 ? LLONG_MAX : last); |
77 | } | ||
78 | if (flags & XBF_ASYNC) | 75 | if (flags & XBF_ASYNC) |
79 | return ret; | 76 | return ret; |
80 | ret2 = xfs_wait_on_pages(ip, first, last); | 77 | ret2 = xfs_wait_on_pages(ip, first, last); |
@@ -91,7 +88,9 @@ xfs_wait_on_pages( | |||
91 | { | 88 | { |
92 | struct address_space *mapping = VFS_I(ip)->i_mapping; | 89 | struct address_space *mapping = VFS_I(ip)->i_mapping; |
93 | 90 | ||
94 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) | 91 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { |
95 | return -filemap_fdatawait(mapping); | 92 | return -filemap_fdatawait_range(mapping, first, |
93 | last == -1 ? ip->i_size - 1 : last); | ||
94 | } | ||
96 | return 0; | 95 | return 0; |
97 | } | 96 | } |
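The reworked xfs_tosspages() masks `last` down to a page boundary because partial tail pages cannot be tossed, and the ranged helpers take inclusive end offsets. A standalone sketch of that arithmetic, assuming 4096-byte pages:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
        unsigned long long first = 8192, last = 20000;

        /* Partial tail pages can't be tossed, so round 'last' down
         * to a page boundary, then convert to an inclusive offset. */
        last &= ~(PAGE_SIZE - 1);           /* 20000 -> 16384 */
        printf("truncate range: [%llu, %llu]\n", first, last - 1);
        return 0;
    }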
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 2ae8b1ccb02e..76e81cff70b9 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c | |||
@@ -16,7 +16,6 @@ | |||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_cred.h" | ||
20 | #include "xfs_sysctl.h" | 19 | #include "xfs_sysctl.h" |
21 | 20 | ||
22 | /* | 21 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h deleted file mode 100644 index 69f71caf061c..000000000000 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_GLOBALS_H__ | ||
19 | #define __XFS_GLOBALS_H__ | ||
20 | |||
21 | extern uint64_t xfs_panic_mask; /* set to cause more panics */ | ||
22 | |||
23 | #endif /* __XFS_GLOBALS_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 3b9e626f7cd1..acca2c5ca3fa 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_dfrag.h" | 39 | #include "xfs_dfrag.h" |
40 | #include "xfs_fsops.h" | 40 | #include "xfs_fsops.h" |
41 | #include "xfs_vnodeops.h" | 41 | #include "xfs_vnodeops.h" |
42 | #include "xfs_discard.h" | ||
42 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
43 | #include "xfs_inode_item.h" | 44 | #include "xfs_inode_item.h" |
44 | #include "xfs_export.h" | 45 | #include "xfs_export.h" |
@@ -416,7 +417,7 @@ xfs_attrlist_by_handle( | |||
416 | if (IS_ERR(dentry)) | 417 | if (IS_ERR(dentry)) |
417 | return PTR_ERR(dentry); | 418 | return PTR_ERR(dentry); |
418 | 419 | ||
419 | kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); | 420 | kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL); |
420 | if (!kbuf) | 421 | if (!kbuf) |
421 | goto out_dput; | 422 | goto out_dput; |
422 | 423 | ||
@@ -623,6 +624,10 @@ xfs_ioc_space( | |||
623 | 624 | ||
624 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | 625 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) |
625 | attr_flags |= XFS_ATTR_NONBLOCK; | 626 | attr_flags |= XFS_ATTR_NONBLOCK; |
627 | |||
628 | if (filp->f_flags & O_DSYNC) | ||
629 | attr_flags |= XFS_ATTR_SYNC; | ||
630 | |||
626 | if (ioflags & IO_INVIS) | 631 | if (ioflags & IO_INVIS) |
627 | attr_flags |= XFS_ATTR_DMI; | 632 | attr_flags |= XFS_ATTR_DMI; |
628 | 633 | ||
@@ -694,14 +699,19 @@ xfs_ioc_fsgeometry_v1( | |||
694 | xfs_mount_t *mp, | 699 | xfs_mount_t *mp, |
695 | void __user *arg) | 700 | void __user *arg) |
696 | { | 701 | { |
697 | xfs_fsop_geom_v1_t fsgeo; | 702 | xfs_fsop_geom_t fsgeo; |
698 | int error; | 703 | int error; |
699 | 704 | ||
700 | error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); | 705 | error = xfs_fs_geometry(mp, &fsgeo, 3); |
701 | if (error) | 706 | if (error) |
702 | return -error; | 707 | return -error; |
703 | 708 | ||
704 | if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) | 709 | /* |
710 | * Caller should have passed an argument of type | ||
711 | * xfs_fsop_geom_v1_t. This is a proper subset of the | ||
712 | * xfs_fsop_geom_t that xfs_fs_geometry() fills in. | ||
713 | */ | ||
714 | if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) | ||
705 | return -XFS_ERROR(EFAULT); | 715 | return -XFS_ERROR(EFAULT); |
706 | return 0; | 716 | return 0; |
707 | } | 717 | } |
@@ -790,7 +800,7 @@ xfs_ioc_fsgetxattr( | |||
790 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 800 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
791 | fa.fsx_xflags = xfs_ip2xflags(ip); | 801 | fa.fsx_xflags = xfs_ip2xflags(ip); |
792 | fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; | 802 | fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; |
793 | fa.fsx_projid = ip->i_d.di_projid; | 803 | fa.fsx_projid = xfs_get_projid(ip); |
794 | 804 | ||
795 | if (attr) { | 805 | if (attr) { |
796 | if (ip->i_afp) { | 806 | if (ip->i_afp) { |
@@ -909,10 +919,10 @@ xfs_ioctl_setattr( | |||
909 | return XFS_ERROR(EIO); | 919 | return XFS_ERROR(EIO); |
910 | 920 | ||
911 | /* | 921 | /* |
912 | * Disallow 32bit project ids because on-disk structure | 922 | * Disallow 32bit project ids when projid32bit feature is not enabled. |
913 | * is 16bit only. | ||
914 | */ | 923 | */ |
915 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1)) | 924 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && |
925 | !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) | ||
916 | return XFS_ERROR(EINVAL); | 926 | return XFS_ERROR(EINVAL); |
917 | 927 | ||
918 | /* | 928 | /* |
@@ -961,7 +971,7 @@ xfs_ioctl_setattr( | |||
961 | if (mask & FSX_PROJID) { | 971 | if (mask & FSX_PROJID) { |
962 | if (XFS_IS_QUOTA_RUNNING(mp) && | 972 | if (XFS_IS_QUOTA_RUNNING(mp) && |
963 | XFS_IS_PQUOTA_ON(mp) && | 973 | XFS_IS_PQUOTA_ON(mp) && |
964 | ip->i_d.di_projid != fa->fsx_projid) { | 974 | xfs_get_projid(ip) != fa->fsx_projid) { |
965 | ASSERT(tp); | 975 | ASSERT(tp); |
966 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, | 976 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, |
967 | capable(CAP_FOWNER) ? | 977 | capable(CAP_FOWNER) ? |
@@ -984,10 +994,22 @@ xfs_ioctl_setattr( | |||
984 | 994 | ||
985 | /* | 995 | /* |
986 | * Extent size must be a multiple of the appropriate block | 996 | * Extent size must be a multiple of the appropriate block |
987 | * size, if set at all. | 997 | * size, if set at all. It must also be smaller than the |
998 | * maximum extent size supported by the filesystem. | ||
999 | * | ||
1000 | * Also, for non-realtime files, limit the extent size hint to | ||
1001 | * half the size of the AGs in the filesystem so alignment | ||
1002 | * doesn't result in extents larger than an AG. | ||
988 | */ | 1003 | */ |
989 | if (fa->fsx_extsize != 0) { | 1004 | if (fa->fsx_extsize != 0) { |
990 | xfs_extlen_t size; | 1005 | xfs_extlen_t size; |
1006 | xfs_fsblock_t extsize_fsb; | ||
1007 | |||
1008 | extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); | ||
1009 | if (extsize_fsb > MAXEXTLEN) { | ||
1010 | code = XFS_ERROR(EINVAL); | ||
1011 | goto error_return; | ||
1012 | } | ||
991 | 1013 | ||
992 | if (XFS_IS_REALTIME_INODE(ip) || | 1014 | if (XFS_IS_REALTIME_INODE(ip) || |
993 | ((mask & FSX_XFLAGS) && | 1015 | ((mask & FSX_XFLAGS) && |
@@ -996,6 +1018,10 @@ xfs_ioctl_setattr( | |||
996 | mp->m_sb.sb_blocklog; | 1018 | mp->m_sb.sb_blocklog; |
997 | } else { | 1019 | } else { |
998 | size = mp->m_sb.sb_blocksize; | 1020 | size = mp->m_sb.sb_blocksize; |
1021 | if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { | ||
1022 | code = XFS_ERROR(EINVAL); | ||
1023 | goto error_return; | ||
1024 | } | ||
999 | } | 1025 | } |
1000 | 1026 | ||
1001 | if (fa->fsx_extsize % size) { | 1027 | if (fa->fsx_extsize % size) { |
@@ -1063,12 +1089,12 @@ xfs_ioctl_setattr( | |||
1063 | * Change the ownerships and register quota modifications | 1089 | * Change the ownerships and register quota modifications |
1064 | * in the transaction. | 1090 | * in the transaction. |
1065 | */ | 1091 | */ |
1066 | if (ip->i_d.di_projid != fa->fsx_projid) { | 1092 | if (xfs_get_projid(ip) != fa->fsx_projid) { |
1067 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { | 1093 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { |
1068 | olddquot = xfs_qm_vop_chown(tp, ip, | 1094 | olddquot = xfs_qm_vop_chown(tp, ip, |
1069 | &ip->i_gdquot, gdqp); | 1095 | &ip->i_gdquot, gdqp); |
1070 | } | 1096 | } |
1071 | ip->i_d.di_projid = fa->fsx_projid; | 1097 | xfs_set_projid(ip, fa->fsx_projid); |
1072 | 1098 | ||
1073 | /* | 1099 | /* |
1074 | * We may have to rev the inode as well as | 1100 | * We may have to rev the inode as well as |
@@ -1088,8 +1114,8 @@ xfs_ioctl_setattr( | |||
1088 | xfs_diflags_to_linux(ip); | 1114 | xfs_diflags_to_linux(ip); |
1089 | } | 1115 | } |
1090 | 1116 | ||
1117 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | ||
1091 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 1118 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
1092 | xfs_ichgtime(ip, XFS_ICHGTIME_CHG); | ||
1093 | 1119 | ||
1094 | XFS_STATS_INC(xs_ig_attrchg); | 1120 | XFS_STATS_INC(xs_ig_attrchg); |
1095 | 1121 | ||
@@ -1294,6 +1320,8 @@ xfs_file_ioctl( | |||
1294 | trace_xfs_file_ioctl(ip); | 1320 | trace_xfs_file_ioctl(ip); |
1295 | 1321 | ||
1296 | switch (cmd) { | 1322 | switch (cmd) { |
1323 | case FITRIM: | ||
1324 | return xfs_ioc_trim(mp, arg); | ||
1297 | case XFS_IOC_ALLOCSP: | 1325 | case XFS_IOC_ALLOCSP: |
1298 | case XFS_IOC_FREESP: | 1326 | case XFS_IOC_FREESP: |
1299 | case XFS_IOC_RESVSP: | 1327 | case XFS_IOC_RESVSP: |
@@ -1301,7 +1329,8 @@ xfs_file_ioctl( | |||
1301 | case XFS_IOC_ALLOCSP64: | 1329 | case XFS_IOC_ALLOCSP64: |
1302 | case XFS_IOC_FREESP64: | 1330 | case XFS_IOC_FREESP64: |
1303 | case XFS_IOC_RESVSP64: | 1331 | case XFS_IOC_RESVSP64: |
1304 | case XFS_IOC_UNRESVSP64: { | 1332 | case XFS_IOC_UNRESVSP64: |
1333 | case XFS_IOC_ZERO_RANGE: { | ||
1305 | xfs_flock64_t bf; | 1334 | xfs_flock64_t bf; |
1306 | 1335 | ||
1307 | if (copy_from_user(&bf, arg, sizeof(bf))) | 1336 | if (copy_from_user(&bf, arg, sizeof(bf))) |
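Among the changes above, xfs_ioc_fsgeometry_v1() now fills a full xfs_fsop_geom_t but copies out only sizeof(xfs_fsop_geom_v1_t) bytes, which is safe because the v1 layout is a strict prefix of the current structure. A toy illustration of that prefix-copy idiom, with invented field names:

    #include <stdio.h>
    #include <string.h>

    /* Legacy ABI: a strict prefix of the current structure. */
    struct geom_v1 { unsigned blocksize, agcount; };
    struct geom    { unsigned blocksize, agcount, logsectsize; };

    int main(void)
    {
        struct geom cur = { 4096, 16, 512 };
        struct geom_v1 old;

        /* Copy only the v1-sized prefix, as the ioctl now does with
         * copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)). */
        memcpy(&old, &cur, sizeof(old));
        printf("v1 view: bsize=%u agcount=%u\n", old.blocksize, old.agcount);
        return 0;
    }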
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 6c83f7f62dc9..54e623bfbb85 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -164,7 +164,8 @@ xfs_ioctl32_bstat_copyin( | |||
164 | get_user(bstat->bs_extsize, &bstat32->bs_extsize) || | 164 | get_user(bstat->bs_extsize, &bstat32->bs_extsize) || |
165 | get_user(bstat->bs_extents, &bstat32->bs_extents) || | 165 | get_user(bstat->bs_extents, &bstat32->bs_extents) || |
166 | get_user(bstat->bs_gen, &bstat32->bs_gen) || | 166 | get_user(bstat->bs_gen, &bstat32->bs_gen) || |
167 | get_user(bstat->bs_projid, &bstat32->bs_projid) || | 167 | get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || |
168 | get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || | ||
168 | get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || | 169 | get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || |
169 | get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || | 170 | get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || |
170 | get_user(bstat->bs_aextents, &bstat32->bs_aextents)) | 171 | get_user(bstat->bs_aextents, &bstat32->bs_aextents)) |
@@ -218,6 +219,7 @@ xfs_bulkstat_one_fmt_compat( | |||
218 | put_user(buffer->bs_extents, &p32->bs_extents) || | 219 | put_user(buffer->bs_extents, &p32->bs_extents) || |
219 | put_user(buffer->bs_gen, &p32->bs_gen) || | 220 | put_user(buffer->bs_gen, &p32->bs_gen) || |
220 | put_user(buffer->bs_projid, &p32->bs_projid) || | 221 | put_user(buffer->bs_projid, &p32->bs_projid) || |
222 | put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || | ||
221 | put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || | 223 | put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || |
222 | put_user(buffer->bs_dmstate, &p32->bs_dmstate) || | 224 | put_user(buffer->bs_dmstate, &p32->bs_dmstate) || |
223 | put_user(buffer->bs_aextents, &p32->bs_aextents)) | 225 | put_user(buffer->bs_aextents, &p32->bs_aextents)) |
@@ -574,6 +576,7 @@ xfs_file_compat_ioctl( | |||
574 | case XFS_IOC_FSGEOMETRY_V1: | 576 | case XFS_IOC_FSGEOMETRY_V1: |
575 | case XFS_IOC_FSGROWFSDATA: | 577 | case XFS_IOC_FSGROWFSDATA: |
576 | case XFS_IOC_FSGROWFSRT: | 578 | case XFS_IOC_FSGROWFSRT: |
579 | case XFS_IOC_ZERO_RANGE: | ||
577 | return xfs_file_ioctl(filp, cmd, p); | 580 | return xfs_file_ioctl(filp, cmd, p); |
578 | #else | 581 | #else |
579 | case XFS_IOC_ALLOCSP_32: | 582 | case XFS_IOC_ALLOCSP_32: |
@@ -583,7 +586,8 @@ xfs_file_compat_ioctl( | |||
583 | case XFS_IOC_RESVSP_32: | 586 | case XFS_IOC_RESVSP_32: |
584 | case XFS_IOC_UNRESVSP_32: | 587 | case XFS_IOC_UNRESVSP_32: |
585 | case XFS_IOC_RESVSP64_32: | 588 | case XFS_IOC_RESVSP64_32: |
586 | case XFS_IOC_UNRESVSP64_32: { | 589 | case XFS_IOC_UNRESVSP64_32: |
590 | case XFS_IOC_ZERO_RANGE_32: { | ||
587 | struct xfs_flock64 bf; | 591 | struct xfs_flock64 bf; |
588 | 592 | ||
589 | if (xfs_compat_flock64_copyin(&bf, arg)) | 593 | if (xfs_compat_flock64_copyin(&bf, arg)) |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 1024c4f8ba0d..80f4060e8970 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h | |||
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat { | |||
65 | __s32 bs_extsize; /* extent size */ | 65 | __s32 bs_extsize; /* extent size */ |
66 | __s32 bs_extents; /* number of extents */ | 66 | __s32 bs_extents; /* number of extents */ |
67 | __u32 bs_gen; /* generation count */ | 67 | __u32 bs_gen; /* generation count */ |
68 | __u16 bs_projid; /* project id */ | 68 | __u16 bs_projid_lo; /* lower part of project id */ |
69 | unsigned char bs_pad[14]; /* pad space, unused */ | 69 | #define bs_projid bs_projid_lo /* (previously just bs_projid) */ |
70 | __u16 bs_projid_hi; /* high part of project id */ | ||
71 | unsigned char bs_pad[12]; /* pad space, unused */ | ||
70 | __u32 bs_dmevmask; /* DMIG event mask */ | 72 | __u32 bs_dmevmask; /* DMIG event mask */ |
71 | __u16 bs_dmstate; /* DMIG state info */ | 73 | __u16 bs_dmstate; /* DMIG state info */ |
72 | __u16 bs_aextents; /* attribute number of extents */ | 74 | __u16 bs_aextents; /* attribute number of extents */ |
@@ -182,6 +184,7 @@ typedef struct compat_xfs_flock64 { | |||
182 | #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) | 184 | #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) |
183 | #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) | 185 | #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) |
184 | #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) | 186 | #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) |
187 | #define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) | ||
185 | 188 | ||
186 | typedef struct compat_xfs_fsop_geom_v1 { | 189 | typedef struct compat_xfs_fsop_geom_v1 { |
187 | __u32 blocksize; /* filesystem (data) block size */ | 190 | __u32 blocksize; /* filesystem (data) block size */ |
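The compat_xfs_bstat change widens the project id to 32 bits without breaking the ABI: bs_projid becomes bs_projid_lo, and two bytes of pad become bs_projid_hi, so the structure size and every later offset stay fixed. A standalone sketch of the split-and-reassemble arithmetic, with simplified structures:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct bstat_old { uint16_t projid;    unsigned char pad[14]; };
    struct bstat_new { uint16_t projid_lo; uint16_t projid_hi;
                       unsigned char pad[12]; };

    int main(void)
    {
        /* Layout is ABI: the total size must not change. */
        assert(sizeof(struct bstat_old) == sizeof(struct bstat_new));

        uint32_t projid = 0x00A1B2C3;
        struct bstat_new bs = {
            .projid_lo = projid & 0xFFFF,   /* the old 16-bit slot */
            .projid_hi = projid >> 16,      /* carved out of the pad */
        };

        /* Consumers reassemble the full 32-bit id. */
        uint32_t got = ((uint32_t)bs.projid_hi << 16) | bs.projid_lo;
        printf("projid roundtrip: %#x -> %#x\n", projid, got);
        return 0;
    }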
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index b1fc2a6bfe83..d44d92cd12b1 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -46,7 +46,6 @@ | |||
46 | #include <linux/namei.h> | 46 | #include <linux/namei.h> |
47 | #include <linux/posix_acl.h> | 47 | #include <linux/posix_acl.h> |
48 | #include <linux/security.h> | 48 | #include <linux/security.h> |
49 | #include <linux/falloc.h> | ||
50 | #include <linux/fiemap.h> | 49 | #include <linux/fiemap.h> |
51 | #include <linux/slab.h> | 50 | #include <linux/slab.h> |
52 | 51 | ||
@@ -71,7 +70,7 @@ xfs_synchronize_times( | |||
71 | 70 | ||
72 | /* | 71 | /* |
73 | * If the linux inode is valid, mark it dirty. | 72 | * If the linux inode is valid, mark it dirty. |
74 | * Used when commiting a dirty inode into a transaction so that | 73 | * Used when committing a dirty inode into a transaction so that |
75 | * the inode will get written back by the linux code | 74 | * the inode will get written back by the linux code |
76 | */ | 75 | */ |
77 | void | 76 | void |
@@ -95,41 +94,6 @@ xfs_mark_inode_dirty( | |||
95 | } | 94 | } |
96 | 95 | ||
97 | /* | 96 | /* |
98 | * Change the requested timestamp in the given inode. | ||
99 | * We don't lock across timestamp updates, and we don't log them but | ||
100 | * we do record the fact that there is dirty information in core. | ||
101 | */ | ||
102 | void | ||
103 | xfs_ichgtime( | ||
104 | xfs_inode_t *ip, | ||
105 | int flags) | ||
106 | { | ||
107 | struct inode *inode = VFS_I(ip); | ||
108 | timespec_t tv; | ||
109 | int sync_it = 0; | ||
110 | |||
111 | tv = current_fs_time(inode->i_sb); | ||
112 | |||
113 | if ((flags & XFS_ICHGTIME_MOD) && | ||
114 | !timespec_equal(&inode->i_mtime, &tv)) { | ||
115 | inode->i_mtime = tv; | ||
116 | sync_it = 1; | ||
117 | } | ||
118 | if ((flags & XFS_ICHGTIME_CHG) && | ||
119 | !timespec_equal(&inode->i_ctime, &tv)) { | ||
120 | inode->i_ctime = tv; | ||
121 | sync_it = 1; | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Update complete - now make sure everyone knows that the inode | ||
126 | * is dirty. | ||
127 | */ | ||
128 | if (sync_it) | ||
129 | xfs_mark_inode_dirty_sync(ip); | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Hook in SELinux. This is not quite correct yet, what we really need | 97 | * Hook in SELinux. This is not quite correct yet, what we really need |
134 | * here (as we do for default ACLs) is a mechanism by which creation of | 98 | * here (as we do for default ACLs) is a mechanism by which creation of |
135 | * these attrs can be journalled at inode creation time (along with the | 99 | * these attrs can be journalled at inode creation time (along with the |
@@ -138,7 +102,8 @@ xfs_ichgtime( | |||
138 | STATIC int | 102 | STATIC int |
139 | xfs_init_security( | 103 | xfs_init_security( |
140 | struct inode *inode, | 104 | struct inode *inode, |
141 | struct inode *dir) | 105 | struct inode *dir, |
106 | const struct qstr *qstr) | ||
142 | { | 107 | { |
143 | struct xfs_inode *ip = XFS_I(inode); | 108 | struct xfs_inode *ip = XFS_I(inode); |
144 | size_t length; | 109 | size_t length; |
@@ -146,7 +111,7 @@ xfs_init_security( | |||
146 | unsigned char *name; | 111 | unsigned char *name; |
147 | int error; | 112 | int error; |
148 | 113 | ||
149 | error = security_inode_init_security(inode, dir, (char **)&name, | 114 | error = security_inode_init_security(inode, dir, qstr, (char **)&name, |
150 | &value, &length); | 115 | &value, &length); |
151 | if (error) { | 116 | if (error) { |
152 | if (error == -EOPNOTSUPP) | 117 | if (error == -EOPNOTSUPP) |
@@ -217,20 +182,20 @@ xfs_vn_mknod( | |||
217 | if (IS_POSIXACL(dir)) { | 182 | if (IS_POSIXACL(dir)) { |
218 | default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); | 183 | default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); |
219 | if (IS_ERR(default_acl)) | 184 | if (IS_ERR(default_acl)) |
220 | return -PTR_ERR(default_acl); | 185 | return PTR_ERR(default_acl); |
221 | 186 | ||
222 | if (!default_acl) | 187 | if (!default_acl) |
223 | mode &= ~current_umask(); | 188 | mode &= ~current_umask(); |
224 | } | 189 | } |
225 | 190 | ||
226 | xfs_dentry_to_name(&name, dentry); | 191 | xfs_dentry_to_name(&name, dentry); |
227 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); | 192 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); |
228 | if (unlikely(error)) | 193 | if (unlikely(error)) |
229 | goto out_free_acl; | 194 | goto out_free_acl; |
230 | 195 | ||
231 | inode = VFS_I(ip); | 196 | inode = VFS_I(ip); |
232 | 197 | ||
233 | error = xfs_init_security(inode, dir); | 198 | error = xfs_init_security(inode, dir, &dentry->d_name); |
234 | if (unlikely(error)) | 199 | if (unlikely(error)) |
235 | goto out_cleanup_inode; | 200 | goto out_cleanup_inode; |
236 | 201 | ||
@@ -352,7 +317,7 @@ xfs_vn_link( | |||
352 | if (unlikely(error)) | 317 | if (unlikely(error)) |
353 | return -error; | 318 | return -error; |
354 | 319 | ||
355 | atomic_inc(&inode->i_count); | 320 | ihold(inode); |
356 | d_instantiate(dentry, inode); | 321 | d_instantiate(dentry, inode); |
357 | return 0; | 322 | return 0; |
358 | } | 323 | } |
@@ -397,13 +362,13 @@ xfs_vn_symlink( | |||
397 | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); | 362 | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); |
398 | xfs_dentry_to_name(&name, dentry); | 363 | xfs_dentry_to_name(&name, dentry); |
399 | 364 | ||
400 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); | 365 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); |
401 | if (unlikely(error)) | 366 | if (unlikely(error)) |
402 | goto out; | 367 | goto out; |
403 | 368 | ||
404 | inode = VFS_I(cip); | 369 | inode = VFS_I(cip); |
405 | 370 | ||
406 | error = xfs_init_security(inode, dir); | 371 | error = xfs_init_security(inode, dir, &dentry->d_name); |
407 | if (unlikely(error)) | 372 | if (unlikely(error)) |
408 | goto out_cleanup_inode; | 373 | goto out_cleanup_inode; |
409 | 374 | ||
@@ -540,58 +505,6 @@ xfs_vn_setattr( | |||
540 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); | 505 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); |
541 | } | 506 | } |
542 | 507 | ||
543 | STATIC long | ||
544 | xfs_vn_fallocate( | ||
545 | struct inode *inode, | ||
546 | int mode, | ||
547 | loff_t offset, | ||
548 | loff_t len) | ||
549 | { | ||
550 | long error; | ||
551 | loff_t new_size = 0; | ||
552 | xfs_flock64_t bf; | ||
553 | xfs_inode_t *ip = XFS_I(inode); | ||
554 | |||
555 | /* preallocation on directories not yet supported */ | ||
556 | error = -ENODEV; | ||
557 | if (S_ISDIR(inode->i_mode)) | ||
558 | goto out_error; | ||
559 | |||
560 | bf.l_whence = 0; | ||
561 | bf.l_start = offset; | ||
562 | bf.l_len = len; | ||
563 | |||
564 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
565 | |||
566 | /* check the new inode size is valid before allocating */ | ||
567 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
568 | offset + len > i_size_read(inode)) { | ||
569 | new_size = offset + len; | ||
570 | error = inode_newsize_ok(inode, new_size); | ||
571 | if (error) | ||
572 | goto out_unlock; | ||
573 | } | ||
574 | |||
575 | error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, | ||
576 | 0, XFS_ATTR_NOLOCK); | ||
577 | if (error) | ||
578 | goto out_unlock; | ||
579 | |||
580 | /* Change file size if needed */ | ||
581 | if (new_size) { | ||
582 | struct iattr iattr; | ||
583 | |||
584 | iattr.ia_valid = ATTR_SIZE; | ||
585 | iattr.ia_size = new_size; | ||
586 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); | ||
587 | } | ||
588 | |||
589 | out_unlock: | ||
590 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
591 | out_error: | ||
592 | return error; | ||
593 | } | ||
594 | |||
595 | #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | 508 | #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) |
596 | 509 | ||
597 | /* | 510 | /* |
@@ -685,7 +598,6 @@ static const struct inode_operations xfs_inode_operations = { | |||
685 | .getxattr = generic_getxattr, | 598 | .getxattr = generic_getxattr, |
686 | .removexattr = generic_removexattr, | 599 | .removexattr = generic_removexattr, |
687 | .listxattr = xfs_vn_listxattr, | 600 | .listxattr = xfs_vn_listxattr, |
688 | .fallocate = xfs_vn_fallocate, | ||
689 | .fiemap = xfs_vn_fiemap, | 601 | .fiemap = xfs_vn_fiemap, |
690 | }; | 602 | }; |
691 | 603 | ||
@@ -795,7 +707,10 @@ xfs_setup_inode( | |||
795 | 707 | ||
796 | inode->i_ino = ip->i_ino; | 708 | inode->i_ino = ip->i_ino; |
797 | inode->i_state = I_NEW; | 709 | inode->i_state = I_NEW; |
798 | inode_add_to_lists(ip->i_mount->m_super, inode); | 710 | |
711 | inode_sb_list_add(inode); | ||
712 | /* make the inode look hashed for the writeback code */ | ||
713 | hlist_add_fake(&inode->i_hash); | ||
799 | 714 | ||
800 | inode->i_mode = ip->i_d.di_mode; | 715 | inode->i_mode = ip->i_d.di_mode; |
801 | inode->i_nlink = ip->i_d.di_nlink; | 716 | inode->i_nlink = ip->i_d.di_nlink; |
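In the xfs_setup_inode() hunk above the inode is no longer added to a real hash bucket; hlist_add_fake() merely makes the node look hashed so the writeback code treats it as live. A userspace re-creation of that trick (the two helpers mirror the kernel's list primitives):

    #include <stdio.h>

    struct hlist_node { struct hlist_node *next, **pprev; };

    /* A node is "unhashed" when pprev is NULL; faking a hash means
     * pointing pprev back at our own next field without touching
     * any real hash bucket. */
    static void hlist_add_fake(struct hlist_node *n)
    {
        n->pprev = &n->next;
    }

    static int hlist_unhashed(const struct hlist_node *n)
    {
        return !n->pprev;
    }

    int main(void)
    {
        struct hlist_node n = { 0 };

        printf("before: unhashed=%d\n", hlist_unhashed(&n)); /* 1 */
        hlist_add_fake(&n);
        printf("after:  unhashed=%d\n", hlist_unhashed(&n)); /* 0 */
        return 0;
    }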
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 2fa0bd9ebc7f..8633521b3b2e 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -37,10 +37,8 @@ | |||
37 | 37 | ||
38 | #include <kmem.h> | 38 | #include <kmem.h> |
39 | #include <mrlock.h> | 39 | #include <mrlock.h> |
40 | #include <sv.h> | ||
41 | #include <time.h> | 40 | #include <time.h> |
42 | 41 | ||
43 | #include <support/debug.h> | ||
44 | #include <support/uuid.h> | 42 | #include <support/uuid.h> |
45 | 43 | ||
46 | #include <linux/semaphore.h> | 44 | #include <linux/semaphore.h> |
@@ -71,6 +69,8 @@ | |||
71 | #include <linux/random.h> | 69 | #include <linux/random.h> |
72 | #include <linux/ctype.h> | 70 | #include <linux/ctype.h> |
73 | #include <linux/writeback.h> | 71 | #include <linux/writeback.h> |
72 | #include <linux/capability.h> | ||
73 | #include <linux/list_sort.h> | ||
74 | 74 | ||
75 | #include <asm/page.h> | 75 | #include <asm/page.h> |
76 | #include <asm/div64.h> | 76 | #include <asm/div64.h> |
@@ -79,15 +79,14 @@ | |||
79 | #include <asm/byteorder.h> | 79 | #include <asm/byteorder.h> |
80 | #include <asm/unaligned.h> | 80 | #include <asm/unaligned.h> |
81 | 81 | ||
82 | #include <xfs_cred.h> | ||
83 | #include <xfs_vnode.h> | 82 | #include <xfs_vnode.h> |
84 | #include <xfs_stats.h> | 83 | #include <xfs_stats.h> |
85 | #include <xfs_sysctl.h> | 84 | #include <xfs_sysctl.h> |
86 | #include <xfs_iops.h> | 85 | #include <xfs_iops.h> |
87 | #include <xfs_aops.h> | 86 | #include <xfs_aops.h> |
88 | #include <xfs_super.h> | 87 | #include <xfs_super.h> |
89 | #include <xfs_globals.h> | ||
90 | #include <xfs_buf.h> | 88 | #include <xfs_buf.h> |
89 | #include <xfs_message.h> | ||
91 | 90 | ||
92 | /* | 91 | /* |
93 | * Feature macros (disable/enable) | 92 | * Feature macros (disable/enable) |
@@ -144,7 +143,7 @@ | |||
144 | #define SYNCHRONIZE() barrier() | 143 | #define SYNCHRONIZE() barrier() |
145 | #define __return_address __builtin_return_address(0) | 144 | #define __return_address __builtin_return_address(0) |
146 | 145 | ||
147 | #define dfltprid 0 | 146 | #define XFS_PROJID_DEFAULT 0 |
148 | #define MAXPATHLEN 1024 | 147 | #define MAXPATHLEN 1024 |
149 | 148 | ||
150 | #define MIN(a,b) (min(a,b)) | 149 | #define MIN(a,b) (min(a,b)) |
@@ -282,4 +281,25 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) | |||
282 | #define __arch_pack | 281 | #define __arch_pack |
283 | #endif | 282 | #endif |
284 | 283 | ||
284 | #define ASSERT_ALWAYS(expr) \ | ||
285 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
286 | |||
287 | #ifndef DEBUG | ||
288 | #define ASSERT(expr) ((void)0) | ||
289 | |||
290 | #ifndef STATIC | ||
291 | # define STATIC static noinline | ||
292 | #endif | ||
293 | |||
294 | #else /* DEBUG */ | ||
295 | |||
296 | #define ASSERT(expr) \ | ||
297 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
298 | |||
299 | #ifndef STATIC | ||
300 | # define STATIC noinline | ||
301 | #endif | ||
302 | |||
303 | #endif /* DEBUG */ | ||
304 | |||
285 | #endif /* __XFS_LINUX__ */ | 305 | #endif /* __XFS_LINUX__ */ |
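The assertion and STATIC macros migrate into xfs_linux.h from the deleted support/debug.h: ASSERT compiles away entirely in non-DEBUG builds, while ASSERT_ALWAYS always fires. A self-contained analogue of that compile-time switch, with assfail() reduced to a stderr-plus-abort stand-in:

    #include <stdio.h>
    #include <stdlib.h>

    static void assfail(const char *expr, const char *file, int line)
    {
        fprintf(stderr, "Assertion failed: %s, file: %s, line: %d\n",
                expr, file, line);
        abort();
    }

    #define ASSERT_ALWAYS(expr) \
        ((expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))

    #ifdef DEBUG
    # define ASSERT(expr) ASSERT_ALWAYS(expr)
    #else
    # define ASSERT(expr) ((void)0)   /* vanishes in production builds */
    #endif

    int main(void)
    {
        ASSERT(1 + 1 == 2);        /* checked only when built with -DDEBUG */
        ASSERT_ALWAYS(2 + 2 == 4); /* always checked */
        puts("ok");
        return 0;
    }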
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c new file mode 100644 index 000000000000..bd672def95ac --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_message.c | |||
@@ -0,0 +1,108 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write the Free Software Foundation, | ||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
16 | */ | ||
17 | |||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | |||
28 | /* | ||
29 | * XFS logging functions | ||
30 | */ | ||
31 | static void | ||
32 | __xfs_printk( | ||
33 | const char *level, | ||
34 | const struct xfs_mount *mp, | ||
35 | struct va_format *vaf) | ||
36 | { | ||
37 | if (mp && mp->m_fsname) { | ||
38 | printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); | ||
39 | return; | ||
40 | } | ||
41 | printk("%sXFS: %pV\n", level, vaf); | ||
42 | } | ||
43 | |||
44 | #define define_xfs_printk_level(func, kern_level) \ | ||
45 | void func(const struct xfs_mount *mp, const char *fmt, ...) \ | ||
46 | { \ | ||
47 | struct va_format vaf; \ | ||
48 | va_list args; \ | ||
49 | \ | ||
50 | va_start(args, fmt); \ | ||
51 | \ | ||
52 | vaf.fmt = fmt; \ | ||
53 | vaf.va = &args; \ | ||
54 | \ | ||
55 | __xfs_printk(kern_level, mp, &vaf); \ | ||
56 | va_end(args); \ | ||
57 | } \ | ||
58 | |||
59 | define_xfs_printk_level(xfs_emerg, KERN_EMERG); | ||
60 | define_xfs_printk_level(xfs_alert, KERN_ALERT); | ||
61 | define_xfs_printk_level(xfs_crit, KERN_CRIT); | ||
62 | define_xfs_printk_level(xfs_err, KERN_ERR); | ||
63 | define_xfs_printk_level(xfs_warn, KERN_WARNING); | ||
64 | define_xfs_printk_level(xfs_notice, KERN_NOTICE); | ||
65 | define_xfs_printk_level(xfs_info, KERN_INFO); | ||
66 | #ifdef DEBUG | ||
67 | define_xfs_printk_level(xfs_debug, KERN_DEBUG); | ||
68 | #endif | ||
69 | |||
70 | void | ||
71 | xfs_alert_tag( | ||
72 | const struct xfs_mount *mp, | ||
73 | int panic_tag, | ||
74 | const char *fmt, ...) | ||
75 | { | ||
76 | struct va_format vaf; | ||
77 | va_list args; | ||
78 | int do_panic = 0; | ||
79 | |||
80 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { | ||
81 | xfs_alert(mp, "Transforming an alert into a BUG."); | ||
82 | do_panic = 1; | ||
83 | } | ||
84 | |||
85 | va_start(args, fmt); | ||
86 | |||
87 | vaf.fmt = fmt; | ||
88 | vaf.va = &args; | ||
89 | |||
90 | __xfs_printk(KERN_ALERT, mp, &vaf); | ||
91 | va_end(args); | ||
92 | |||
93 | BUG_ON(do_panic); | ||
94 | } | ||
95 | |||
96 | void | ||
97 | assfail(char *expr, char *file, int line) | ||
98 | { | ||
99 | xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", | ||
100 | expr, file, line); | ||
101 | BUG(); | ||
102 | } | ||
103 | |||
104 | void | ||
105 | xfs_hex_dump(void *p, int length) | ||
106 | { | ||
107 | print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); | ||
108 | } | ||
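The new __xfs_printk() relies on the kernel-only %pV extension, which prints a pre-packaged struct va_format, so a single helper serves every severity level stamped out by define_xfs_printk_level(). A userspace analogue of the same pattern — forwarding a va_list instead of using %pV, with invented level prefixes:

    #include <stdarg.h>
    #include <stdio.h>

    /* Userspace analogue of __xfs_printk(): the kernel packages the
     * caller's format and va_list in a struct va_format and prints it
     * via %pV; here we simply forward the va_list to vfprintf(). */
    static void xfs_vprintk(const char *level, const char *fsname,
                            const char *fmt, va_list args)
    {
        if (fsname)
            fprintf(stderr, "%sXFS (%s): ", level, fsname);
        else
            fprintf(stderr, "%sXFS: ", level);
        vfprintf(stderr, fmt, args);
        fputc('\n', stderr);
    }

    /* Same trick as define_xfs_printk_level(): stamp out one varargs
     * wrapper per severity from a single macro. */
    #define define_level(func, prefix)                              \
        static void func(const char *fsname, const char *fmt, ...)  \
        {                                                           \
            va_list args;                                           \
            va_start(args, fmt);                                    \
            xfs_vprintk(prefix, fsname, fmt, args);                 \
            va_end(args);                                           \
        }

    define_level(xfs_warn, "<4>")
    define_level(xfs_info, "<6>")

    int main(void)
    {
        xfs_warn("sda1", "%s option requires an argument", "logbufs");
        xfs_info(NULL, "module loaded");
        return 0;
    }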
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h new file mode 100644 index 000000000000..7fb7ea007672 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_message.h | |||
@@ -0,0 +1,39 @@ | |||
1 | #ifndef __XFS_MESSAGE_H | ||
2 | #define __XFS_MESSAGE_H 1 | ||
3 | |||
4 | struct xfs_mount; | ||
5 | |||
6 | extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) | ||
7 | __attribute__ ((format (printf, 2, 3))); | ||
8 | extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) | ||
9 | __attribute__ ((format (printf, 2, 3))); | ||
10 | extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, | ||
11 | const char *fmt, ...) | ||
12 | __attribute__ ((format (printf, 3, 4))); | ||
13 | extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) | ||
14 | __attribute__ ((format (printf, 2, 3))); | ||
15 | extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) | ||
16 | __attribute__ ((format (printf, 2, 3))); | ||
17 | extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) | ||
18 | __attribute__ ((format (printf, 2, 3))); | ||
19 | extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) | ||
20 | __attribute__ ((format (printf, 2, 3))); | ||
21 | extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) | ||
22 | __attribute__ ((format (printf, 2, 3))); | ||
23 | |||
24 | #ifdef DEBUG | ||
25 | extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | ||
26 | __attribute__ ((format (printf, 2, 3))); | ||
27 | #else | ||
28 | static inline void | ||
29 | __attribute__ ((format (printf, 2, 3))) | ||
30 | xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | ||
31 | { | ||
32 | } | ||
33 | #endif | ||
34 | |||
35 | extern void assfail(char *expr, char *f, int l); | ||
36 | |||
37 | extern void xfs_hex_dump(void *p, int length); | ||
38 | |||
39 | #endif /* __XFS_MESSAGE_H */ | ||
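Each declaration above carries __attribute__((format(printf, N, M))), telling GCC and Clang which argument holds the format string so mismatched varargs are flagged at compile time under -Wformat. A minimal demonstration with a hypothetical log_msg() helper:

    #include <stdarg.h>
    #include <stdio.h>

    /* Argument 1 is the format string; varargs start at argument 2. */
    static void log_msg(const char *fmt, ...)
        __attribute__ ((format (printf, 1, 2)));

    static void log_msg(const char *fmt, ...)
    {
        va_list args;
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
    }

    int main(void)
    {
        log_msg("mounted %s with %d AGs\n", "sda1", 16);   /* ok */
        /* log_msg("%d\n", "oops");  <- -Wformat warning at build time */
        return 0;
    }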
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a4e07974955b..a1a881e68a9a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include "xfs_buf_item.h" | 44 | #include "xfs_buf_item.h" |
45 | #include "xfs_utils.h" | 45 | #include "xfs_utils.h" |
46 | #include "xfs_vnodeops.h" | 46 | #include "xfs_vnodeops.h" |
47 | #include "xfs_version.h" | ||
48 | #include "xfs_log_priv.h" | 47 | #include "xfs_log_priv.h" |
49 | #include "xfs_trans_priv.h" | 48 | #include "xfs_trans_priv.h" |
50 | #include "xfs_filestream.h" | 49 | #include "xfs_filestream.h" |
@@ -111,8 +110,10 @@ mempool_t *xfs_ioend_pool; | |||
111 | #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ | 110 | #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ |
112 | #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ | 111 | #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ |
113 | #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ | 112 | #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ |
114 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ | 113 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ |
115 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ | 114 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ |
115 | #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ | ||
116 | #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ | ||
116 | 117 | ||
117 | /* | 118 | /* |
118 | * Table driven mount option parser. | 119 | * Table driven mount option parser. |
@@ -174,6 +175,15 @@ xfs_parseargs( | |||
174 | __uint8_t iosizelog = 0; | 175 | __uint8_t iosizelog = 0; |
175 | 176 | ||
176 | /* | 177 | /* |
178 | * set up the mount name first so all the errors will refer to the | ||
179 | * correct device. | ||
180 | */ | ||
181 | mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); | ||
182 | if (!mp->m_fsname) | ||
183 | return ENOMEM; | ||
184 | mp->m_fsname_len = strlen(mp->m_fsname) + 1; | ||
185 | |||
186 | /* | ||
177 | * Copy binary VFS mount flags we are interested in. | 187 | * Copy binary VFS mount flags we are interested in. |
178 | */ | 188 | */ |
179 | if (sb->s_flags & MS_RDONLY) | 189 | if (sb->s_flags & MS_RDONLY) |
@@ -190,6 +200,7 @@ xfs_parseargs( | |||
190 | mp->m_flags |= XFS_MOUNT_BARRIER; | 200 | mp->m_flags |= XFS_MOUNT_BARRIER; |
191 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; | 201 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; |
192 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; | 202 | mp->m_flags |= XFS_MOUNT_SMALL_INUMS; |
203 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | ||
193 | 204 | ||
194 | /* | 205 | /* |
195 | * These can be overridden by the mount option parsing. | 206 | * These can be overridden by the mount option parsing. |
@@ -208,24 +219,21 @@ xfs_parseargs( | |||
208 | 219 | ||
209 | if (!strcmp(this_char, MNTOPT_LOGBUFS)) { | 220 | if (!strcmp(this_char, MNTOPT_LOGBUFS)) { |
210 | if (!value || !*value) { | 221 | if (!value || !*value) { |
211 | cmn_err(CE_WARN, | 222 | xfs_warn(mp, "%s option requires an argument", |
212 | "XFS: %s option requires an argument", | ||
213 | this_char); | 223 | this_char); |
214 | return EINVAL; | 224 | return EINVAL; |
215 | } | 225 | } |
216 | mp->m_logbufs = simple_strtoul(value, &eov, 10); | 226 | mp->m_logbufs = simple_strtoul(value, &eov, 10); |
217 | } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { | 227 | } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { |
218 | if (!value || !*value) { | 228 | if (!value || !*value) { |
219 | cmn_err(CE_WARN, | 229 | xfs_warn(mp, "%s option requires an argument", |
220 | "XFS: %s option requires an argument", | ||
221 | this_char); | 230 | this_char); |
222 | return EINVAL; | 231 | return EINVAL; |
223 | } | 232 | } |
224 | mp->m_logbsize = suffix_strtoul(value, &eov, 10); | 233 | mp->m_logbsize = suffix_strtoul(value, &eov, 10); |
225 | } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { | 234 | } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { |
226 | if (!value || !*value) { | 235 | if (!value || !*value) { |
227 | cmn_err(CE_WARN, | 236 | xfs_warn(mp, "%s option requires an argument", |
228 | "XFS: %s option requires an argument", | ||
229 | this_char); | 237 | this_char); |
230 | return EINVAL; | 238 | return EINVAL; |
231 | } | 239 | } |
@@ -233,14 +241,12 @@ xfs_parseargs( | |||
233 | if (!mp->m_logname) | 241 | if (!mp->m_logname) |
234 | return ENOMEM; | 242 | return ENOMEM; |
235 | } else if (!strcmp(this_char, MNTOPT_MTPT)) { | 243 | } else if (!strcmp(this_char, MNTOPT_MTPT)) { |
236 | cmn_err(CE_WARN, | 244 | xfs_warn(mp, "%s option not allowed on this system", |
237 | "XFS: %s option not allowed on this system", | ||
238 | this_char); | 245 | this_char); |
239 | return EINVAL; | 246 | return EINVAL; |
240 | } else if (!strcmp(this_char, MNTOPT_RTDEV)) { | 247 | } else if (!strcmp(this_char, MNTOPT_RTDEV)) { |
241 | if (!value || !*value) { | 248 | if (!value || !*value) { |
242 | cmn_err(CE_WARN, | 249 | xfs_warn(mp, "%s option requires an argument", |
243 | "XFS: %s option requires an argument", | ||
244 | this_char); | 250 | this_char); |
245 | return EINVAL; | 251 | return EINVAL; |
246 | } | 252 | } |
@@ -249,8 +255,7 @@ xfs_parseargs( | |||
249 | return ENOMEM; | 255 | return ENOMEM; |
250 | } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { | 256 | } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { |
251 | if (!value || !*value) { | 257 | if (!value || !*value) { |
252 | cmn_err(CE_WARN, | 258 | xfs_warn(mp, "%s option requires an argument", |
253 | "XFS: %s option requires an argument", | ||
254 | this_char); | 259 | this_char); |
255 | return EINVAL; | 260 | return EINVAL; |
256 | } | 261 | } |
@@ -258,8 +263,7 @@ xfs_parseargs( | |||
258 | iosizelog = ffs(iosize) - 1; | 263 | iosizelog = ffs(iosize) - 1; |
259 | } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { | 264 | } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { |
260 | if (!value || !*value) { | 265 | if (!value || !*value) { |
261 | cmn_err(CE_WARN, | 266 | xfs_warn(mp, "%s option requires an argument", |
262 | "XFS: %s option requires an argument", | ||
263 | this_char); | 267 | this_char); |
264 | return EINVAL; | 268 | return EINVAL; |
265 | } | 269 | } |
@@ -281,16 +285,14 @@ xfs_parseargs( | |||
281 | mp->m_flags |= XFS_MOUNT_SWALLOC; | 285 | mp->m_flags |= XFS_MOUNT_SWALLOC; |
282 | } else if (!strcmp(this_char, MNTOPT_SUNIT)) { | 286 | } else if (!strcmp(this_char, MNTOPT_SUNIT)) { |
283 | if (!value || !*value) { | 287 | if (!value || !*value) { |
284 | cmn_err(CE_WARN, | 288 | xfs_warn(mp, "%s option requires an argument", |
285 | "XFS: %s option requires an argument", | ||
286 | this_char); | 289 | this_char); |
287 | return EINVAL; | 290 | return EINVAL; |
288 | } | 291 | } |
289 | dsunit = simple_strtoul(value, &eov, 10); | 292 | dsunit = simple_strtoul(value, &eov, 10); |
290 | } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { | 293 | } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { |
291 | if (!value || !*value) { | 294 | if (!value || !*value) { |
292 | cmn_err(CE_WARN, | 295 | xfs_warn(mp, "%s option requires an argument", |
293 | "XFS: %s option requires an argument", | ||
294 | this_char); | 296 | this_char); |
295 | return EINVAL; | 297 | return EINVAL; |
296 | } | 298 | } |
@@ -298,8 +300,7 @@ xfs_parseargs( | |||
298 | } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { | 300 | } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { |
299 | mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; | 301 | mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; |
300 | #if !XFS_BIG_INUMS | 302 | #if !XFS_BIG_INUMS |
301 | cmn_err(CE_WARN, | 303 | xfs_warn(mp, "%s option not allowed on this system", |
302 | "XFS: %s option not allowed on this system", | ||
303 | this_char); | 304 | this_char); |
304 | return EINVAL; | 305 | return EINVAL; |
305 | #endif | 306 | #endif |
@@ -354,26 +355,26 @@ xfs_parseargs( | |||
354 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; | 355 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; |
355 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { | 356 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { |
356 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | 357 | mp->m_flags |= XFS_MOUNT_DELAYLOG; |
357 | cmn_err(CE_WARN, | ||
358 | "Enabling EXPERIMENTAL delayed logging feature " | ||
359 | "- use at your own risk.\n"); | ||
360 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | 358 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { |
361 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; | 359 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; |
360 | } else if (!strcmp(this_char, MNTOPT_DISCARD)) { | ||
361 | mp->m_flags |= XFS_MOUNT_DISCARD; | ||
362 | } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { | ||
363 | mp->m_flags &= ~XFS_MOUNT_DISCARD; | ||
362 | } else if (!strcmp(this_char, "ihashsize")) { | 364 | } else if (!strcmp(this_char, "ihashsize")) { |
363 | cmn_err(CE_WARN, | 365 | xfs_warn(mp, |
364 | "XFS: ihashsize no longer used, option is deprecated."); | 366 | "ihashsize no longer used, option is deprecated."); |
365 | } else if (!strcmp(this_char, "osyncisdsync")) { | 367 | } else if (!strcmp(this_char, "osyncisdsync")) { |
366 | cmn_err(CE_WARN, | 368 | xfs_warn(mp, |
367 | "XFS: osyncisdsync has no effect, option is deprecated."); | 369 | "osyncisdsync has no effect, option is deprecated."); |
368 | } else if (!strcmp(this_char, "osyncisosync")) { | 370 | } else if (!strcmp(this_char, "osyncisosync")) { |
369 | cmn_err(CE_WARN, | 371 | xfs_warn(mp, |
370 | "XFS: osyncisosync has no effect, option is deprecated."); | 372 | "osyncisosync has no effect, option is deprecated."); |
371 | } else if (!strcmp(this_char, "irixsgid")) { | 373 | } else if (!strcmp(this_char, "irixsgid")) { |
372 | cmn_err(CE_WARN, | 374 | xfs_warn(mp, |
373 | "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); | 375 | "irixsgid is now a sysctl(2) variable, option is deprecated."); |
374 | } else { | 376 | } else { |
375 | cmn_err(CE_WARN, | 377 | xfs_warn(mp, "unknown mount option [%s].", this_char); |
376 | "XFS: unknown mount option [%s].", this_char); | ||
377 | return EINVAL; | 378 | return EINVAL; |
378 | } | 379 | } |
379 | } | 380 | } |
@@ -383,40 +384,44 @@ xfs_parseargs( | |||
383 | */ | 384 | */ |
384 | if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && | 385 | if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && |
385 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { | 386 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
386 | cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); | 387 | xfs_warn(mp, "no-recovery mounts must be read-only."); |
387 | return EINVAL; | 388 | return EINVAL; |
388 | } | 389 | } |
389 | 390 | ||
390 | if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { | 391 | if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { |
391 | cmn_err(CE_WARN, | 392 | xfs_warn(mp, |
392 | "XFS: sunit and swidth options incompatible with the noalign option"); | 393 | "sunit and swidth options incompatible with the noalign option"); |
394 | return EINVAL; | ||
395 | } | ||
396 | |||
397 | if ((mp->m_flags & XFS_MOUNT_DISCARD) && | ||
398 | !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { | ||
399 | xfs_warn(mp, | ||
400 | "the discard option is incompatible with the nodelaylog option"); | ||
393 | return EINVAL; | 401 | return EINVAL; |
394 | } | 402 | } |
395 | 403 | ||
396 | #ifndef CONFIG_XFS_QUOTA | 404 | #ifndef CONFIG_XFS_QUOTA |
397 | if (XFS_IS_QUOTA_RUNNING(mp)) { | 405 | if (XFS_IS_QUOTA_RUNNING(mp)) { |
398 | cmn_err(CE_WARN, | 406 | xfs_warn(mp, "quota support not available in this kernel."); |
399 | "XFS: quota support not available in this kernel."); | ||
400 | return EINVAL; | 407 | return EINVAL; |
401 | } | 408 | } |
402 | #endif | 409 | #endif |
403 | 410 | ||
404 | if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && | 411 | if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && |
405 | (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { | 412 | (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { |
406 | cmn_err(CE_WARN, | 413 | xfs_warn(mp, "cannot mount with both project and group quota"); |
407 | "XFS: cannot mount with both project and group quota"); | ||
408 | return EINVAL; | 414 | return EINVAL; |
409 | } | 415 | } |
410 | 416 | ||
411 | if ((dsunit && !dswidth) || (!dsunit && dswidth)) { | 417 | if ((dsunit && !dswidth) || (!dsunit && dswidth)) { |
412 | cmn_err(CE_WARN, | 418 | xfs_warn(mp, "sunit and swidth must be specified together"); |
413 | "XFS: sunit and swidth must be specified together"); | ||
414 | return EINVAL; | 419 | return EINVAL; |
415 | } | 420 | } |
416 | 421 | ||
417 | if (dsunit && (dswidth % dsunit != 0)) { | 422 | if (dsunit && (dswidth % dsunit != 0)) { |
418 | cmn_err(CE_WARN, | 423 | xfs_warn(mp, |
419 | "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", | 424 | "stripe width (%d) must be a multiple of the stripe unit (%d)", |
420 | dswidth, dsunit); | 425 | dswidth, dsunit); |
421 | return EINVAL; | 426 | return EINVAL; |
422 | } | 427 | } |
@@ -442,8 +447,7 @@ done: | |||
442 | mp->m_logbufs != 0 && | 447 | mp->m_logbufs != 0 && |
443 | (mp->m_logbufs < XLOG_MIN_ICLOGS || | 448 | (mp->m_logbufs < XLOG_MIN_ICLOGS || |
444 | mp->m_logbufs > XLOG_MAX_ICLOGS)) { | 449 | mp->m_logbufs > XLOG_MAX_ICLOGS)) { |
445 | cmn_err(CE_WARN, | 450 | xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", |
446 | "XFS: invalid logbufs value: %d [not %d-%d]", | ||
447 | mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); | 451 | mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); |
448 | return XFS_ERROR(EINVAL); | 452 | return XFS_ERROR(EINVAL); |
449 | } | 453 | } |
@@ -452,22 +456,16 @@ done: | |||
452 | (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || | 456 | (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || |
453 | mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || | 457 | mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || |
454 | !is_power_of_2(mp->m_logbsize))) { | 458 | !is_power_of_2(mp->m_logbsize))) { |
455 | cmn_err(CE_WARN, | 459 | xfs_warn(mp, |
456 | "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", | 460 | "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", |
457 | mp->m_logbsize); | 461 | mp->m_logbsize); |
458 | return XFS_ERROR(EINVAL); | 462 | return XFS_ERROR(EINVAL); |
459 | } | 463 | } |
460 | 464 | ||
461 | mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); | ||
462 | if (!mp->m_fsname) | ||
463 | return ENOMEM; | ||
464 | mp->m_fsname_len = strlen(mp->m_fsname) + 1; | ||
465 | |||
466 | if (iosizelog) { | 465 | if (iosizelog) { |
467 | if (iosizelog > XFS_MAX_IO_LOG || | 466 | if (iosizelog > XFS_MAX_IO_LOG || |
468 | iosizelog < XFS_MIN_IO_LOG) { | 467 | iosizelog < XFS_MIN_IO_LOG) { |
469 | cmn_err(CE_WARN, | 468 | xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", |
470 | "XFS: invalid log iosize: %d [not %d-%d]", | ||
471 | iosizelog, XFS_MIN_IO_LOG, | 469 | iosizelog, XFS_MIN_IO_LOG, |
472 | XFS_MAX_IO_LOG); | 470 | XFS_MAX_IO_LOG); |
473 | return XFS_ERROR(EINVAL); | 471 | return XFS_ERROR(EINVAL); |
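The stripe-geometry validation in the hunks above enforces that sunit and swidth are given together and that the width is a whole multiple of the unit. A standalone mirror of those two checks:

    #include <stdio.h>

    /* Mirror of the checks above: sunit and swidth must be specified
     * together, and swidth must be a multiple of sunit. */
    static int check_stripe(int dsunit, int dswidth)
    {
        if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
            fprintf(stderr, "sunit and swidth must be specified together\n");
            return -1;
        }
        if (dsunit && (dswidth % dsunit != 0)) {
            fprintf(stderr,
                "stripe width (%d) must be a multiple of the stripe unit (%d)\n",
                dswidth, dsunit);
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_stripe(64, 256));  /*  0: valid geometry */
        printf("%d\n", check_stripe(64, 0));    /* -1: missing swidth */
        printf("%d\n", check_stripe(64, 100));  /* -1: not a multiple */
        return 0;
    }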
@@ -503,6 +501,7 @@ xfs_showargs( | |||
503 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, | 501 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, |
504 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, | 502 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, |
505 | { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, | 503 | { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, |
504 | { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, | ||
506 | { 0, NULL } | 505 | { 0, NULL } |
507 | }; | 506 | }; |
508 | static struct proc_xfs_info xfs_info_unset[] = { | 507 | static struct proc_xfs_info xfs_info_unset[] = { |
@@ -577,7 +576,7 @@ xfs_max_file_offset( | |||
577 | 576 | ||
578 | /* Figure out maximum filesize, on Linux this can depend on | 577 | /* Figure out maximum filesize, on Linux this can depend on |
579 | * the filesystem blocksize (on 32 bit platforms). | 578 | * the filesystem blocksize (on 32 bit platforms). |
580 | * __block_prepare_write does this in an [unsigned] long... | 579 | * __block_write_begin does this in an [unsigned] long... |
581 | * page->index << (PAGE_CACHE_SHIFT - bbits) | 580 | * page->index << (PAGE_CACHE_SHIFT - bbits) |
582 | * So, for page sized blocks (4K on 32 bit platforms), | 581 | * So, for page sized blocks (4K on 32 bit platforms), |
583 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is | 582 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is |
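The corrected comment (now naming __block_write_begin) refers to the 32-bit limit: page->index is an unsigned long, so page->index << (PAGE_CACHE_SHIFT - bbits) wraps, and MAX_LFS_FILESIZE caps files at one page size times 2^31. Worked numbers for 4 KiB pages, assuming the era's definition of MAX_LFS_FILESIZE:

    #include <stdio.h>

    int main(void)
    {
        /* On 32-bit, MAX_LFS_FILESIZE = (page_size << 31) - 1: a
         * signed count of 2^31 pages of 4 KiB each is 8 TiB. */
        unsigned long long page_size = 4096;
        unsigned long long max_bytes = (page_size << 31) - 1;

        printf("32-bit max file size: %llu bytes (~%llu TiB)\n",
               max_bytes, (max_bytes + 1) >> 40);
        return 0;
    }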
@@ -610,10 +609,11 @@ xfs_blkdev_get( | |||
610 | { | 609 | { |
611 | int error = 0; | 610 | int error = 0; |
612 | 611 | ||
613 | *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); | 612 | *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, |
613 | mp); | ||
614 | if (IS_ERR(*bdevp)) { | 614 | if (IS_ERR(*bdevp)) { |
615 | error = PTR_ERR(*bdevp); | 615 | error = PTR_ERR(*bdevp); |
616 | printk("XFS: Invalid device [%s], error=%d\n", name, error); | 616 | xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); |
617 | } | 617 | } |
618 | 618 | ||
619 | return -error; | 619 | return -error; |
@@ -624,77 +624,14 @@ xfs_blkdev_put( | |||
624 | struct block_device *bdev) | 624 | struct block_device *bdev) |
625 | { | 625 | { |
626 | if (bdev) | 626 | if (bdev) |
627 | close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); | 627 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
628 | } | ||
629 | |||
630 | /* | ||
631 | * Try to write out the superblock using barriers. | ||
632 | */ | ||
633 | STATIC int | ||
634 | xfs_barrier_test( | ||
635 | xfs_mount_t *mp) | ||
636 | { | ||
637 | xfs_buf_t *sbp = xfs_getsb(mp, 0); | ||
638 | int error; | ||
639 | |||
640 | XFS_BUF_UNDONE(sbp); | ||
641 | XFS_BUF_UNREAD(sbp); | ||
642 | XFS_BUF_UNDELAYWRITE(sbp); | ||
643 | XFS_BUF_WRITE(sbp); | ||
644 | XFS_BUF_UNASYNC(sbp); | ||
645 | XFS_BUF_ORDERED(sbp); | ||
646 | |||
647 | xfsbdstrat(mp, sbp); | ||
648 | error = xfs_iowait(sbp); | ||
649 | |||
650 | /* | ||
651 | * Clear all the flags we set and possible error state in the | ||
652 | * buffer. We only did the write to try out whether barriers | ||
653 | * worked and shouldn't leave any traces in the superblock | ||
654 | * buffer. | ||
655 | */ | ||
656 | XFS_BUF_DONE(sbp); | ||
657 | XFS_BUF_ERROR(sbp, 0); | ||
658 | XFS_BUF_UNORDERED(sbp); | ||
659 | |||
660 | xfs_buf_relse(sbp); | ||
661 | return error; | ||
662 | } | ||
663 | |||
664 | STATIC void | ||
665 | xfs_mountfs_check_barriers(xfs_mount_t *mp) | ||
666 | { | ||
667 | int error; | ||
668 | |||
669 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | ||
670 | xfs_fs_cmn_err(CE_NOTE, mp, | ||
671 | "Disabling barriers, not supported with external log device"); | ||
672 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
673 | return; | ||
674 | } | ||
675 | |||
676 | if (xfs_readonly_buftarg(mp->m_ddev_targp)) { | ||
677 | xfs_fs_cmn_err(CE_NOTE, mp, | ||
678 | "Disabling barriers, underlying device is readonly"); | ||
679 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
680 | return; | ||
681 | } | ||
682 | |||
683 | error = xfs_barrier_test(mp); | ||
684 | if (error) { | ||
685 | xfs_fs_cmn_err(CE_NOTE, mp, | ||
686 | "Disabling barriers, trial barrier write failed"); | ||
687 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
688 | return; | ||
689 | } | ||
690 | } | 628 | } |
691 | 629 | ||
692 | void | 630 | void |
693 | xfs_blkdev_issue_flush( | 631 | xfs_blkdev_issue_flush( |
694 | xfs_buftarg_t *buftarg) | 632 | xfs_buftarg_t *buftarg) |
695 | { | 633 | { |
696 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, | 634 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); |
697 | BLKDEV_IFL_WAIT); | ||
698 | } | 635 | } |
699 | 636 | ||
700 | STATIC void | 637 | STATIC void |
@@ -747,8 +684,8 @@ xfs_open_devices( | |||
747 | goto out_close_logdev; | 684 | goto out_close_logdev; |
748 | 685 | ||
749 | if (rtdev == ddev || rtdev == logdev) { | 686 | if (rtdev == ddev || rtdev == logdev) { |
750 | cmn_err(CE_WARN, | 687 | xfs_warn(mp, |
751 | "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); | 688 | "Cannot mount filesystem with identical rtdev and ddev/logdev."); |
752 | error = EINVAL; | 689 | error = EINVAL; |
753 | goto out_close_rtdev; | 690 | goto out_close_rtdev; |
754 | } | 691 | } |
@@ -758,18 +695,20 @@ xfs_open_devices( | |||
758 | * Setup xfs_mount buffer target pointers | 695 | * Setup xfs_mount buffer target pointers |
759 | */ | 696 | */ |
760 | error = ENOMEM; | 697 | error = ENOMEM; |
761 | mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); | 698 | mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); |
762 | if (!mp->m_ddev_targp) | 699 | if (!mp->m_ddev_targp) |
763 | goto out_close_rtdev; | 700 | goto out_close_rtdev; |
764 | 701 | ||
765 | if (rtdev) { | 702 | if (rtdev) { |
766 | mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); | 703 | mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, |
704 | mp->m_fsname); | ||
767 | if (!mp->m_rtdev_targp) | 705 | if (!mp->m_rtdev_targp) |
768 | goto out_free_ddev_targ; | 706 | goto out_free_ddev_targ; |
769 | } | 707 | } |
770 | 708 | ||
771 | if (logdev && logdev != ddev) { | 709 | if (logdev && logdev != ddev) { |
772 | mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); | 710 | mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, |
711 | mp->m_fsname); | ||
773 | if (!mp->m_logdev_targp) | 712 | if (!mp->m_logdev_targp) |
774 | goto out_free_rtdev_targ; | 713 | goto out_free_rtdev_targ; |
775 | } else { | 714 | } else { |
@@ -829,63 +768,6 @@ xfs_setup_devices( | |||
829 | return 0; | 768 | return 0; |
830 | } | 769 | } |
831 | 770 | ||
832 | /* | ||
833 | * XFS AIL push thread support | ||
834 | */ | ||
835 | void | ||
836 | xfsaild_wakeup( | ||
837 | struct xfs_ail *ailp, | ||
838 | xfs_lsn_t threshold_lsn) | ||
839 | { | ||
840 | ailp->xa_target = threshold_lsn; | ||
841 | wake_up_process(ailp->xa_task); | ||
842 | } | ||
843 | |||
844 | STATIC int | ||
845 | xfsaild( | ||
846 | void *data) | ||
847 | { | ||
848 | struct xfs_ail *ailp = data; | ||
849 | xfs_lsn_t last_pushed_lsn = 0; | ||
850 | long tout = 0; /* milliseconds */ | ||
851 | |||
852 | while (!kthread_should_stop()) { | ||
853 | schedule_timeout_interruptible(tout ? | ||
854 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | ||
855 | |||
856 | /* swsusp */ | ||
857 | try_to_freeze(); | ||
858 | |||
859 | ASSERT(ailp->xa_mount->m_log); | ||
860 | if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) | ||
861 | continue; | ||
862 | |||
863 | tout = xfsaild_push(ailp, &last_pushed_lsn); | ||
864 | } | ||
865 | |||
866 | return 0; | ||
867 | } /* xfsaild */ | ||
868 | |||
869 | int | ||
870 | xfsaild_start( | ||
871 | struct xfs_ail *ailp) | ||
872 | { | ||
873 | ailp->xa_target = 0; | ||
874 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | ||
875 | ailp->xa_mount->m_fsname); | ||
876 | if (IS_ERR(ailp->xa_task)) | ||
877 | return -PTR_ERR(ailp->xa_task); | ||
878 | return 0; | ||
879 | } | ||
880 | |||
881 | void | ||
882 | xfsaild_stop( | ||
883 | struct xfs_ail *ailp) | ||
884 | { | ||
885 | kthread_stop(ailp->xa_task); | ||
886 | } | ||
887 | |||
888 | |||
889 | /* Catch misguided souls that try to use this interface on XFS */ | 771 | /* Catch misguided souls that try to use this interface on XFS */ |
890 | STATIC struct inode * | 772 | STATIC struct inode * |
891 | xfs_fs_alloc_inode( | 773 | xfs_fs_alloc_inode( |
@@ -938,7 +820,7 @@ out_reclaim: | |||
938 | * Slab object creation initialisation for the XFS inode. | 820 | * Slab object creation initialisation for the XFS inode. |
939 | * This covers only the idempotent fields in the XFS inode; | 821 | * This covers only the idempotent fields in the XFS inode; |
940 | * all other fields need to be initialised on allocation | 822 | * all other fields need to be initialised on allocation |
941 | * from the slab. This avoids the need to repeatedly intialise | 823 | * from the slab. This avoids the need to repeatedly initialise |
942 | * fields in the xfs inode that are left in the initialised state | 824 | * fields in the xfs inode that are left in the initialised state |
943 | * when freeing the inode. | 825 | * when freeing the inode. |
944 | */ | 826 | */ |
@@ -972,12 +854,7 @@ xfs_fs_inode_init_once( | |||
972 | 854 | ||
973 | /* | 855 | /* |
974 | * Dirty the XFS inode when mark_inode_dirty_sync() is called so that | 856 | * Dirty the XFS inode when mark_inode_dirty_sync() is called so that |
975 | * we catch unlogged VFS level updates to the inode. Care must be taken | 857 | * we catch unlogged VFS level updates to the inode. |
976 | * here - the transaction code calls mark_inode_dirty_sync() to mark the | ||
977 | * VFS inode dirty in a transaction and clears the i_update_core field; | ||
978 | * it must clear the field after calling mark_inode_dirty_sync() to | ||
979 | * correctly indicate that the dirty state has been propagated into the | ||
980 | * inode log item. | ||
981 | * | 858 | * |
982 | * We need the barrier() to maintain correct ordering between unlogged | 859 | * We need the barrier() to maintain correct ordering between unlogged |
983 | * updates and the transaction commit code that clears the i_update_core | 860 | * updates and the transaction commit code that clears the i_update_core |
@@ -986,7 +863,8 @@ xfs_fs_inode_init_once( | |||
986 | */ | 863 | */ |
987 | STATIC void | 864 | STATIC void |
988 | xfs_fs_dirty_inode( | 865 | xfs_fs_dirty_inode( |
989 | struct inode *inode) | 866 | struct inode *inode, |
867 | int flags) | ||
990 | { | 868 | { |
991 | barrier(); | 869 | barrier(); |
992 | XFS_I(inode)->i_update_core = 1; | 870 | XFS_I(inode)->i_update_core = 1; |
@@ -1084,7 +962,7 @@ xfs_fs_write_inode( | |||
1084 | error = 0; | 962 | error = 0; |
1085 | goto out_unlock; | 963 | goto out_unlock; |
1086 | } | 964 | } |
1087 | error = xfs_iflush(ip, 0); | 965 | error = xfs_iflush(ip, SYNC_TRYLOCK); |
1088 | } | 966 | } |
1089 | 967 | ||
1090 | out_unlock: | 968 | out_unlock: |
@@ -1126,6 +1004,8 @@ xfs_fs_evict_inode( | |||
1126 | */ | 1004 | */ |
1127 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | 1005 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); |
1128 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 1006 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
1007 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
1008 | &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); | ||
1129 | 1009 | ||
1130 | xfs_inactive(ip); | 1010 | xfs_inactive(ip); |
1131 | } | 1011 | } |
@@ -1195,22 +1075,12 @@ xfs_fs_sync_fs( | |||
1195 | return -error; | 1075 | return -error; |
1196 | 1076 | ||
1197 | if (laptop_mode) { | 1077 | if (laptop_mode) { |
1198 | int prev_sync_seq = mp->m_sync_seq; | ||
1199 | |||
1200 | /* | 1078 | /* |
1201 | * The disk must be active because we're syncing. | 1079 | * The disk must be active because we're syncing. |
1202 | * We schedule xfssyncd now (now that the disk is | 1080 | * We schedule xfssyncd now (now that the disk is |
1203 | * active) instead of later (when it might not be). | 1081 | * active) instead of later (when it might not be). |
1204 | */ | 1082 | */ |
1205 | wake_up_process(mp->m_sync_task); | 1083 | flush_delayed_work_sync(&mp->m_sync_work); |
1206 | /* | ||
1207 | * We have to wait for the sync iteration to complete. | ||
1208 | * If we don't, the disk activity caused by the sync | ||
1209 | * will come after the sync is completed, and that | ||
1210 | * triggers another sync from laptop mode. | ||
1211 | */ | ||
1212 | wait_event(mp->m_wait_single_sync_task, | ||
1213 | mp->m_sync_seq != prev_sync_seq); | ||
1214 | } | 1084 | } |
1215 | 1085 | ||
1216 | return 0; | 1086 | return 0; |
@@ -1308,14 +1178,6 @@ xfs_fs_remount( | |||
1308 | switch (token) { | 1178 | switch (token) { |
1309 | case Opt_barrier: | 1179 | case Opt_barrier: |
1310 | mp->m_flags |= XFS_MOUNT_BARRIER; | 1180 | mp->m_flags |= XFS_MOUNT_BARRIER; |
1311 | |||
1312 | /* | ||
1313 | * Test if barriers are actually working if we can, | ||
1314 | * else delay this check until the filesystem is | ||
1315 | * marked writeable. | ||
1316 | */ | ||
1317 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) | ||
1318 | xfs_mountfs_check_barriers(mp); | ||
1319 | break; | 1181 | break; |
1320 | case Opt_nobarrier: | 1182 | case Opt_nobarrier: |
1321 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1183 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
@@ -1338,8 +1200,8 @@ xfs_fs_remount( | |||
1338 | * options that we can't actually change. | 1200 | * options that we can't actually change. |
1339 | */ | 1201 | */ |
1340 | #if 0 | 1202 | #if 0 |
1341 | printk(KERN_INFO | 1203 | xfs_info(mp, |
1342 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1204 | "mount option \"%s\" not supported for remount\n", p); |
1343 | return -EINVAL; | 1205 | return -EINVAL; |
1344 | #else | 1206 | #else |
1345 | break; | 1207 | break; |
@@ -1350,8 +1212,6 @@ xfs_fs_remount( | |||
1350 | /* ro -> rw */ | 1212 | /* ro -> rw */ |
1351 | if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { | 1213 | if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { |
1352 | mp->m_flags &= ~XFS_MOUNT_RDONLY; | 1214 | mp->m_flags &= ~XFS_MOUNT_RDONLY; |
1353 | if (mp->m_flags & XFS_MOUNT_BARRIER) | ||
1354 | xfs_mountfs_check_barriers(mp); | ||
1355 | 1215 | ||
1356 | /* | 1216 | /* |
1357 | * If this is the first remount to writeable state we | 1217 | * If this is the first remount to writeable state we |
@@ -1360,8 +1220,7 @@ xfs_fs_remount( | |||
1360 | if (mp->m_update_flags) { | 1220 | if (mp->m_update_flags) { |
1361 | error = xfs_mount_log_sb(mp, mp->m_update_flags); | 1221 | error = xfs_mount_log_sb(mp, mp->m_update_flags); |
1362 | if (error) { | 1222 | if (error) { |
1363 | cmn_err(CE_WARN, | 1223 | xfs_warn(mp, "failed to write sb changes"); |
1364 | "XFS: failed to write sb changes"); | ||
1365 | return error; | 1224 | return error; |
1366 | } | 1225 | } |
1367 | mp->m_update_flags = 0; | 1226 | mp->m_update_flags = 0; |
@@ -1407,7 +1266,7 @@ xfs_fs_freeze( | |||
1407 | 1266 | ||
1408 | xfs_save_resvblks(mp); | 1267 | xfs_save_resvblks(mp); |
1409 | xfs_quiesce_attr(mp); | 1268 | xfs_quiesce_attr(mp); |
1410 | return -xfs_fs_log_dummy(mp, SYNC_WAIT); | 1269 | return -xfs_fs_log_dummy(mp); |
1411 | } | 1270 | } |
1412 | 1271 | ||
1413 | STATIC int | 1272 | STATIC int |
@@ -1445,15 +1304,15 @@ xfs_finish_flags( | |||
1445 | mp->m_logbsize = mp->m_sb.sb_logsunit; | 1304 | mp->m_logbsize = mp->m_sb.sb_logsunit; |
1446 | } else if (mp->m_logbsize > 0 && | 1305 | } else if (mp->m_logbsize > 0 && |
1447 | mp->m_logbsize < mp->m_sb.sb_logsunit) { | 1306 | mp->m_logbsize < mp->m_sb.sb_logsunit) { |
1448 | cmn_err(CE_WARN, | 1307 | xfs_warn(mp, |
1449 | "XFS: logbuf size must be greater than or equal to log stripe size"); | 1308 | "logbuf size must be greater than or equal to log stripe size"); |
1450 | return XFS_ERROR(EINVAL); | 1309 | return XFS_ERROR(EINVAL); |
1451 | } | 1310 | } |
1452 | } else { | 1311 | } else { |
1453 | /* Fail a mount if the logbuf is larger than 32K */ | 1312 | /* Fail a mount if the logbuf is larger than 32K */ |
1454 | if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { | 1313 | if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { |
1455 | cmn_err(CE_WARN, | 1314 | xfs_warn(mp, |
1456 | "XFS: logbuf size for version 1 logs must be 16K or 32K"); | 1315 | "logbuf size for version 1 logs must be 16K or 32K"); |
1457 | return XFS_ERROR(EINVAL); | 1316 | return XFS_ERROR(EINVAL); |
1458 | } | 1317 | } |
1459 | } | 1318 | } |
@@ -1470,8 +1329,8 @@ xfs_finish_flags( | |||
1470 | * prohibit r/w mounts of read-only filesystems | 1329 | * prohibit r/w mounts of read-only filesystems |
1471 | */ | 1330 | */ |
1472 | if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { | 1331 | if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { |
1473 | cmn_err(CE_WARN, | 1332 | xfs_warn(mp, |
1474 | "XFS: cannot mount a read-only filesystem as read-write"); | 1333 | "cannot mount a read-only filesystem as read-write"); |
1475 | return XFS_ERROR(EROFS); | 1334 | return XFS_ERROR(EROFS); |
1476 | } | 1335 | } |
1477 | 1336 | ||
@@ -1495,9 +1354,6 @@ xfs_fs_fill_super( | |||
1495 | spin_lock_init(&mp->m_sb_lock); | 1354 | spin_lock_init(&mp->m_sb_lock); |
1496 | mutex_init(&mp->m_growlock); | 1355 | mutex_init(&mp->m_growlock); |
1497 | atomic_set(&mp->m_active_trans, 0); | 1356 | atomic_set(&mp->m_active_trans, 0); |
1498 | INIT_LIST_HEAD(&mp->m_sync_list); | ||
1499 | spin_lock_init(&mp->m_sync_lock); | ||
1500 | init_waitqueue_head(&mp->m_wait_single_sync_task); | ||
1501 | 1357 | ||
1502 | mp->m_super = sb; | 1358 | mp->m_super = sb; |
1503 | sb->s_fs_info = mp; | 1359 | sb->s_fs_info = mp; |
@@ -1521,8 +1377,9 @@ xfs_fs_fill_super( | |||
1521 | if (error) | 1377 | if (error) |
1522 | goto out_free_fsname; | 1378 | goto out_free_fsname; |
1523 | 1379 | ||
1524 | if (xfs_icsb_init_counters(mp)) | 1380 | error = xfs_icsb_init_counters(mp); |
1525 | mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; | 1381 | if (error) |
1382 | goto out_close_devices; | ||
1526 | 1383 | ||
1527 | error = xfs_readsb(mp, flags); | 1384 | error = xfs_readsb(mp, flags); |
1528 | if (error) | 1385 | if (error) |
@@ -1536,17 +1393,18 @@ xfs_fs_fill_super( | |||
1536 | if (error) | 1393 | if (error) |
1537 | goto out_free_sb; | 1394 | goto out_free_sb; |
1538 | 1395 | ||
1539 | if (mp->m_flags & XFS_MOUNT_BARRIER) | ||
1540 | xfs_mountfs_check_barriers(mp); | ||
1541 | |||
1542 | error = xfs_filestream_mount(mp); | 1396 | error = xfs_filestream_mount(mp); |
1543 | if (error) | 1397 | if (error) |
1544 | goto out_free_sb; | 1398 | goto out_free_sb; |
1545 | 1399 | ||
1546 | error = xfs_mountfs(mp); | 1400 | /* |
1547 | if (error) | 1401 | * we must configure the block size in the superblock before we run the |
1548 | goto out_filestream_unmount; | 1402 | * full mount process as the mount process can lookup and cache inodes. |
1549 | 1403 | * For the same reason we must also initialise the syncd and register | |
1404 | * the inode cache shrinker so that inodes can be reclaimed during | ||
1405 | * operations like a quotacheck that iterate all inodes in the | ||
1406 | * filesystem. | ||
1407 | */ | ||
1550 | sb->s_magic = XFS_SB_MAGIC; | 1408 | sb->s_magic = XFS_SB_MAGIC; |
1551 | sb->s_blocksize = mp->m_sb.sb_blocksize; | 1409 | sb->s_blocksize = mp->m_sb.sb_blocksize; |
1552 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; | 1410 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; |
@@ -1554,6 +1412,16 @@ xfs_fs_fill_super( | |||
1554 | sb->s_time_gran = 1; | 1412 | sb->s_time_gran = 1; |
1555 | set_posix_acl_flag(sb); | 1413 | set_posix_acl_flag(sb); |
1556 | 1414 | ||
1415 | error = xfs_syncd_init(mp); | ||
1416 | if (error) | ||
1417 | goto out_filestream_unmount; | ||
1418 | |||
1419 | xfs_inode_shrinker_register(mp); | ||
1420 | |||
1421 | error = xfs_mountfs(mp); | ||
1422 | if (error) | ||
1423 | goto out_syncd_stop; | ||
1424 | |||
1557 | root = igrab(VFS_I(mp->m_rootip)); | 1425 | root = igrab(VFS_I(mp->m_rootip)); |
1558 | if (!root) { | 1426 | if (!root) { |
1559 | error = ENOENT; | 1427 | error = ENOENT; |
@@ -1569,20 +1437,18 @@ xfs_fs_fill_super( | |||
1569 | goto fail_vnrele; | 1437 | goto fail_vnrele; |
1570 | } | 1438 | } |
1571 | 1439 | ||
1572 | error = xfs_syncd_init(mp); | ||
1573 | if (error) | ||
1574 | goto fail_vnrele; | ||
1575 | |||
1576 | xfs_inode_shrinker_register(mp); | ||
1577 | |||
1578 | return 0; | 1440 | return 0; |
1579 | 1441 | ||
1442 | out_syncd_stop: | ||
1443 | xfs_inode_shrinker_unregister(mp); | ||
1444 | xfs_syncd_stop(mp); | ||
1580 | out_filestream_unmount: | 1445 | out_filestream_unmount: |
1581 | xfs_filestream_unmount(mp); | 1446 | xfs_filestream_unmount(mp); |
1582 | out_free_sb: | 1447 | out_free_sb: |
1583 | xfs_freesb(mp); | 1448 | xfs_freesb(mp); |
1584 | out_destroy_counters: | 1449 | out_destroy_counters: |
1585 | xfs_icsb_destroy_counters(mp); | 1450 | xfs_icsb_destroy_counters(mp); |
1451 | out_close_devices: | ||
1586 | xfs_close_devices(mp); | 1452 | xfs_close_devices(mp); |
1587 | out_free_fsname: | 1453 | out_free_fsname: |
1588 | xfs_free_fsname(mp); | 1454 | xfs_free_fsname(mp); |
@@ -1599,6 +1465,9 @@ xfs_fs_fill_super( | |||
1599 | } | 1465 | } |
1600 | 1466 | ||
1601 | fail_unmount: | 1467 | fail_unmount: |
1468 | xfs_inode_shrinker_unregister(mp); | ||
1469 | xfs_syncd_stop(mp); | ||
1470 | |||
1602 | /* | 1471 | /* |
1603 | * Blow away any referenced inode in the filestreams cache. | 1472 | * Blow away any referenced inode in the filestreams cache. |
1604 | * This can and will cause log traffic as inodes go inactive | 1473 | * This can and will cause log traffic as inodes go inactive |
@@ -1612,16 +1481,14 @@ xfs_fs_fill_super( | |||
1612 | goto out_free_sb; | 1481 | goto out_free_sb; |
1613 | } | 1482 | } |
1614 | 1483 | ||
1615 | STATIC int | 1484 | STATIC struct dentry * |
1616 | xfs_fs_get_sb( | 1485 | xfs_fs_mount( |
1617 | struct file_system_type *fs_type, | 1486 | struct file_system_type *fs_type, |
1618 | int flags, | 1487 | int flags, |
1619 | const char *dev_name, | 1488 | const char *dev_name, |
1620 | void *data, | 1489 | void *data) |
1621 | struct vfsmount *mnt) | ||
1622 | { | 1490 | { |
1623 | return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, | 1491 | return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); |
1624 | mnt); | ||
1625 | } | 1492 | } |
1626 | 1493 | ||
1627 | static const struct super_operations xfs_super_operations = { | 1494 | static const struct super_operations xfs_super_operations = { |
@@ -1642,7 +1509,7 @@ static const struct super_operations xfs_super_operations = { | |||
1642 | static struct file_system_type xfs_fs_type = { | 1509 | static struct file_system_type xfs_fs_type = { |
1643 | .owner = THIS_MODULE, | 1510 | .owner = THIS_MODULE, |
1644 | .name = "xfs", | 1511 | .name = "xfs", |
1645 | .get_sb = xfs_fs_get_sb, | 1512 | .mount = xfs_fs_mount, |
1646 | .kill_sb = kill_block_super, | 1513 | .kill_sb = kill_block_super, |
1647 | .fs_flags = FS_REQUIRES_DEV, | 1514 | .fs_flags = FS_REQUIRES_DEV, |
1648 | }; | 1515 | }; |
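For context (not part of this commit): the hunk above is XFS's side of the VFS-wide conversion from .get_sb to .mount, where the entry point now returns the root dentry directly instead of filling in a vfsmount out-parameter. A minimal sketch of the same shape for a hypothetical "foofs" (all foofs_* names are invented for illustration):

#include <linux/fs.h>
#include <linux/module.h>

static int foofs_fill_super(struct super_block *sb, void *data, int silent)
{
	/* parse options from 'data', read the on-disk super, set sb->s_op
	 * and sb->s_root here; this callback is unchanged by the switch */
	return 0;
}

static struct dentry *
foofs_mount(struct file_system_type *fs_type, int flags,
	    const char *dev_name, void *data)
{
	/* mount_bdev() opens the block device, runs fill_super and hands
	 * back the root dentry (or an ERR_PTR) */
	return mount_bdev(fs_type, flags, dev_name, data, foofs_fill_super);
}

static struct file_system_type foofs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "foofs",
	.mount		= foofs_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};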
@@ -1790,6 +1657,38 @@ xfs_destroy_zones(void) | |||
1790 | } | 1657 | } |
1791 | 1658 | ||
1792 | STATIC int __init | 1659 | STATIC int __init |
1660 | xfs_init_workqueues(void) | ||
1661 | { | ||
1662 | /* | ||
1663 | * max_active is set to 8 to give enough concurrency to allow ||
1664 | * multiple work operations on each CPU to run. This allows multiple | ||
1665 | * filesystems to be running sync work concurrently, and scales with | ||
1666 | * the number of CPUs in the system. | ||
1667 | */ | ||
1668 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); | ||
1669 | if (!xfs_syncd_wq) | ||
1670 | goto out; | ||
1671 | |||
1672 | xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); | ||
1673 | if (!xfs_ail_wq) | ||
1674 | goto out_destroy_syncd; | ||
1675 | |||
1676 | return 0; | ||
1677 | |||
1678 | out_destroy_syncd: | ||
1679 | destroy_workqueue(xfs_syncd_wq); | ||
1680 | out: | ||
1681 | return -ENOMEM; | ||
1682 | } | ||
1683 | |||
1684 | STATIC void | ||
1685 | xfs_destroy_workqueues(void) | ||
1686 | { | ||
1687 | destroy_workqueue(xfs_ail_wq); | ||
1688 | destroy_workqueue(xfs_syncd_wq); | ||
1689 | } | ||
1690 | |||
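To make the intent of these module-level workqueues concrete, here is a rough consumer-side sketch (hypothetical foo_* names; the real per-mount queuing appears in the xfs_sync.c hunks below). Many mounts share the one queue, and max_active = 8 bounds how many of their work items may run concurrently per CPU:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *foo_syncd_wq;	/* from alloc_workqueue() */

struct foo_mount {
	struct delayed_work	sync_work;	/* INIT_DELAYED_WORK() at mount */
};

static void foo_queue_sync(struct foo_mount *m)
{
	/* one shared queue, many mounts: the workqueue core multiplexes
	 * all the per-mount items onto the "foosyncd" worker pool */
	queue_delayed_work(foo_syncd_wq, &m->sync_work,
			   msecs_to_jiffies(30 * 1000));
}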
1691 | STATIC int __init | ||
1793 | init_xfs_fs(void) | 1692 | init_xfs_fs(void) |
1794 | { | 1693 | { |
1795 | int error; | 1694 | int error; |
@@ -1804,10 +1703,14 @@ init_xfs_fs(void) | |||
1804 | if (error) | 1703 | if (error) |
1805 | goto out; | 1704 | goto out; |
1806 | 1705 | ||
1807 | error = xfs_mru_cache_init(); | 1706 | error = xfs_init_workqueues(); |
1808 | if (error) | 1707 | if (error) |
1809 | goto out_destroy_zones; | 1708 | goto out_destroy_zones; |
1810 | 1709 | ||
1710 | error = xfs_mru_cache_init(); | ||
1711 | if (error) | ||
1712 | goto out_destroy_wq; | ||
1713 | |||
1811 | error = xfs_filestream_init(); | 1714 | error = xfs_filestream_init(); |
1812 | if (error) | 1715 | if (error) |
1813 | goto out_mru_cache_uninit; | 1716 | goto out_mru_cache_uninit; |
@@ -1841,6 +1744,8 @@ init_xfs_fs(void) | |||
1841 | xfs_filestream_uninit(); | 1744 | xfs_filestream_uninit(); |
1842 | out_mru_cache_uninit: | 1745 | out_mru_cache_uninit: |
1843 | xfs_mru_cache_uninit(); | 1746 | xfs_mru_cache_uninit(); |
1747 | out_destroy_wq: | ||
1748 | xfs_destroy_workqueues(); | ||
1844 | out_destroy_zones: | 1749 | out_destroy_zones: |
1845 | xfs_destroy_zones(); | 1750 | xfs_destroy_zones(); |
1846 | out: | 1751 | out: |
@@ -1857,6 +1762,7 @@ exit_xfs_fs(void) | |||
1857 | xfs_buf_terminate(); | 1762 | xfs_buf_terminate(); |
1858 | xfs_filestream_uninit(); | 1763 | xfs_filestream_uninit(); |
1859 | xfs_mru_cache_uninit(); | 1764 | xfs_mru_cache_uninit(); |
1765 | xfs_destroy_workqueues(); | ||
1860 | xfs_destroy_zones(); | 1766 | xfs_destroy_zones(); |
1861 | } | 1767 | } |
1862 | 1768 | ||
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 1ef4a4d2d997..50a3266c999e 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -62,6 +62,7 @@ extern void xfs_qm_exit(void); | |||
62 | # define XFS_DBG_STRING "no debug" | 62 | # define XFS_DBG_STRING "no debug" |
63 | #endif | 63 | #endif |
64 | 64 | ||
65 | #define XFS_VERSION_STRING "SGI XFS" | ||
65 | #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ | 66 | #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ |
66 | XFS_SECURITY_STRING \ | 67 | XFS_SECURITY_STRING \ |
67 | XFS_REALTIME_STRING \ | 68 | XFS_REALTIME_STRING \ |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 81976ffed7d6..8ecad5ff9f9b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_trans_priv.h" | ||
25 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
@@ -39,42 +40,61 @@ | |||
39 | #include <linux/kthread.h> | 40 | #include <linux/kthread.h> |
40 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
41 | 42 | ||
43 | struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
42 | 44 | ||
43 | STATIC xfs_inode_t * | 45 | /* |
44 | xfs_inode_ag_lookup( | 46 | * The inode lookup is done in batches to keep the amount of lock traffic and |
45 | struct xfs_mount *mp, | 47 | * radix tree lookups to a minimum. The batch size is a trade off between |
46 | struct xfs_perag *pag, | 48 | * lookup reduction and stack usage. This is in the reclaim path, so we can't |
47 | uint32_t *first_index, | 49 | * be too greedy. |
48 | int tag) | 50 | */ |
51 | #define XFS_LOOKUP_BATCH 32 | ||
52 | |||
53 | STATIC int | ||
54 | xfs_inode_ag_walk_grab( | ||
55 | struct xfs_inode *ip) | ||
49 | { | 56 | { |
50 | int nr_found; | 57 | struct inode *inode = VFS_I(ip); |
51 | struct xfs_inode *ip; | 58 | |
59 | ASSERT(rcu_read_lock_held()); | ||
52 | 60 | ||
53 | /* | 61 | /* |
54 | * use a gang lookup to find the next inode in the tree | 62 | * check for stale RCU freed inode |
55 | * as the tree is sparse and a gang lookup walks to find | 63 | * |
56 | * the number of objects requested. | 64 | * If the inode has been reallocated, it doesn't matter if it's not in |
65 | * the AG we are walking - we are walking for writeback, so if it | ||
66 | * passes all the "valid inode" checks and is dirty, then we'll write | ||
67 | * it back anyway. If it has been reallocated and is still being ||
68 | * initialised, the XFS_INEW check below will catch it. | ||
57 | */ | 69 | */ |
58 | if (tag == XFS_ICI_NO_TAG) { | 70 | spin_lock(&ip->i_flags_lock); |
59 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 71 | if (!ip->i_ino) |
60 | (void **)&ip, *first_index, 1); | 72 | goto out_unlock_noent; |
61 | } else { | 73 | |
62 | nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, | 74 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ |
63 | (void **)&ip, *first_index, 1, tag); | 75 | if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) |
76 | goto out_unlock_noent; | ||
77 | spin_unlock(&ip->i_flags_lock); | ||
78 | |||
79 | /* nothing to sync during shutdown */ | ||
80 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
81 | return EFSCORRUPTED; | ||
82 | |||
83 | /* If we can't grab the inode, it must be on its way to reclaim. */ ||
84 | if (!igrab(inode)) | ||
85 | return ENOENT; | ||
86 | |||
87 | if (is_bad_inode(inode)) { | ||
88 | IRELE(ip); | ||
89 | return ENOENT; | ||
64 | } | 90 | } |
65 | if (!nr_found) | ||
66 | return NULL; | ||
67 | 91 | ||
68 | /* | 92 | /* inode is valid */ |
69 | * Update the index for the next lookup. Catch overflows | 93 | return 0; |
70 | * into the next AG range which can occur if we have inodes | 94 | |
71 | * in the last block of the AG and we are currently | 95 | out_unlock_noent: |
72 | * pointing to the last inode. | 96 | spin_unlock(&ip->i_flags_lock); |
73 | */ | 97 | return ENOENT; |
74 | *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
75 | if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
76 | return NULL; | ||
77 | return ip; | ||
78 | } | 98 | } |
79 | 99 | ||
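The grab helper above encodes the general RCU-walk discipline: an object found under rcu_read_lock() may already have been freed, or freed and reallocated, so its identity and state must be re-checked under the object's own lock before any reference is taken. A generic sketch of the same pattern (the foo_* type and flags are hypothetical stand-ins for the XFS inode details):

#include <linux/spinlock.h>
#include <linux/atomic.h>

#define FOO_NEW		0x01	/* still being initialised */
#define FOO_RECLAIM	0x02	/* being torn down */

struct foo_obj {
	spinlock_t	lock;
	unsigned long	id;		/* zeroed when the object is freed */
	unsigned int	flags;
	atomic_t	refcount;
};

/* caller holds rcu_read_lock() */
static int foo_walk_grab(struct foo_obj *obj)
{
	spin_lock(&obj->lock);
	if (!obj->id || (obj->flags & (FOO_NEW | FOO_RECLAIM))) {
		/* stale RCU-freed object, or unsafe to touch: skip it */
		spin_unlock(&obj->lock);
		return -ENOENT;
	}
	spin_unlock(&obj->lock);

	/* only now take a reference that outlives the RCU section */
	if (!atomic_inc_not_zero(&obj->refcount))
		return -ENOENT;
	return 0;
}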
80 | STATIC int | 100 | STATIC int |
@@ -83,49 +103,83 @@ xfs_inode_ag_walk( | |||
83 | struct xfs_perag *pag, | 103 | struct xfs_perag *pag, |
84 | int (*execute)(struct xfs_inode *ip, | 104 | int (*execute)(struct xfs_inode *ip, |
85 | struct xfs_perag *pag, int flags), | 105 | struct xfs_perag *pag, int flags), |
86 | int flags, | 106 | int flags) |
87 | int tag, | ||
88 | int exclusive, | ||
89 | int *nr_to_scan) | ||
90 | { | 107 | { |
91 | uint32_t first_index; | 108 | uint32_t first_index; |
92 | int last_error = 0; | 109 | int last_error = 0; |
93 | int skipped; | 110 | int skipped; |
111 | int done; | ||
112 | int nr_found; | ||
94 | 113 | ||
95 | restart: | 114 | restart: |
115 | done = 0; | ||
96 | skipped = 0; | 116 | skipped = 0; |
97 | first_index = 0; | 117 | first_index = 0; |
118 | nr_found = 0; | ||
98 | do { | 119 | do { |
120 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | ||
99 | int error = 0; | 121 | int error = 0; |
100 | xfs_inode_t *ip; | 122 | int i; |
101 | 123 | ||
102 | if (exclusive) | 124 | rcu_read_lock(); |
103 | write_lock(&pag->pag_ici_lock); | 125 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
104 | else | 126 | (void **)batch, first_index, |
105 | read_lock(&pag->pag_ici_lock); | 127 | XFS_LOOKUP_BATCH); |
106 | ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); | 128 | if (!nr_found) { |
107 | if (!ip) { | 129 | rcu_read_unlock(); |
108 | if (exclusive) | ||
109 | write_unlock(&pag->pag_ici_lock); | ||
110 | else | ||
111 | read_unlock(&pag->pag_ici_lock); | ||
112 | break; | 130 | break; |
113 | } | 131 | } |
114 | 132 | ||
115 | /* execute releases pag->pag_ici_lock */ | 133 | /* |
116 | error = execute(ip, pag, flags); | 134 | * Grab the inodes before we drop the lock. If we found |
117 | if (error == EAGAIN) { | 135 | * nothing, nr == 0 and the loop will be skipped. |
118 | skipped++; | 136 | */ |
119 | continue; | 137 | for (i = 0; i < nr_found; i++) { |
138 | struct xfs_inode *ip = batch[i]; | ||
139 | |||
140 | if (done || xfs_inode_ag_walk_grab(ip)) | ||
141 | batch[i] = NULL; | ||
142 | |||
143 | /* | ||
144 | * Update the index for the next lookup. Catch | ||
145 | * overflows into the next AG range which can occur if | ||
146 | * we have inodes in the last block of the AG and we | ||
147 | * are currently pointing to the last inode. | ||
148 | * | ||
149 | * Because we may see inodes that are from the wrong AG | ||
150 | * due to RCU freeing and reallocation, only update the | ||
151 | * index if it lies in this AG. It was a race that led ||
152 | * us to see this inode, so another lookup from the | ||
153 | * same index will not find it again. | ||
154 | */ | ||
155 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) | ||
156 | continue; | ||
157 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
158 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
159 | done = 1; | ||
160 | } | ||
161 | |||
162 | /* unlock now we've grabbed the inodes. */ | ||
163 | rcu_read_unlock(); | ||
164 | |||
165 | for (i = 0; i < nr_found; i++) { | ||
166 | if (!batch[i]) | ||
167 | continue; | ||
168 | error = execute(batch[i], pag, flags); | ||
169 | IRELE(batch[i]); | ||
170 | if (error == EAGAIN) { | ||
171 | skipped++; | ||
172 | continue; | ||
173 | } | ||
174 | if (error && last_error != EFSCORRUPTED) | ||
175 | last_error = error; | ||
120 | } | 176 | } |
121 | if (error) | ||
122 | last_error = error; | ||
123 | 177 | ||
124 | /* bail out if the filesystem is corrupted. */ | 178 | /* bail out if the filesystem is corrupted. */ |
125 | if (error == EFSCORRUPTED) | 179 | if (error == EFSCORRUPTED) |
126 | break; | 180 | break; |
127 | 181 | ||
128 | } while ((*nr_to_scan)--); | 182 | } while (nr_found && !done); |
129 | 183 | ||
130 | if (skipped) { | 184 | if (skipped) { |
131 | delay(1); | 185 | delay(1); |
@@ -134,110 +188,32 @@ restart: | |||
134 | return last_error; | 188 | return last_error; |
135 | } | 189 | } |
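Condensed, the rewritten walk above is the batched RCU gang-lookup pattern: fetch up to XFS_LOOKUP_BATCH pointers in one radix tree call under rcu_read_lock(), grab each object, drop the RCU lock, then run the possibly-blocking callback over the batch. A simplified sketch reusing the hypothetical foo_obj from the previous note (foo_put() is an invented release helper):

#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

#define FOO_LOOKUP_BATCH 32

static void foo_put(struct foo_obj *obj)
{
	atomic_dec(&obj->refcount);	/* freeing elided in this sketch */
}

static int foo_walk(struct radix_tree_root *root,
		    int (*execute)(struct foo_obj *obj))
{
	struct foo_obj *batch[FOO_LOOKUP_BATCH];
	unsigned long first_index = 0;
	int last_error = 0;
	unsigned int nr_found, i;

	do {
		rcu_read_lock();
		nr_found = radix_tree_gang_lookup(root, (void **)batch,
						  first_index,
						  FOO_LOOKUP_BATCH);
		if (!nr_found) {
			rcu_read_unlock();
			break;
		}

		/* grab (or NULL out) each entry while still under RCU */
		for (i = 0; i < nr_found; i++) {
			struct foo_obj *obj = batch[i];

			if (foo_walk_grab(obj))
				batch[i] = NULL;
			/* advance the lookup cursor past this entry */
			first_index = obj->id + 1;
		}
		rcu_read_unlock();	/* references held; safe to block */

		for (i = 0; i < nr_found; i++) {
			int error;

			if (!batch[i])
				continue;
			error = execute(batch[i]);
			foo_put(batch[i]);
			if (error)
				last_error = error;
		}
	} while (nr_found);

	return last_error;
}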
136 | 190 | ||
137 | /* | ||
138 | * Select the next per-ag structure to iterate during the walk. The reclaim | ||
139 | * walk is optimised only to walk AGs with reclaimable inodes in them. | ||
140 | */ | ||
141 | static struct xfs_perag * | ||
142 | xfs_inode_ag_iter_next_pag( | ||
143 | struct xfs_mount *mp, | ||
144 | xfs_agnumber_t *first, | ||
145 | int tag) | ||
146 | { | ||
147 | struct xfs_perag *pag = NULL; | ||
148 | |||
149 | if (tag == XFS_ICI_RECLAIM_TAG) { | ||
150 | int found; | ||
151 | int ref; | ||
152 | |||
153 | spin_lock(&mp->m_perag_lock); | ||
154 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
155 | (void **)&pag, *first, 1, tag); | ||
156 | if (found <= 0) { | ||
157 | spin_unlock(&mp->m_perag_lock); | ||
158 | return NULL; | ||
159 | } | ||
160 | *first = pag->pag_agno + 1; | ||
161 | /* open coded pag reference increment */ | ||
162 | ref = atomic_inc_return(&pag->pag_ref); | ||
163 | spin_unlock(&mp->m_perag_lock); | ||
164 | trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_); | ||
165 | } else { | ||
166 | pag = xfs_perag_get(mp, *first); | ||
167 | (*first)++; | ||
168 | } | ||
169 | return pag; | ||
170 | } | ||
171 | |||
172 | int | 191 | int |
173 | xfs_inode_ag_iterator( | 192 | xfs_inode_ag_iterator( |
174 | struct xfs_mount *mp, | 193 | struct xfs_mount *mp, |
175 | int (*execute)(struct xfs_inode *ip, | 194 | int (*execute)(struct xfs_inode *ip, |
176 | struct xfs_perag *pag, int flags), | 195 | struct xfs_perag *pag, int flags), |
177 | int flags, | 196 | int flags) |
178 | int tag, | ||
179 | int exclusive, | ||
180 | int *nr_to_scan) | ||
181 | { | 197 | { |
182 | struct xfs_perag *pag; | 198 | struct xfs_perag *pag; |
183 | int error = 0; | 199 | int error = 0; |
184 | int last_error = 0; | 200 | int last_error = 0; |
185 | xfs_agnumber_t ag; | 201 | xfs_agnumber_t ag; |
186 | int nr; | ||
187 | 202 | ||
188 | nr = nr_to_scan ? *nr_to_scan : INT_MAX; | ||
189 | ag = 0; | 203 | ag = 0; |
190 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { | 204 | while ((pag = xfs_perag_get(mp, ag))) { |
191 | error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, | 205 | ag = pag->pag_agno + 1; |
192 | exclusive, &nr); | 206 | error = xfs_inode_ag_walk(mp, pag, execute, flags); |
193 | xfs_perag_put(pag); | 207 | xfs_perag_put(pag); |
194 | if (error) { | 208 | if (error) { |
195 | last_error = error; | 209 | last_error = error; |
196 | if (error == EFSCORRUPTED) | 210 | if (error == EFSCORRUPTED) |
197 | break; | 211 | break; |
198 | } | 212 | } |
199 | if (nr <= 0) | ||
200 | break; | ||
201 | } | 213 | } |
202 | if (nr_to_scan) | ||
203 | *nr_to_scan = nr; | ||
204 | return XFS_ERROR(last_error); | 214 | return XFS_ERROR(last_error); |
205 | } | 215 | } |
206 | 216 | ||
207 | /* must be called with pag_ici_lock held and releases it */ | ||
208 | int | ||
209 | xfs_sync_inode_valid( | ||
210 | struct xfs_inode *ip, | ||
211 | struct xfs_perag *pag) | ||
212 | { | ||
213 | struct inode *inode = VFS_I(ip); | ||
214 | int error = EFSCORRUPTED; | ||
215 | |||
216 | /* nothing to sync during shutdown */ | ||
217 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
218 | goto out_unlock; | ||
219 | |||
220 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
221 | error = ENOENT; | ||
222 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
223 | goto out_unlock; | ||
224 | |||
225 | /* If we can't grab the inode, it must be on its way to reclaim. */ ||
226 | if (!igrab(inode)) | ||
227 | goto out_unlock; | ||
228 | |||
229 | if (is_bad_inode(inode)) { | ||
230 | IRELE(ip); | ||
231 | goto out_unlock; | ||
232 | } | ||
233 | |||
234 | /* inode is valid */ | ||
235 | error = 0; | ||
236 | out_unlock: | ||
237 | read_unlock(&pag->pag_ici_lock); | ||
238 | return error; | ||
239 | } | ||
240 | |||
241 | STATIC int | 217 | STATIC int |
242 | xfs_sync_inode_data( | 218 | xfs_sync_inode_data( |
243 | struct xfs_inode *ip, | 219 | struct xfs_inode *ip, |
@@ -248,10 +224,6 @@ xfs_sync_inode_data( | |||
248 | struct address_space *mapping = inode->i_mapping; | 224 | struct address_space *mapping = inode->i_mapping; |
249 | int error = 0; | 225 | int error = 0; |
250 | 226 | ||
251 | error = xfs_sync_inode_valid(ip, pag); | ||
252 | if (error) | ||
253 | return error; | ||
254 | |||
255 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 227 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
256 | goto out_wait; | 228 | goto out_wait; |
257 | 229 | ||
@@ -268,7 +240,6 @@ xfs_sync_inode_data( | |||
268 | out_wait: | 240 | out_wait: |
269 | if (flags & SYNC_WAIT) | 241 | if (flags & SYNC_WAIT) |
270 | xfs_ioend_wait(ip); | 242 | xfs_ioend_wait(ip); |
271 | IRELE(ip); | ||
272 | return error; | 243 | return error; |
273 | } | 244 | } |
274 | 245 | ||
@@ -280,10 +251,6 @@ xfs_sync_inode_attr( | |||
280 | { | 251 | { |
281 | int error = 0; | 252 | int error = 0; |
282 | 253 | ||
283 | error = xfs_sync_inode_valid(ip, pag); | ||
284 | if (error) | ||
285 | return error; | ||
286 | |||
287 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 254 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
288 | if (xfs_inode_clean(ip)) | 255 | if (xfs_inode_clean(ip)) |
289 | goto out_unlock; | 256 | goto out_unlock; |
@@ -300,9 +267,18 @@ xfs_sync_inode_attr( | |||
300 | 267 | ||
301 | error = xfs_iflush(ip, flags); | 268 | error = xfs_iflush(ip, flags); |
302 | 269 | ||
270 | /* | ||
271 | * We don't want to try again on non-blocking flushes that can't run | ||
272 | * again immediately. If an inode really must be written, then that's | ||
273 | * what the SYNC_WAIT flag is for. | ||
274 | */ | ||
275 | if (error == EAGAIN) { | ||
276 | ASSERT(!(flags & SYNC_WAIT)); | ||
277 | error = 0; | ||
278 | } | ||
279 | |||
303 | out_unlock: | 280 | out_unlock: |
304 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 281 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
305 | IRELE(ip); | ||
306 | return error; | 282 | return error; |
307 | } | 283 | } |
308 | 284 | ||
@@ -318,8 +294,7 @@ xfs_sync_data( | |||
318 | 294 | ||
319 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); | 295 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); |
320 | 296 | ||
321 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, | 297 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); |
322 | XFS_ICI_NO_TAG, 0, NULL); | ||
323 | if (error) | 298 | if (error) |
324 | return XFS_ERROR(error); | 299 | return XFS_ERROR(error); |
325 | 300 | ||
@@ -337,8 +312,7 @@ xfs_sync_attr( | |||
337 | { | 312 | { |
338 | ASSERT((flags & ~SYNC_WAIT) == 0); | 313 | ASSERT((flags & ~SYNC_WAIT) == 0); |
339 | 314 | ||
340 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, | 315 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); |
341 | XFS_ICI_NO_TAG, 0, NULL); | ||
342 | } | 316 | } |
343 | 317 | ||
344 | STATIC int | 318 | STATIC int |
@@ -401,7 +375,7 @@ xfs_quiesce_data( | |||
401 | 375 | ||
402 | /* mark the log as covered if needed */ | 376 | /* mark the log as covered if needed */ |
403 | if (xfs_log_need_covered(mp)) | 377 | if (xfs_log_need_covered(mp)) |
404 | error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); | 378 | error2 = xfs_fs_log_dummy(mp); |
405 | 379 | ||
406 | /* flush data-only devices */ | 380 | /* flush data-only devices */ |
407 | if (mp->m_rtdev_targp) | 381 | if (mp->m_rtdev_targp) |
@@ -440,7 +414,7 @@ xfs_quiesce_fs( | |||
440 | /* | 414 | /* |
441 | * Second stage of a quiesce. The data is already synced, now we have to take | 415 | * Second stage of a quiesce. The data is already synced, now we have to take |
442 | * care of the metadata. New transactions are already blocked, so we need to | 416 | * care of the metadata. New transactions are already blocked, so we need to |
443 | * wait for any remaining transactions to drain out before proceding. | 417 | * wait for any remaining transactions to drain out before proceeding. |
444 | */ | 418 | */ |
445 | void | 419 | void |
446 | xfs_quiesce_attr( | 420 | xfs_quiesce_attr( |
@@ -464,69 +438,18 @@ xfs_quiesce_attr( | |||
464 | /* Push the superblock and write an unmount record */ | 438 | /* Push the superblock and write an unmount record */ |
465 | error = xfs_log_sbcount(mp, 1); | 439 | error = xfs_log_sbcount(mp, 1); |
466 | if (error) | 440 | if (error) |
467 | xfs_fs_cmn_err(CE_WARN, mp, | 441 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " |
468 | "xfs_attr_quiesce: failed to log sb changes. " | ||
469 | "Frozen image may not be consistent."); | 442 | "Frozen image may not be consistent."); |
470 | xfs_log_unmount_write(mp); | 443 | xfs_log_unmount_write(mp); |
471 | xfs_unmountfs_writesb(mp); | 444 | xfs_unmountfs_writesb(mp); |
472 | } | 445 | } |
473 | 446 | ||
474 | /* | 447 | static void |
475 | * Enqueue a work item to be picked up by the vfs xfssyncd thread. | 448 | xfs_syncd_queue_sync( |
476 | * Doing this has two advantages: | 449 | struct xfs_mount *mp) |
477 | * - It saves on stack space, which is tight in certain situations | ||
478 | * - It can be used (with care) as a mechanism to avoid deadlocks. | ||
479 | * Flushing while allocating in a full filesystem requires both. | ||
480 | */ | ||
481 | STATIC void | ||
482 | xfs_syncd_queue_work( | ||
483 | struct xfs_mount *mp, | ||
484 | void *data, | ||
485 | void (*syncer)(struct xfs_mount *, void *), | ||
486 | struct completion *completion) | ||
487 | { | ||
488 | struct xfs_sync_work *work; | ||
489 | |||
490 | work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); | ||
491 | INIT_LIST_HEAD(&work->w_list); | ||
492 | work->w_syncer = syncer; | ||
493 | work->w_data = data; | ||
494 | work->w_mount = mp; | ||
495 | work->w_completion = completion; | ||
496 | spin_lock(&mp->m_sync_lock); | ||
497 | list_add_tail(&work->w_list, &mp->m_sync_list); | ||
498 | spin_unlock(&mp->m_sync_lock); | ||
499 | wake_up_process(mp->m_sync_task); | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Flush delayed allocate data, attempting to free up reserved space | ||
504 | * from existing allocations. At this point a new allocation attempt | ||
505 | * has failed with ENOSPC and we are in the process of scratching our | ||
506 | * heads, looking about for more room... | ||
507 | */ | ||
508 | STATIC void | ||
509 | xfs_flush_inodes_work( | ||
510 | struct xfs_mount *mp, | ||
511 | void *arg) | ||
512 | { | ||
513 | struct inode *inode = arg; | ||
514 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
515 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
516 | iput(inode); | ||
517 | } | ||
518 | |||
519 | void | ||
520 | xfs_flush_inodes( | ||
521 | xfs_inode_t *ip) | ||
522 | { | 450 | { |
523 | struct inode *inode = VFS_I(ip); | 451 | queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, |
524 | DECLARE_COMPLETION_ONSTACK(completion); | 452 | msecs_to_jiffies(xfs_syncd_centisecs * 10)); |
525 | |||
526 | igrab(inode); | ||
527 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); | ||
528 | wait_for_completion(&completion); | ||
529 | xfs_log_force(ip->i_mount, XFS_LOG_SYNC); | ||
530 | } | 453 | } |
531 | 454 | ||
532 | /* | 455 | /* |
@@ -536,84 +459,119 @@ xfs_flush_inodes( | |||
536 | */ | 459 | */ |
537 | STATIC void | 460 | STATIC void |
538 | xfs_sync_worker( | 461 | xfs_sync_worker( |
539 | struct xfs_mount *mp, | 462 | struct work_struct *work) |
540 | void *unused) | ||
541 | { | 463 | { |
464 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
465 | struct xfs_mount, m_sync_work); | ||
542 | int error; | 466 | int error; |
543 | 467 | ||
544 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | 468 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { |
545 | xfs_log_force(mp, 0); | ||
546 | xfs_reclaim_inodes(mp, 0); | ||
547 | /* dgc: errors ignored here */ | 469 | /* dgc: errors ignored here */ |
548 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
549 | if (mp->m_super->s_frozen == SB_UNFROZEN && | 470 | if (mp->m_super->s_frozen == SB_UNFROZEN && |
550 | xfs_log_need_covered(mp)) | 471 | xfs_log_need_covered(mp)) |
551 | error = xfs_fs_log_dummy(mp, 0); | 472 | error = xfs_fs_log_dummy(mp); |
473 | else | ||
474 | xfs_log_force(mp, 0); | ||
475 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
476 | |||
477 | /* start pushing all the metadata that is currently dirty */ | ||
478 | xfs_ail_push_all(mp->m_ail); | ||
552 | } | 479 | } |
553 | mp->m_sync_seq++; | 480 | |
554 | wake_up(&mp->m_wait_single_sync_task); | 481 | /* queue us up again */ |
482 | xfs_syncd_queue_sync(mp); | ||
555 | } | 483 | } |
556 | 484 | ||
557 | STATIC int | 485 | /* |
558 | xfssyncd( | 486 | * Queue a new inode reclaim pass if there are reclaimable inodes and there |
559 | void *arg) | 487 | * isn't a reclaim pass already in progress. By default it runs every 5s based |
488 | * on the xfs syncd work default of 30s. Perhaps this should have its own ||
489 | * tunable, but that can be done if this method proves to be ineffective or too | ||
490 | * aggressive. | ||
491 | */ | ||
492 | static void | ||
493 | xfs_syncd_queue_reclaim( | ||
494 | struct xfs_mount *mp) | ||
560 | { | 495 | { |
561 | struct xfs_mount *mp = arg; | ||
562 | long timeleft; | ||
563 | xfs_sync_work_t *work, *n; | ||
564 | LIST_HEAD (tmp); | ||
565 | |||
566 | set_freezable(); | ||
567 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); | ||
568 | for (;;) { | ||
569 | if (list_empty(&mp->m_sync_list)) | ||
570 | timeleft = schedule_timeout_interruptible(timeleft); | ||
571 | /* swsusp */ | ||
572 | try_to_freeze(); | ||
573 | if (kthread_should_stop() && list_empty(&mp->m_sync_list)) | ||
574 | break; | ||
575 | 496 | ||
576 | spin_lock(&mp->m_sync_lock); | 497 | /* |
577 | /* | 498 | * We can have inodes enter reclaim after we've shut down the syncd |
578 | * We can get woken by laptop mode, to do a sync - | 499 | * workqueue during unmount, so don't allow reclaim work to be queued |
579 | * that's the (only!) case where the list would be | 500 | * during unmount. |
580 | * empty with time remaining. | 501 | */ |
581 | */ | 502 | if (!(mp->m_super->s_flags & MS_ACTIVE)) |
582 | if (!timeleft || list_empty(&mp->m_sync_list)) { | 503 | return; |
583 | if (!timeleft) | ||
584 | timeleft = xfs_syncd_centisecs * | ||
585 | msecs_to_jiffies(10); | ||
586 | INIT_LIST_HEAD(&mp->m_sync_work.w_list); | ||
587 | list_add_tail(&mp->m_sync_work.w_list, | ||
588 | &mp->m_sync_list); | ||
589 | } | ||
590 | list_splice_init(&mp->m_sync_list, &tmp); | ||
591 | spin_unlock(&mp->m_sync_lock); | ||
592 | 504 | ||
593 | list_for_each_entry_safe(work, n, &tmp, w_list) { | 505 | rcu_read_lock(); |
594 | (*work->w_syncer)(mp, work->w_data); | 506 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { |
595 | list_del(&work->w_list); | 507 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, |
596 | if (work == &mp->m_sync_work) | 508 | msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); |
597 | continue; | ||
598 | if (work->w_completion) | ||
599 | complete(work->w_completion); | ||
600 | kmem_free(work); | ||
601 | } | ||
602 | } | 509 | } |
510 | rcu_read_unlock(); | ||
511 | } | ||
603 | 512 | ||
604 | return 0; | 513 | /* |
514 | * This is a fast pass over the inode cache to try to get reclaim moving on as | ||
515 | * many inodes as possible in a short period of time. It kicks itself every few | ||
516 | * seconds, as well as being kicked by the inode cache shrinker when memory | ||
517 | * goes low. It scans as quickly as possible avoiding locked inodes or those | ||
518 | * already being flushed, and once done schedules a future pass. | ||
519 | */ | ||
520 | STATIC void | ||
521 | xfs_reclaim_worker( | ||
522 | struct work_struct *work) | ||
523 | { | ||
524 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
525 | struct xfs_mount, m_reclaim_work); | ||
526 | |||
527 | xfs_reclaim_inodes(mp, SYNC_TRYLOCK); | ||
528 | xfs_syncd_queue_reclaim(mp); | ||
529 | } | ||
530 | |||
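The two workers above replace the old xfssyncd kthread loop with self-rearming delayed work: each invocation does one bounded pass and queues itself again, and unmount breaks the cycle with cancel_delayed_work_sync() (see xfs_syncd_stop() below). The skeleton, continuing the hypothetical foo_mount sketch from earlier:

static void foo_sync_worker(struct work_struct *work)
{
	struct foo_mount *m = container_of(to_delayed_work(work),
					   struct foo_mount, sync_work);

	/* ... one bounded background pass over 'm' goes here ... */

	/* rearm; cancel_delayed_work_sync() at teardown stops the cycle */
	queue_delayed_work(foo_syncd_wq, &m->sync_work,
			   msecs_to_jiffies(30 * 1000));
}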
531 | /* | ||
532 | * Flush delayed allocate data, attempting to free up reserved space | ||
533 | * from existing allocations. At this point a new allocation attempt | ||
534 | * has failed with ENOSPC and we are in the process of scratching our | ||
535 | * heads, looking about for more room. | ||
536 | * | ||
537 | * Queue a new data flush if there isn't one already in progress and | ||
538 | * wait for completion of the flush. This means that we only ever have one | ||
539 | * inode flush in progress no matter how many ENOSPC events are occurring and | ||
540 | * so will prevent the system from bogging down due to every concurrent | ||
541 | * ENOSPC event scanning all the active inodes in the system for writeback. | ||
542 | */ | ||
543 | void | ||
544 | xfs_flush_inodes( | ||
545 | struct xfs_inode *ip) | ||
546 | { | ||
547 | struct xfs_mount *mp = ip->i_mount; | ||
548 | |||
549 | queue_work(xfs_syncd_wq, &mp->m_flush_work); | ||
550 | flush_work_sync(&mp->m_flush_work); | ||
551 | } | ||
552 | |||
553 | STATIC void | ||
554 | xfs_flush_worker( | ||
555 | struct work_struct *work) | ||
556 | { | ||
557 | struct xfs_mount *mp = container_of(work, | ||
558 | struct xfs_mount, m_flush_work); | ||
559 | |||
560 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
561 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
605 | } | 562 | } |
606 | 563 | ||
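The flush path above gets its single-flight behaviour from the workqueue API itself: queue_work() is a no-op while the item is still pending, so concurrent ENOSPC hitters collapse into one queued flush, and flush_work_sync() makes every caller wait for it to finish. Sketched with the hypothetical foo_mount, here assumed to also carry a plain struct work_struct flush_work member:

void foo_flush_inodes(struct foo_mount *m)
{
	/* a no-op if flush_work is already queued, so any number of
	 * concurrent ENOSPC events share a single flush pass */
	queue_work(foo_syncd_wq, &m->flush_work);

	/* block until that pass completes before retrying the allocation */
	flush_work_sync(&m->flush_work);
}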
607 | int | 564 | int |
608 | xfs_syncd_init( | 565 | xfs_syncd_init( |
609 | struct xfs_mount *mp) | 566 | struct xfs_mount *mp) |
610 | { | 567 | { |
611 | mp->m_sync_work.w_syncer = xfs_sync_worker; | 568 | INIT_WORK(&mp->m_flush_work, xfs_flush_worker); |
612 | mp->m_sync_work.w_mount = mp; | 569 | INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); |
613 | mp->m_sync_work.w_completion = NULL; | 570 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); |
614 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); | 571 | |
615 | if (IS_ERR(mp->m_sync_task)) | 572 | xfs_syncd_queue_sync(mp); |
616 | return -PTR_ERR(mp->m_sync_task); | 573 | xfs_syncd_queue_reclaim(mp); |
574 | |||
617 | return 0; | 575 | return 0; |
618 | } | 576 | } |
619 | 577 | ||
@@ -621,7 +579,9 @@ void | |||
621 | xfs_syncd_stop( | 579 | xfs_syncd_stop( |
622 | struct xfs_mount *mp) | 580 | struct xfs_mount *mp) |
623 | { | 581 | { |
624 | kthread_stop(mp->m_sync_task); | 582 | cancel_delayed_work_sync(&mp->m_sync_work); |
583 | cancel_delayed_work_sync(&mp->m_reclaim_work); | ||
584 | cancel_work_sync(&mp->m_flush_work); | ||
625 | } | 585 | } |
626 | 586 | ||
627 | void | 587 | void |
@@ -640,6 +600,10 @@ __xfs_inode_set_reclaim_tag( | |||
640 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), | 600 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), |
641 | XFS_ICI_RECLAIM_TAG); | 601 | XFS_ICI_RECLAIM_TAG); |
642 | spin_unlock(&ip->i_mount->m_perag_lock); | 602 | spin_unlock(&ip->i_mount->m_perag_lock); |
603 | |||
604 | /* schedule periodic background inode reclaim */ | ||
605 | xfs_syncd_queue_reclaim(ip->i_mount); | ||
606 | |||
643 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, | 607 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, |
644 | -1, _RET_IP_); | 608 | -1, _RET_IP_); |
645 | } | 609 | } |
@@ -659,12 +623,12 @@ xfs_inode_set_reclaim_tag( | |||
659 | struct xfs_perag *pag; | 623 | struct xfs_perag *pag; |
660 | 624 | ||
661 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | 625 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
662 | write_lock(&pag->pag_ici_lock); | 626 | spin_lock(&pag->pag_ici_lock); |
663 | spin_lock(&ip->i_flags_lock); | 627 | spin_lock(&ip->i_flags_lock); |
664 | __xfs_inode_set_reclaim_tag(pag, ip); | 628 | __xfs_inode_set_reclaim_tag(pag, ip); |
665 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 629 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
666 | spin_unlock(&ip->i_flags_lock); | 630 | spin_unlock(&ip->i_flags_lock); |
667 | write_unlock(&pag->pag_ici_lock); | 631 | spin_unlock(&pag->pag_ici_lock); |
668 | xfs_perag_put(pag); | 632 | xfs_perag_put(pag); |
669 | } | 633 | } |
670 | 634 | ||
@@ -698,6 +662,53 @@ __xfs_inode_clear_reclaim_tag( | |||
698 | } | 662 | } |
699 | 663 | ||
700 | /* | 664 | /* |
665 | * Grab the inode for reclaim exclusively. | ||
666 | * Return 0 if we grabbed it, non-zero otherwise. | ||
667 | */ | ||
668 | STATIC int | ||
669 | xfs_reclaim_inode_grab( | ||
670 | struct xfs_inode *ip, | ||
671 | int flags) | ||
672 | { | ||
673 | ASSERT(rcu_read_lock_held()); | ||
674 | |||
675 | /* quick check for stale RCU freed inode */ | ||
676 | if (!ip->i_ino) | ||
677 | return 1; | ||
678 | |||
679 | /* | ||
680 | * do some unlocked checks first to avoid unnecessary lock traffic. | ||
681 | * The first is a flush lock check, the second is an already-in-reclaim ||
682 | * check. Only do these checks if we are not going to block on locks. | ||
683 | */ | ||
684 | if ((flags & SYNC_TRYLOCK) && | ||
685 | (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { | ||
686 | return 1; | ||
687 | } | ||
688 | |||
689 | /* | ||
690 | * The radix tree lock here protects a thread in xfs_iget from racing | ||
691 | * with us starting reclaim on the inode. Once we have the | ||
692 | * XFS_IRECLAIM flag set it will not touch us. | ||
693 | * | ||
694 | * Due to RCU lookup, we may find inodes that have been freed and only | ||
695 | * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that | ||
696 | * aren't candidates for reclaim at all, so we must check the | ||
697 | * XFS_IRECLAIMABLE is set first before proceeding to reclaim. | ||
698 | */ | ||
699 | spin_lock(&ip->i_flags_lock); | ||
700 | if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || | ||
701 | __xfs_iflags_test(ip, XFS_IRECLAIM)) { | ||
702 | /* not a reclaim candidate. */ | ||
703 | spin_unlock(&ip->i_flags_lock); | ||
704 | return 1; | ||
705 | } | ||
706 | __xfs_iflags_set(ip, XFS_IRECLAIM); | ||
707 | spin_unlock(&ip->i_flags_lock); | ||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | /* | ||
701 | * Inodes in different states need to be treated differently, and the return | 712 | * Inodes in different states need to be treated differently, and the return |
702 | * value of xfs_iflush is not sufficient to get this right. The following table | 713 | * value of xfs_iflush is not sufficient to get this right. The following table |
703 | * lists the inode states and the reclaim actions necessary for non-blocking | 714 | * lists the inode states and the reclaim actions necessary for non-blocking |
@@ -753,25 +764,10 @@ xfs_reclaim_inode( | |||
753 | struct xfs_perag *pag, | 764 | struct xfs_perag *pag, |
754 | int sync_mode) | 765 | int sync_mode) |
755 | { | 766 | { |
756 | int error = 0; | 767 | int error; |
757 | |||
758 | /* | ||
759 | * The radix tree lock here protects a thread in xfs_iget from racing | ||
760 | * with us starting reclaim on the inode. Once we have the | ||
761 | * XFS_IRECLAIM flag set it will not touch us. | ||
762 | */ | ||
763 | spin_lock(&ip->i_flags_lock); | ||
764 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
765 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | ||
766 | /* ignore as it is already under reclaim */ | ||
767 | spin_unlock(&ip->i_flags_lock); | ||
768 | write_unlock(&pag->pag_ici_lock); | ||
769 | return 0; | ||
770 | } | ||
771 | __xfs_iflags_set(ip, XFS_IRECLAIM); | ||
772 | spin_unlock(&ip->i_flags_lock); | ||
773 | write_unlock(&pag->pag_ici_lock); | ||
774 | 768 | ||
769 | restart: | ||
770 | error = 0; | ||
775 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 771 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
776 | if (!xfs_iflock_nowait(ip)) { | 772 | if (!xfs_iflock_nowait(ip)) { |
777 | if (!(sync_mode & SYNC_WAIT)) | 773 | if (!(sync_mode & SYNC_WAIT)) |
@@ -797,9 +793,31 @@ xfs_reclaim_inode( | |||
797 | if (xfs_inode_clean(ip)) | 793 | if (xfs_inode_clean(ip)) |
798 | goto reclaim; | 794 | goto reclaim; |
799 | 795 | ||
800 | /* Now we have an inode that needs flushing */ | 796 | /* |
801 | error = xfs_iflush(ip, sync_mode); | 797 | * Now we have an inode that needs flushing. |
798 | * | ||
799 | * We do a nonblocking flush here even if we are doing a SYNC_WAIT | ||
800 | * reclaim as we can deadlock with inode cluster removal. | ||
801 | * xfs_ifree_cluster() can lock the inode buffer before it locks the | ||
802 | * ip->i_lock, and we are doing the exact opposite here. As a result, | ||
803 | * doing a blocking xfs_itobp() to get the cluster buffer will result | ||
804 | * in an ABBA deadlock with xfs_ifree_cluster(). | ||
805 | * | ||
806 | * As xfs_ifree_cluster() must gather all inodes that are active in the ||
807 | * cache to mark them stale, if we hit this case we don't actually want | ||
808 | * to do IO here - we want the inode marked stale so we can simply | ||
809 | * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, | ||
810 | * just unlock the inode, back off and try again. Hopefully the next | ||
811 | * pass through will see the stale flag set on the inode. | ||
812 | */ | ||
813 | error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); | ||
802 | if (sync_mode & SYNC_WAIT) { | 814 | if (sync_mode & SYNC_WAIT) { |
815 | if (error == EAGAIN) { | ||
816 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
817 | /* backoff longer than in xfs_ifree_cluster */ | ||
818 | delay(2); | ||
819 | goto restart; | ||
820 | } | ||
803 | xfs_iflock(ip); | 821 | xfs_iflock(ip); |
804 | goto reclaim; | 822 | goto reclaim; |
805 | } | 823 | } |
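The long comment above describes an ABBA lock inversion that is resolved by retrying rather than blocking. Reduced to its shape (foo_try_flush() is a hypothetical trylock-style flush, not a real kernel API): attempt the flush without blocking on the buffer lock, and in blocking mode back off briefly and restart instead of sleeping with the locks held in the wrong order:

#include <linux/delay.h>

static int foo_reclaim_flush(struct foo_obj *obj, bool wait)
{
	int error;

restart:
	/* trylock variant: never sleeps on the cluster buffer lock */
	error = foo_try_flush(obj);
	if (error == -EAGAIN && wait) {
		msleep(20);	/* back off longer than the lock holder */
		goto restart;
	}
	return error;
}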
@@ -814,7 +832,7 @@ xfs_reclaim_inode( | |||
814 | * pass on the error. | 832 | * pass on the error. |
815 | */ | 833 | */ |
816 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 834 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
817 | xfs_fs_cmn_err(CE_WARN, ip->i_mount, | 835 | xfs_warn(ip->i_mount, |
818 | "inode 0x%llx background reclaim flush failed with %d", | 836 | "inode 0x%llx background reclaim flush failed with %d", |
819 | (long long)ip->i_ino, error); | 837 | (long long)ip->i_ino, error); |
820 | } | 838 | } |
@@ -842,12 +860,12 @@ reclaim: | |||
842 | * added to the tree assert that it's been there before to catch | 860 | * added to the tree assert that it's been there before to catch |
843 | * problems with the inode life time early on. | 861 | * problems with the inode life time early on. |
844 | */ | 862 | */ |
845 | write_lock(&pag->pag_ici_lock); | 863 | spin_lock(&pag->pag_ici_lock); |
846 | if (!radix_tree_delete(&pag->pag_ici_root, | 864 | if (!radix_tree_delete(&pag->pag_ici_root, |
847 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 865 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
848 | ASSERT(0); | 866 | ASSERT(0); |
849 | __xfs_inode_clear_reclaim(pag, ip); | 867 | __xfs_inode_clear_reclaim(pag, ip); |
850 | write_unlock(&pag->pag_ici_lock); | 868 | spin_unlock(&pag->pag_ici_lock); |
851 | 869 | ||
852 | /* | 870 | /* |
853 | * Here we do an (almost) spurious inode lock in order to coordinate | 871 | * Here we do an (almost) spurious inode lock in order to coordinate |
@@ -868,45 +886,181 @@ reclaim: | |||
868 | 886 | ||
869 | } | 887 | } |
870 | 888 | ||
889 | /* | ||
890 | * Walk the AGs and reclaim the inodes in them. Even if the filesystem is | ||
891 | * corrupted, we still want to try to reclaim all the inodes. If we don't, | ||
892 | * then a shutdown during the filesystem unmount reclaim walk will leak all the ||
893 | * unreclaimed inodes. | ||
894 | */ | ||
895 | int | ||
896 | xfs_reclaim_inodes_ag( | ||
897 | struct xfs_mount *mp, | ||
898 | int flags, | ||
899 | int *nr_to_scan) | ||
900 | { | ||
901 | struct xfs_perag *pag; | ||
902 | int error = 0; | ||
903 | int last_error = 0; | ||
904 | xfs_agnumber_t ag; | ||
905 | int trylock = flags & SYNC_TRYLOCK; | ||
906 | int skipped; | ||
907 | |||
908 | restart: | ||
909 | ag = 0; | ||
910 | skipped = 0; | ||
911 | while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { | ||
912 | unsigned long first_index = 0; | ||
913 | int done = 0; | ||
914 | int nr_found = 0; | ||
915 | |||
916 | ag = pag->pag_agno + 1; | ||
917 | |||
918 | if (trylock) { | ||
919 | if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { | ||
920 | skipped++; | ||
921 | xfs_perag_put(pag); | ||
922 | continue; | ||
923 | } | ||
924 | first_index = pag->pag_ici_reclaim_cursor; | ||
925 | } else | ||
926 | mutex_lock(&pag->pag_ici_reclaim_lock); | ||
927 | |||
928 | do { | ||
929 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | ||
930 | int i; | ||
931 | |||
932 | rcu_read_lock(); | ||
933 | nr_found = radix_tree_gang_lookup_tag( | ||
934 | &pag->pag_ici_root, | ||
935 | (void **)batch, first_index, | ||
936 | XFS_LOOKUP_BATCH, | ||
937 | XFS_ICI_RECLAIM_TAG); | ||
938 | if (!nr_found) { | ||
939 | done = 1; | ||
940 | rcu_read_unlock(); | ||
941 | break; | ||
942 | } | ||
943 | |||
944 | /* | ||
945 | * Grab the inodes before we drop the lock. If we found ||
946 | * nothing, nr == 0 and the loop will be skipped. | ||
947 | */ | ||
948 | for (i = 0; i < nr_found; i++) { | ||
949 | struct xfs_inode *ip = batch[i]; | ||
950 | |||
951 | if (done || xfs_reclaim_inode_grab(ip, flags)) | ||
952 | batch[i] = NULL; | ||
953 | |||
954 | /* | ||
955 | * Update the index for the next lookup. Catch | ||
956 | * overflows into the next AG range which can | ||
957 | * occur if we have inodes in the last block of | ||
958 | * the AG and we are currently pointing to the | ||
959 | * last inode. | ||
960 | * | ||
961 | * Because we may see inodes that are from the | ||
962 | * wrong AG due to RCU freeing and | ||
963 | * reallocation, only update the index if it | ||
964 | * lies in this AG. It was a race that lead us | ||
965 | * to see this inode, so another lookup from | ||
966 | * the same index will not find it again. | ||
967 | */ | ||
968 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != | ||
969 | pag->pag_agno) | ||
970 | continue; | ||
971 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
972 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
973 | done = 1; | ||
974 | } | ||
975 | |||
976 | /* unlock now we've grabbed the inodes. */ | ||
977 | rcu_read_unlock(); | ||
978 | |||
979 | for (i = 0; i < nr_found; i++) { | ||
980 | if (!batch[i]) | ||
981 | continue; | ||
982 | error = xfs_reclaim_inode(batch[i], pag, flags); | ||
983 | if (error && last_error != EFSCORRUPTED) | ||
984 | last_error = error; | ||
985 | } | ||
986 | |||
987 | *nr_to_scan -= XFS_LOOKUP_BATCH; | ||
988 | |||
989 | } while (nr_found && !done && *nr_to_scan > 0); | ||
990 | |||
991 | if (trylock && !done) | ||
992 | pag->pag_ici_reclaim_cursor = first_index; | ||
993 | else | ||
994 | pag->pag_ici_reclaim_cursor = 0; | ||
995 | mutex_unlock(&pag->pag_ici_reclaim_lock); | ||
996 | xfs_perag_put(pag); | ||
997 | } | ||
998 | |||
999 | /* | ||
1000 | * if we skipped any AG, and we still have scan count remaining, do | ||
1001 | * another pass this time using blocking reclaim semantics (i.e. ||
1002 | * waiting on the reclaim locks and ignoring the reclaim cursors). This ||
1003 | * ensures that when we get more reclaimers than AGs we block rather ||
1004 | * than spin trying to execute reclaim. | ||
1005 | */ | ||
1006 | if (trylock && skipped && *nr_to_scan > 0) { | ||
1007 | trylock = 0; | ||
1008 | goto restart; | ||
1009 | } | ||
1010 | return XFS_ERROR(last_error); | ||
1011 | } | ||
1012 | |||
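The cursor update in the loop above guards against wrapping past the end of an AG: after recording agino(ip->i_ino) + 1 as the next lookup index, a wrap to a smaller value means the walk has consumed the AG's inode range. A minimal user-space sketch of that arithmetic (the AGINO_BITS width and the masking macro are illustrative stand-ins, not XFS's real geometry):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for XFS_INO_TO_AGINO(): keep the low
     * AG-relative bits of an inode number, dropping the AG number
     * held in the high bits. (20 bits is an arbitrary choice.) */
    #define AGINO_BITS 20
    #define INO_TO_AGINO(ino) ((uint32_t)((ino) & ((1u << AGINO_BITS) - 1)))

    int main(void)
    {
        uint64_t last_ino = (1u << AGINO_BITS) - 1; /* last inode of the AG */
        uint32_t first_index = INO_TO_AGINO(last_ino + 1);
        int done = 0;

        /* Mirrors the loop body above: if the next lookup index wrapped
         * to a smaller agino, we have walked off the end of this AG. */
        if (first_index < INO_TO_AGINO(last_ino))
            done = 1;

        printf("first_index=%u done=%d\n", first_index, done);
        return 0;
    }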
871 | int | 1013 | int |
872 | xfs_reclaim_inodes( | 1014 | xfs_reclaim_inodes( |
873 | xfs_mount_t *mp, | 1015 | xfs_mount_t *mp, |
874 | int mode) | 1016 | int mode) |
875 | { | 1017 | { |
876 | return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, | 1018 | int nr_to_scan = INT_MAX; |
877 | XFS_ICI_RECLAIM_TAG, 1, NULL); | 1019 | |
1020 | return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); | ||
878 | } | 1021 | } |
879 | 1022 | ||
880 | /* | 1023 | /* |
881 | * Shrinker infrastructure. | 1024 | * Inode cache shrinker. |
1025 | * | ||
1026 | * When called, we make sure that there is a background (fast) inode reclaim in | ||
1027 | * progress, while we throttle the speed of reclaim by doing synchronous | ||
1028 | * reclaim of inodes. That means if we come across dirty inodes, we wait for | ||
1029 | * them to be cleaned, which we hope will not be very long due to the | ||
1030 | * background walker having already kicked the IO off on those dirty inodes. | ||
882 | */ | 1031 | */ |
883 | static int | 1032 | static int |
884 | xfs_reclaim_inode_shrink( | 1033 | xfs_reclaim_inode_shrink( |
885 | struct shrinker *shrink, | 1034 | struct shrinker *shrink, |
886 | int nr_to_scan, | 1035 | struct shrink_control *sc) |
887 | gfp_t gfp_mask) | ||
888 | { | 1036 | { |
889 | struct xfs_mount *mp; | 1037 | struct xfs_mount *mp; |
890 | struct xfs_perag *pag; | 1038 | struct xfs_perag *pag; |
891 | xfs_agnumber_t ag; | 1039 | xfs_agnumber_t ag; |
892 | int reclaimable; | 1040 | int reclaimable; |
1041 | int nr_to_scan = sc->nr_to_scan; | ||
1042 | gfp_t gfp_mask = sc->gfp_mask; | ||
893 | 1043 | ||
894 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); | 1044 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); |
895 | if (nr_to_scan) { | 1045 | if (nr_to_scan) { |
1046 | /* kick background reclaimer and push the AIL */ | ||
1047 | xfs_syncd_queue_reclaim(mp); | ||
1048 | xfs_ail_push_all(mp->m_ail); | ||
1049 | |||
896 | if (!(gfp_mask & __GFP_FS)) | 1050 | if (!(gfp_mask & __GFP_FS)) |
897 | return -1; | 1051 | return -1; |
898 | 1052 | ||
899 | xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, | 1053 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, |
900 | XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); | 1054 | &nr_to_scan); |
901 | /* if we don't exhaust the scan, don't bother coming back */ | 1055 | /* terminate if we don't exhaust the scan */ |
902 | if (nr_to_scan > 0) | 1056 | if (nr_to_scan > 0) |
903 | return -1; | 1057 | return -1; |
904 | } | 1058 | } |
905 | 1059 | ||
906 | reclaimable = 0; | 1060 | reclaimable = 0; |
907 | ag = 0; | 1061 | ag = 0; |
908 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, | 1062 | while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { |
909 | XFS_ICI_RECLAIM_TAG))) { | 1063 | ag = pag->pag_agno + 1; |
910 | reclaimable += pag->pag_ici_reclaimable; | 1064 | reclaimable += pag->pag_ici_reclaimable; |
911 | xfs_perag_put(pag); | 1065 | xfs_perag_put(pag); |
912 | } | 1066 | } |
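The shrinker hunk above also tracks a kernel API change: the callback now receives a single struct shrink_control carrying nr_to_scan and gfp_mask instead of separate arguments. A compilable user-space mock of that contract (the struct layouts and the __GFP_FS bit here are simplified stand-ins, not the kernel's definitions):

    #include <stdio.h>

    typedef unsigned int gfp_t;
    #define __GFP_FS 0x80u                  /* illustrative bit only */

    struct shrink_control {
        gfp_t         gfp_mask;
        unsigned long nr_to_scan;
    };

    struct shrinker {
        int (*shrink)(struct shrinker *, struct shrink_control *);
        int seeks;
    };

    /* Mirrors xfs_reclaim_inode_shrink()'s contract: refuse to do
     * filesystem work when the caller cannot re-enter the fs. */
    static int demo_shrink(struct shrinker *s, struct shrink_control *sc)
    {
        if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
            return -1;
        return 42;                          /* pretend reclaimable count */
    }

    int main(void)
    {
        struct shrinker s = { .shrink = demo_shrink, .seeks = 2 };
        struct shrink_control sc = { .gfp_mask = 0, .nr_to_scan = 10 };

        printf("shrink() -> %d\n", s.shrink(&s, &sc));
        return 0;
    }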
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index fe78726196f8..e3a6ad27415f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work { | |||
32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | 32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ | 33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ |
34 | 34 | ||
35 | extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
36 | |||
35 | int xfs_syncd_init(struct xfs_mount *mp); | 37 | int xfs_syncd_init(struct xfs_mount *mp); |
36 | void xfs_syncd_stop(struct xfs_mount *mp); | 38 | void xfs_syncd_stop(struct xfs_mount *mp); |
37 | 39 | ||
@@ -47,10 +49,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | |||
47 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 49 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
48 | struct xfs_inode *ip); | 50 | struct xfs_inode *ip); |
49 | 51 | ||
50 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); | 52 | int xfs_sync_inode_grab(struct xfs_inode *ip); |
51 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 53 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
52 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), | 54 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), |
53 | int flags, int tag, int write_lock, int *nr_to_scan); | 55 | int flags); |
54 | 56 | ||
55 | void xfs_inode_shrinker_register(struct xfs_mount *mp); | 57 | void xfs_inode_shrinker_register(struct xfs_mount *mp); |
56 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); | 58 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); |
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7bb5092d6ae4..ee2d2adaa438 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include <linux/sysctl.h> | 19 | #include <linux/sysctl.h> |
20 | #include <linux/proc_fs.h> | 20 | #include <linux/proc_fs.h> |
21 | #include "xfs_error.h" | ||
21 | 22 | ||
22 | static struct ctl_table_header *xfs_table_header; | 23 | static struct ctl_table_header *xfs_table_header; |
23 | 24 | ||
@@ -36,7 +37,7 @@ xfs_stats_clear_proc_handler( | |||
36 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); | 37 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); |
37 | 38 | ||
38 | if (!ret && write && *valp) { | 39 | if (!ret && write && *valp) { |
39 | printk("XFS Clearing xfsstats\n"); | 40 | xfs_notice(NULL, "Clearing xfsstats"); |
40 | for_each_possible_cpu(c) { | 41 | for_each_possible_cpu(c) { |
41 | preempt_disable(); | 42 | preempt_disable(); |
42 | /* save vn_active, it's a universal truth! */ | 43 | /* save vn_active, it's a universal truth! */ |
@@ -51,6 +52,26 @@ xfs_stats_clear_proc_handler( | |||
51 | 52 | ||
52 | return ret; | 53 | return ret; |
53 | } | 54 | } |
55 | |||
56 | STATIC int | ||
57 | xfs_panic_mask_proc_handler( | ||
58 | ctl_table *ctl, | ||
59 | int write, | ||
60 | void __user *buffer, | ||
61 | size_t *lenp, | ||
62 | loff_t *ppos) | ||
63 | { | ||
64 | int ret, *valp = ctl->data; | ||
65 | |||
66 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); | ||
67 | if (!ret && write) { | ||
68 | xfs_panic_mask = *valp; | ||
69 | #ifdef DEBUG | ||
70 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); | ||
71 | #endif | ||
72 | } | ||
73 | return ret; | ||
74 | } | ||
54 | #endif /* CONFIG_PROC_FS */ | 75 | #endif /* CONFIG_PROC_FS */ |
55 | 76 | ||
56 | static ctl_table xfs_table[] = { | 77 | static ctl_table xfs_table[] = { |
@@ -77,7 +98,7 @@ static ctl_table xfs_table[] = { | |||
77 | .data = &xfs_params.panic_mask.val, | 98 | .data = &xfs_params.panic_mask.val, |
78 | .maxlen = sizeof(int), | 99 | .maxlen = sizeof(int), |
79 | .mode = 0644, | 100 | .mode = 0644, |
80 | .proc_handler = proc_dointvec_minmax, | 101 | .proc_handler = xfs_panic_mask_proc_handler, |
81 | .extra1 = &xfs_params.panic_mask.min, | 102 | .extra1 = &xfs_params.panic_mask.min, |
82 | .extra2 = &xfs_params.panic_mask.max | 103 | .extra2 = &xfs_params.panic_mask.max |
83 | }, | 104 | }, |
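The new xfs_panic_mask_proc_handler shows a common sysctl pattern: let proc_dointvec_minmax() do the range-checked store, then post-process the accepted value (here, forcing extra panic tags on DEBUG builds). A user-space analog of that wrap-then-adjust flow, with made-up tag values:

    #include <stdio.h>

    #define PTAG_SHUTDOWN_CORRUPT 0x01      /* illustrative values only */
    #define PTAG_LOGRES           0x02

    static int panic_mask;

    /* Stand-in for proc_dointvec_minmax(): range-check and store. */
    static int store_int_minmax(int *slot, int val, int min, int max)
    {
        if (val < min || val > max)
            return -1;
        *slot = val;
        return 0;
    }

    static int panic_mask_handler(int val)
    {
        int stored = 0;
        int ret = store_int_minmax(&stored, val, 0, 0xff);

        if (!ret) {
            panic_mask = stored;
    #ifdef DEBUG
            /* DEBUG builds always panic on these classes of error. */
            panic_mask |= PTAG_SHUTDOWN_CORRUPT | PTAG_LOGRES;
    #endif
        }
        return ret;
    }

    int main(void)
    {
        panic_mask_handler(0x10);
        printf("panic_mask=0x%x\n", panic_mask);
        return 0;
    }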
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index be5dffd282a1..d48b7a579ae1 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \ | |||
124 | unsigned long caller_ip), \ | 124 | unsigned long caller_ip), \ |
125 | TP_ARGS(mp, agno, refcount, caller_ip)) | 125 | TP_ARGS(mp, agno, refcount, caller_ip)) |
126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); | 126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); |
127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); | 127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); |
128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); | 128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); |
129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); | 129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); |
130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); | 130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); |
@@ -325,13 +325,12 @@ DEFINE_BUF_EVENT(xfs_buf_lock); | |||
325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); | 325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); |
326 | DEFINE_BUF_EVENT(xfs_buf_cond_lock); | 326 | DEFINE_BUF_EVENT(xfs_buf_cond_lock); |
327 | DEFINE_BUF_EVENT(xfs_buf_unlock); | 327 | DEFINE_BUF_EVENT(xfs_buf_unlock); |
328 | DEFINE_BUF_EVENT(xfs_buf_ordered_retry); | ||
329 | DEFINE_BUF_EVENT(xfs_buf_iowait); | 328 | DEFINE_BUF_EVENT(xfs_buf_iowait); |
330 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); | 329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); |
331 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); | 330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); |
332 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); | 331 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); |
333 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); | 332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); |
334 | DEFINE_BUF_EVENT(xfs_buf_get_noaddr); | 333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); |
335 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); | 334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); |
336 | DEFINE_BUF_EVENT(xfs_buf_item_relse); | 335 | DEFINE_BUF_EVENT(xfs_buf_item_relse); |
337 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); | 336 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); |
@@ -767,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
767 | __field(int, curr_res) | 766 | __field(int, curr_res) |
768 | __field(int, unit_res) | 767 | __field(int, unit_res) |
769 | __field(unsigned int, flags) | 768 | __field(unsigned int, flags) |
770 | __field(void *, reserve_headq) | 769 | __field(int, reserveq) |
771 | __field(void *, write_headq) | 770 | __field(int, writeq) |
772 | __field(int, grant_reserve_cycle) | 771 | __field(int, grant_reserve_cycle) |
773 | __field(int, grant_reserve_bytes) | 772 | __field(int, grant_reserve_bytes) |
774 | __field(int, grant_write_cycle) | 773 | __field(int, grant_write_cycle) |
@@ -785,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
785 | __entry->curr_res = tic->t_curr_res; | 784 | __entry->curr_res = tic->t_curr_res; |
786 | __entry->unit_res = tic->t_unit_res; | 785 | __entry->unit_res = tic->t_unit_res; |
787 | __entry->flags = tic->t_flags; | 786 | __entry->flags = tic->t_flags; |
788 | __entry->reserve_headq = log->l_reserve_headq; | 787 | __entry->reserveq = list_empty(&log->l_reserveq); |
789 | __entry->write_headq = log->l_write_headq; | 788 | __entry->writeq = list_empty(&log->l_writeq); |
790 | __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; | 789 | xlog_crack_grant_head(&log->l_grant_reserve_head, |
791 | __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; | 790 | &__entry->grant_reserve_cycle, |
792 | __entry->grant_write_cycle = log->l_grant_write_cycle; | 791 | &__entry->grant_reserve_bytes); |
793 | __entry->grant_write_bytes = log->l_grant_write_bytes; | 792 | xlog_crack_grant_head(&log->l_grant_write_head, |
793 | &__entry->grant_write_cycle, | ||
794 | &__entry->grant_write_bytes); | ||
794 | __entry->curr_cycle = log->l_curr_cycle; | 795 | __entry->curr_cycle = log->l_curr_cycle; |
795 | __entry->curr_block = log->l_curr_block; | 796 | __entry->curr_block = log->l_curr_block; |
796 | __entry->tail_lsn = log->l_tail_lsn; | 797 | __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); |
797 | ), | 798 | ), |
798 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " | 799 | TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " |
799 | "t_unit_res %u t_flags %s reserve_headq 0x%p " | 800 | "t_unit_res %u t_flags %s reserveq %s " |
800 | "write_headq 0x%p grant_reserve_cycle %d " | 801 | "writeq %s grant_reserve_cycle %d " |
801 | "grant_reserve_bytes %d grant_write_cycle %d " | 802 | "grant_reserve_bytes %d grant_write_cycle %d " |
802 | "grant_write_bytes %d curr_cycle %d curr_block %d " | 803 | "grant_write_bytes %d curr_cycle %d curr_block %d " |
803 | "tail_cycle %d tail_block %d", | 804 | "tail_cycle %d tail_block %d", |
@@ -808,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class, | |||
808 | __entry->curr_res, | 809 | __entry->curr_res, |
809 | __entry->unit_res, | 810 | __entry->unit_res, |
810 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), | 811 | __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), |
811 | __entry->reserve_headq, | 812 | __entry->reserveq ? "empty" : "active", |
812 | __entry->write_headq, | 813 | __entry->writeq ? "empty" : "active", |
813 | __entry->grant_reserve_cycle, | 814 | __entry->grant_reserve_cycle, |
814 | __entry->grant_reserve_bytes, | 815 | __entry->grant_reserve_bytes, |
815 | __entry->grant_write_cycle, | 816 | __entry->grant_write_cycle, |
@@ -836,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); | |||
836 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); | 837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); |
837 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); | 838 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); |
838 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); | 839 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); | ||
839 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); | 841 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); |
840 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); | 842 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); |
841 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); | 843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); |
@@ -843,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); | |||
843 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); | 845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); |
844 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); | 846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); |
845 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); | 847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); |
848 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); | ||
846 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); | 849 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); |
847 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); | 850 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); |
848 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); | 851 | DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); |
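These trace hunks follow the log-grant rework in this merge: the separate (cycle, bytes) grant head fields become single atomic64_t values that xlog_crack_grant_head() splits back apart for the tracepoint. A plausible sketch of that pack/unpack, assuming the cycle lives in the high 32 bits and the byte count in the low 32:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed encoding: cycle in bits 63..32, space in bits 31..0. */
    static int64_t grant_head_combine(int cycle, int space)
    {
        return ((int64_t)cycle << 32) | (uint32_t)space;
    }

    static void grant_head_crack(int64_t head, int *cycle, int *space)
    {
        *cycle = (int)(head >> 32);
        *space = (int)(head & 0xffffffff);
    }

    int main(void)
    {
        int cycle, space;
        int64_t head = grant_head_combine(7, 123456);

        grant_head_crack(head, &cycle, &space);
        printf("cycle=%d space=%d\n", cycle, space);
        return 0;
    }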
@@ -936,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage); | |||
936 | DEFINE_PAGE_EVENT(xfs_releasepage); | 939 | DEFINE_PAGE_EVENT(xfs_releasepage); |
937 | DEFINE_PAGE_EVENT(xfs_invalidatepage); | 940 | DEFINE_PAGE_EVENT(xfs_invalidatepage); |
938 | 941 | ||
939 | DECLARE_EVENT_CLASS(xfs_iomap_class, | 942 | DECLARE_EVENT_CLASS(xfs_imap_class, |
940 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, | 943 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, |
941 | int flags, struct xfs_bmbt_irec *irec), | 944 | int type, struct xfs_bmbt_irec *irec), |
942 | TP_ARGS(ip, offset, count, flags, irec), | 945 | TP_ARGS(ip, offset, count, type, irec), |
943 | TP_STRUCT__entry( | 946 | TP_STRUCT__entry( |
944 | __field(dev_t, dev) | 947 | __field(dev_t, dev) |
945 | __field(xfs_ino_t, ino) | 948 | __field(xfs_ino_t, ino) |
@@ -947,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
947 | __field(loff_t, new_size) | 950 | __field(loff_t, new_size) |
948 | __field(loff_t, offset) | 951 | __field(loff_t, offset) |
949 | __field(size_t, count) | 952 | __field(size_t, count) |
950 | __field(int, flags) | 953 | __field(int, type) |
951 | __field(xfs_fileoff_t, startoff) | 954 | __field(xfs_fileoff_t, startoff) |
952 | __field(xfs_fsblock_t, startblock) | 955 | __field(xfs_fsblock_t, startblock) |
953 | __field(xfs_filblks_t, blockcount) | 956 | __field(xfs_filblks_t, blockcount) |
@@ -959,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
959 | __entry->new_size = ip->i_new_size; | 962 | __entry->new_size = ip->i_new_size; |
960 | __entry->offset = offset; | 963 | __entry->offset = offset; |
961 | __entry->count = count; | 964 | __entry->count = count; |
962 | __entry->flags = flags; | 965 | __entry->type = type; |
963 | __entry->startoff = irec ? irec->br_startoff : 0; | 966 | __entry->startoff = irec ? irec->br_startoff : 0; |
964 | __entry->startblock = irec ? irec->br_startblock : 0; | 967 | __entry->startblock = irec ? irec->br_startblock : 0; |
965 | __entry->blockcount = irec ? irec->br_blockcount : 0; | 968 | __entry->blockcount = irec ? irec->br_blockcount : 0; |
966 | ), | 969 | ), |
967 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " | 970 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " |
968 | "offset 0x%llx count %zd flags %s " | 971 | "offset 0x%llx count %zd type %s " |
969 | "startoff 0x%llx startblock %lld blockcount 0x%llx", | 972 | "startoff 0x%llx startblock %lld blockcount 0x%llx", |
970 | MAJOR(__entry->dev), MINOR(__entry->dev), | 973 | MAJOR(__entry->dev), MINOR(__entry->dev), |
971 | __entry->ino, | 974 | __entry->ino, |
@@ -973,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class, | |||
973 | __entry->new_size, | 976 | __entry->new_size, |
974 | __entry->offset, | 977 | __entry->offset, |
975 | __entry->count, | 978 | __entry->count, |
976 | __print_flags(__entry->flags, "|", BMAPI_FLAGS), | 979 | __print_symbolic(__entry->type, XFS_IO_TYPES), |
977 | __entry->startoff, | 980 | __entry->startoff, |
978 | (__int64_t)__entry->startblock, | 981 | (__int64_t)__entry->startblock, |
979 | __entry->blockcount) | 982 | __entry->blockcount) |
980 | ) | 983 | ) |
981 | 984 | ||
982 | #define DEFINE_IOMAP_EVENT(name) \ | 985 | #define DEFINE_IOMAP_EVENT(name) \ |
983 | DEFINE_EVENT(xfs_iomap_class, name, \ | 986 | DEFINE_EVENT(xfs_imap_class, name, \ |
984 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ | 987 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ |
985 | int flags, struct xfs_bmbt_irec *irec), \ | 988 | int type, struct xfs_bmbt_irec *irec), \ |
986 | TP_ARGS(ip, offset, count, flags, irec)) | 989 | TP_ARGS(ip, offset, count, type, irec)) |
987 | DEFINE_IOMAP_EVENT(xfs_iomap_enter); | 990 | DEFINE_IOMAP_EVENT(xfs_map_blocks_found); |
988 | DEFINE_IOMAP_EVENT(xfs_iomap_found); | 991 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); |
989 | DEFINE_IOMAP_EVENT(xfs_iomap_alloc); | 992 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); |
993 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); | ||
990 | 994 | ||
991 | DECLARE_EVENT_CLASS(xfs_simple_io_class, | 995 | DECLARE_EVENT_CLASS(xfs_simple_io_class, |
992 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), | 996 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), |
@@ -1023,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ | |||
1023 | TP_ARGS(ip, offset, count)) | 1027 | TP_ARGS(ip, offset, count)) |
1024 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); | 1028 | DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); |
1025 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); | 1029 | DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); |
1030 | DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); | ||
1026 | 1031 | ||
1027 | 1032 | ||
1028 | TRACE_EVENT(xfs_itruncate_start, | 1033 | TRACE_EVENT(xfs_itruncate_start, |
@@ -1146,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap, | |||
1146 | 1151 | ||
1147 | ); | 1152 | ); |
1148 | 1153 | ||
1149 | #define XFS_BUSY_SYNC \ | 1154 | DECLARE_EVENT_CLASS(xfs_busy_class, |
1150 | { 0, "async" }, \ | ||
1151 | { 1, "sync" } | ||
1152 | |||
1153 | TRACE_EVENT(xfs_alloc_busy, | ||
1154 | TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, | ||
1155 | xfs_agblock_t agbno, xfs_extlen_t len, int sync), | ||
1156 | TP_ARGS(trans, agno, agbno, len, sync), | ||
1157 | TP_STRUCT__entry( | ||
1158 | __field(dev_t, dev) | ||
1159 | __field(struct xfs_trans *, tp) | ||
1160 | __field(int, tid) | ||
1161 | __field(xfs_agnumber_t, agno) | ||
1162 | __field(xfs_agblock_t, agbno) | ||
1163 | __field(xfs_extlen_t, len) | ||
1164 | __field(int, sync) | ||
1165 | ), | ||
1166 | TP_fast_assign( | ||
1167 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
1168 | __entry->tp = trans; | ||
1169 | __entry->tid = trans->t_ticket->t_tid; | ||
1170 | __entry->agno = agno; | ||
1171 | __entry->agbno = agbno; | ||
1172 | __entry->len = len; | ||
1173 | __entry->sync = sync; | ||
1174 | ), | ||
1175 | TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", | ||
1176 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1177 | __entry->tp, | ||
1178 | __entry->tid, | ||
1179 | __entry->agno, | ||
1180 | __entry->agbno, | ||
1181 | __entry->len, | ||
1182 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) | ||
1183 | |||
1184 | ); | ||
1185 | |||
1186 | TRACE_EVENT(xfs_alloc_unbusy, | ||
1187 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1155 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1188 | xfs_agblock_t agbno, xfs_extlen_t len), | 1156 | xfs_agblock_t agbno, xfs_extlen_t len), |
1189 | TP_ARGS(mp, agno, agbno, len), | 1157 | TP_ARGS(mp, agno, agbno, len), |
@@ -1205,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy, | |||
1205 | __entry->agbno, | 1173 | __entry->agbno, |
1206 | __entry->len) | 1174 | __entry->len) |
1207 | ); | 1175 | ); |
1208 | 1176 | #define DEFINE_BUSY_EVENT(name) \ | |
1209 | #define XFS_BUSY_STATES \ | 1177 | DEFINE_EVENT(xfs_busy_class, name, \ |
1210 | { 0, "missing" }, \ | 1178 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ |
1211 | { 1, "found" } | 1179 | xfs_agblock_t agbno, xfs_extlen_t len), \ |
1212 | 1180 | TP_ARGS(mp, agno, agbno, len)) | |
1213 | TRACE_EVENT(xfs_alloc_busysearch, | 1181 | DEFINE_BUSY_EVENT(xfs_alloc_busy); |
1182 | DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); | ||
1183 | DEFINE_BUSY_EVENT(xfs_alloc_busy_force); | ||
1184 | DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); | ||
1185 | DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); | ||
1186 | |||
1187 | TRACE_EVENT(xfs_alloc_busy_trim, | ||
1214 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1188 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1215 | xfs_agblock_t agbno, xfs_extlen_t len, int found), | 1189 | xfs_agblock_t agbno, xfs_extlen_t len, |
1216 | TP_ARGS(mp, agno, agbno, len, found), | 1190 | xfs_agblock_t tbno, xfs_extlen_t tlen), |
1191 | TP_ARGS(mp, agno, agbno, len, tbno, tlen), | ||
1217 | TP_STRUCT__entry( | 1192 | TP_STRUCT__entry( |
1218 | __field(dev_t, dev) | 1193 | __field(dev_t, dev) |
1219 | __field(xfs_agnumber_t, agno) | 1194 | __field(xfs_agnumber_t, agno) |
1220 | __field(xfs_agblock_t, agbno) | 1195 | __field(xfs_agblock_t, agbno) |
1221 | __field(xfs_extlen_t, len) | 1196 | __field(xfs_extlen_t, len) |
1222 | __field(int, found) | 1197 | __field(xfs_agblock_t, tbno) |
1198 | __field(xfs_extlen_t, tlen) | ||
1223 | ), | 1199 | ), |
1224 | TP_fast_assign( | 1200 | TP_fast_assign( |
1225 | __entry->dev = mp->m_super->s_dev; | 1201 | __entry->dev = mp->m_super->s_dev; |
1226 | __entry->agno = agno; | 1202 | __entry->agno = agno; |
1227 | __entry->agbno = agbno; | 1203 | __entry->agbno = agbno; |
1228 | __entry->len = len; | 1204 | __entry->len = len; |
1229 | __entry->found = found; | 1205 | __entry->tbno = tbno; |
1206 | __entry->tlen = tlen; | ||
1230 | ), | 1207 | ), |
1231 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", | 1208 | TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", |
1232 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1209 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1233 | __entry->agno, | 1210 | __entry->agno, |
1234 | __entry->agbno, | 1211 | __entry->agbno, |
1235 | __entry->len, | 1212 | __entry->len, |
1236 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1213 | __entry->tbno, |
1214 | __entry->tlen) | ||
1237 | ); | 1215 | ); |
1238 | 1216 | ||
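The new xfs_alloc_busy_trim event logs the extent both before (agbno, len) and after (tbno, tlen) it has been trimmed against busy ranges. A hypothetical sketch of one trim step, clipping a candidate extent against a single busy interval and keeping the larger surviving side (the real code iterates a busy-extent tree and handles more cases):

    #include <stdio.h>

    typedef unsigned int agblock_t;
    typedef unsigned int extlen_t;

    /* Clip [bno, bno+len) against the busy range [bbno, bbno+blen),
     * keeping the larger leftover side. */
    static void trim_against_busy(agblock_t bno, extlen_t len,
                                  agblock_t bbno, extlen_t blen,
                                  agblock_t *tbno, extlen_t *tlen)
    {
        agblock_t end = bno + len, bend = bbno + blen;
        extlen_t left = bbno > bno ? bbno - bno : 0;    /* before busy */
        extlen_t right = end > bend ? end - bend : 0;   /* after busy */

        if (bbno >= end || bend <= bno) {               /* no overlap */
            *tbno = bno; *tlen = len;
        } else if (left >= right) {
            *tbno = bno; *tlen = left;
        } else {
            *tbno = bend; *tlen = right;
        }
    }

    int main(void)
    {
        agblock_t tbno; extlen_t tlen;

        trim_against_busy(100, 50, 105, 10, &tbno, &tlen);
        printf("tbno=%u tlen=%u\n", tbno, tlen);        /* 115, 35 */
        return 0;
    }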
1239 | TRACE_EVENT(xfs_trans_commit_lsn, | 1217 | TRACE_EVENT(xfs_trans_commit_lsn, |
@@ -1413,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, | |||
1413 | __entry->wasfromfl, | 1391 | __entry->wasfromfl, |
1414 | __entry->isfl, | 1392 | __entry->isfl, |
1415 | __entry->userdata, | 1393 | __entry->userdata, |
1416 | __entry->firstblock) | 1394 | (unsigned long long)__entry->firstblock) |
1417 | ) | 1395 | ) |
1418 | 1396 | ||
1419 | #define DEFINE_ALLOC_EVENT(name) \ | 1397 | #define DEFINE_ALLOC_EVENT(name) \ |
@@ -1421,17 +1399,21 @@ DEFINE_EVENT(xfs_alloc_class, name, \ | |||
1421 | TP_PROTO(struct xfs_alloc_arg *args), \ | 1399 | TP_PROTO(struct xfs_alloc_arg *args), \ |
1422 | TP_ARGS(args)) | 1400 | TP_ARGS(args)) |
1423 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); | 1401 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); |
1402 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); | ||
1424 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); | 1403 | DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); |
1425 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); | 1404 | DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); |
1426 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); | 1405 | DEFINE_ALLOC_EVENT(xfs_alloc_near_first); |
1427 | DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); | 1406 | DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); |
1428 | DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); | 1407 | DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); |
1429 | DEFINE_ALLOC_EVENT(xfs_alloc_near_error); | 1408 | DEFINE_ALLOC_EVENT(xfs_alloc_near_error); |
1409 | DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); | ||
1410 | DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); | ||
1430 | DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); | 1411 | DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); |
1431 | DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); | 1412 | DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); |
1432 | DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); | 1413 | DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); |
1433 | DEFINE_ALLOC_EVENT(xfs_alloc_size_done); | 1414 | DEFINE_ALLOC_EVENT(xfs_alloc_size_done); |
1434 | DEFINE_ALLOC_EVENT(xfs_alloc_size_error); | 1415 | DEFINE_ALLOC_EVENT(xfs_alloc_size_error); |
1416 | DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); | ||
1435 | DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); | 1417 | DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); |
1436 | DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); | 1418 | DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); |
1437 | DEFINE_ALLOC_EVENT(xfs_alloc_small_done); | 1419 | DEFINE_ALLOC_EVENT(xfs_alloc_small_done); |
@@ -1753,6 +1735,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); | |||
1753 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); | 1735 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); |
1754 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); | 1736 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); |
1755 | 1737 | ||
1738 | DECLARE_EVENT_CLASS(xfs_discard_class, | ||
1739 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
1740 | xfs_agblock_t agbno, xfs_extlen_t len), | ||
1741 | TP_ARGS(mp, agno, agbno, len), | ||
1742 | TP_STRUCT__entry( | ||
1743 | __field(dev_t, dev) | ||
1744 | __field(xfs_agnumber_t, agno) | ||
1745 | __field(xfs_agblock_t, agbno) | ||
1746 | __field(xfs_extlen_t, len) | ||
1747 | ), | ||
1748 | TP_fast_assign( | ||
1749 | __entry->dev = mp->m_super->s_dev; | ||
1750 | __entry->agno = agno; | ||
1751 | __entry->agbno = agbno; | ||
1752 | __entry->len = len; | ||
1753 | ), | ||
1754 | TP_printk("dev %d:%d agno %u agbno %u len %u", | ||
1755 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1756 | __entry->agno, | ||
1757 | __entry->agbno, | ||
1758 | __entry->len) | ||
1759 | ) | ||
1760 | |||
1761 | #define DEFINE_DISCARD_EVENT(name) \ | ||
1762 | DEFINE_EVENT(xfs_discard_class, name, \ | ||
1763 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | ||
1764 | xfs_agblock_t agbno, xfs_extlen_t len), \ | ||
1765 | TP_ARGS(mp, agno, agbno, len)) | ||
1766 | DEFINE_DISCARD_EVENT(xfs_discard_extent); | ||
1767 | DEFINE_DISCARD_EVENT(xfs_discard_toosmall); | ||
1768 | DEFINE_DISCARD_EVENT(xfs_discard_exclude); | ||
1769 | DEFINE_DISCARD_EVENT(xfs_discard_busy); | ||
1770 | |||
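DECLARE_EVENT_CLASS/DEFINE_EVENT keeps one assign/print body shared by several named events; the discard events above reuse a single class with only the event name differing. A loose user-space analog of that sharing, a single formatter plus thin named wrappers (the kernel macros generate far more, including the trace_xfs_discard_extent() call points used elsewhere in the series):

    #include <stdio.h>

    /* Shared "event class" body: one formatter for all events. */
    static void discard_class_print(const char *name, unsigned agno,
                                    unsigned agbno, unsigned len)
    {
        printf("%s: agno %u agbno %u len %u\n", name, agno, agbno, len);
    }

    /* Each DEFINE_EVENT-style wrapper only supplies the event name. */
    #define DEFINE_DISCARD_EVENT(fn)                                    \
        static void fn(unsigned agno, unsigned agbno, unsigned len)     \
        {                                                               \
            discard_class_print(#fn, agno, agbno, len);                 \
        }

    DEFINE_DISCARD_EVENT(trace_discard_extent)
    DEFINE_DISCARD_EVENT(trace_discard_toosmall)

    int main(void)
    {
        trace_discard_extent(3, 1024, 16);
        trace_discard_toosmall(3, 4096, 1);
        return 0;
    }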
1756 | #endif /* _TRACE_XFS_H */ | 1771 | #endif /* _TRACE_XFS_H */ |
1757 | 1772 | ||
1758 | #undef TRACE_INCLUDE_PATH | 1773 | #undef TRACE_INCLUDE_PATH |
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h deleted file mode 100644 index f8d279d7563a..000000000000 --- a/fs/xfs/linux-2.6/xfs_version.h +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_VERSION_H__ | ||
19 | #define __XFS_VERSION_H__ | ||
20 | |||
21 | /* | ||
22 | * Dummy file that can contain a timestamp to put into the | ||
23 | * XFS init string, to help users keep track of what they're | ||
24 | * running | ||
25 | */ | ||
26 | |||
27 | #define XFS_VERSION_STRING "SGI XFS" | ||
28 | |||
29 | #endif /* __XFS_VERSION_H__ */ | ||
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index e1a2f6800e01..6fa214603819 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -149,7 +149,6 @@ xfs_qm_dqdestroy( | |||
149 | ASSERT(list_empty(&dqp->q_freelist)); | 149 | ASSERT(list_empty(&dqp->q_freelist)); |
150 | 150 | ||
151 | mutex_destroy(&dqp->q_qlock); | 151 | mutex_destroy(&dqp->q_qlock); |
152 | sv_destroy(&dqp->q_pinwait); | ||
153 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); | 152 | kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); |
154 | 153 | ||
155 | atomic_dec(&xfs_Gqm->qm_totaldquots); | 154 | atomic_dec(&xfs_Gqm->qm_totaldquots); |
@@ -463,87 +462,68 @@ xfs_qm_dqtobp( | |||
463 | uint flags) | 462 | uint flags) |
464 | { | 463 | { |
465 | xfs_bmbt_irec_t map; | 464 | xfs_bmbt_irec_t map; |
466 | int nmaps, error; | 465 | int nmaps = 1, error; |
467 | xfs_buf_t *bp; | 466 | xfs_buf_t *bp; |
468 | xfs_inode_t *quotip; | 467 | xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); |
469 | xfs_mount_t *mp; | 468 | xfs_mount_t *mp = dqp->q_mount; |
470 | xfs_disk_dquot_t *ddq; | 469 | xfs_disk_dquot_t *ddq; |
471 | xfs_dqid_t id; | 470 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); |
472 | boolean_t newdquot; | ||
473 | xfs_trans_t *tp = (tpp ? *tpp : NULL); | 471 | xfs_trans_t *tp = (tpp ? *tpp : NULL); |
474 | 472 | ||
475 | mp = dqp->q_mount; | 473 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; |
476 | id = be32_to_cpu(dqp->q_core.d_id); | ||
477 | nmaps = 1; | ||
478 | newdquot = B_FALSE; | ||
479 | 474 | ||
480 | /* | 475 | xfs_ilock(quotip, XFS_ILOCK_SHARED); |
481 | * If we don't know where the dquot lives, find out. | 476 | if (XFS_IS_THIS_QUOTA_OFF(dqp)) { |
482 | */ | ||
483 | if (dqp->q_blkno == (xfs_daddr_t) 0) { | ||
484 | /* We use the id as an index */ | ||
485 | dqp->q_fileoffset = (xfs_fileoff_t)id / | ||
486 | mp->m_quotainfo->qi_dqperchunk; | ||
487 | nmaps = 1; | ||
488 | quotip = XFS_DQ_TO_QIP(dqp); | ||
489 | xfs_ilock(quotip, XFS_ILOCK_SHARED); | ||
490 | /* | 477 | /* |
491 | * Return if this type of quotas is turned off while we didn't | 478 | * Return if this type of quotas is turned off while we |
492 | * have an inode lock | 479 | * didn't have the quota inode lock. |
493 | */ | 480 | */ |
494 | if (XFS_IS_THIS_QUOTA_OFF(dqp)) { | 481 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); |
495 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 482 | return ESRCH; |
496 | return (ESRCH); | 483 | } |
497 | } | 484 | |
485 | /* | ||
486 | * Find the block map; no allocations yet | ||
487 | */ | ||
488 | error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, | ||
489 | XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, | ||
490 | NULL, 0, &map, &nmaps, NULL); | ||
491 | |||
492 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | ||
493 | if (error) | ||
494 | return error; | ||
495 | |||
496 | ASSERT(nmaps == 1); | ||
497 | ASSERT(map.br_blockcount == 1); | ||
498 | |||
499 | /* | ||
500 | * Offset of dquot in the (fixed sized) dquot chunk. | ||
501 | */ | ||
502 | dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * | ||
503 | sizeof(xfs_dqblk_t); | ||
504 | |||
505 | ASSERT(map.br_startblock != DELAYSTARTBLOCK); | ||
506 | if (map.br_startblock == HOLESTARTBLOCK) { | ||
498 | /* | 507 | /* |
499 | * Find the block map; no allocations yet | 508 | * We don't allocate unless we're asked to |
500 | */ | 509 | */ |
501 | error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, | 510 | if (!(flags & XFS_QMOPT_DQALLOC)) |
502 | XFS_DQUOT_CLUSTER_SIZE_FSB, | 511 | return ENOENT; |
503 | XFS_BMAPI_METADATA, | ||
504 | NULL, 0, &map, &nmaps, NULL); | ||
505 | 512 | ||
506 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 513 | ASSERT(tp); |
514 | error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, | ||
515 | dqp->q_fileoffset, &bp); | ||
507 | if (error) | 516 | if (error) |
508 | return (error); | 517 | return error; |
509 | ASSERT(nmaps == 1); | 518 | tp = *tpp; |
510 | ASSERT(map.br_blockcount == 1); | 519 | } else { |
520 | trace_xfs_dqtobp_read(dqp); | ||
511 | 521 | ||
512 | /* | 522 | /* |
513 | * offset of dquot in the (fixed sized) dquot chunk. | 523 | * store the blkno etc so that we don't have to do the |
524 | * mapping all the time | ||
514 | */ | 525 | */ |
515 | dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * | 526 | dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); |
516 | sizeof(xfs_dqblk_t); | ||
517 | if (map.br_startblock == HOLESTARTBLOCK) { | ||
518 | /* | ||
519 | * We don't allocate unless we're asked to | ||
520 | */ | ||
521 | if (!(flags & XFS_QMOPT_DQALLOC)) | ||
522 | return (ENOENT); | ||
523 | |||
524 | ASSERT(tp); | ||
525 | if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, | ||
526 | dqp->q_fileoffset, &bp))) | ||
527 | return (error); | ||
528 | tp = *tpp; | ||
529 | newdquot = B_TRUE; | ||
530 | } else { | ||
531 | /* | ||
532 | * store the blkno etc so that we don't have to do the | ||
533 | * mapping all the time | ||
534 | */ | ||
535 | dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); | ||
536 | } | ||
537 | } | ||
538 | ASSERT(dqp->q_blkno != DELAYSTARTBLOCK); | ||
539 | ASSERT(dqp->q_blkno != HOLESTARTBLOCK); | ||
540 | |||
541 | /* | ||
542 | * Read in the buffer, unless we've just done the allocation | ||
543 | * (in which case we already have the buf). | ||
544 | */ | ||
545 | if (!newdquot) { | ||
546 | trace_xfs_dqtobp_read(dqp); | ||
547 | 527 | ||
548 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 528 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
549 | dqp->q_blkno, | 529 | dqp->q_blkno, |
@@ -552,20 +532,22 @@ xfs_qm_dqtobp( | |||
552 | if (error || !bp) | 532 | if (error || !bp) |
553 | return XFS_ERROR(error); | 533 | return XFS_ERROR(error); |
554 | } | 534 | } |
535 | |||
555 | ASSERT(XFS_BUF_ISBUSY(bp)); | 536 | ASSERT(XFS_BUF_ISBUSY(bp)); |
556 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 537 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
557 | 538 | ||
558 | /* | 539 | /* |
559 | * calculate the location of the dquot inside the buffer. | 540 | * calculate the location of the dquot inside the buffer. |
560 | */ | 541 | */ |
561 | ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset); | 542 | ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); |
562 | 543 | ||
563 | /* | 544 | /* |
564 | * A simple sanity check in case we got a corrupted dquot... | 545 | * A simple sanity check in case we got a corrupted dquot... |
565 | */ | 546 | */ |
566 | if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, | 547 | error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, |
567 | flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), | 548 | flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), |
568 | "dqtobp")) { | 549 | "dqtobp"); |
550 | if (error) { | ||
569 | if (!(flags & XFS_QMOPT_DQREPAIR)) { | 551 | if (!(flags & XFS_QMOPT_DQREPAIR)) { |
570 | xfs_trans_brelse(tp, bp); | 552 | xfs_trans_brelse(tp, bp); |
571 | return XFS_ERROR(EIO); | 553 | return XFS_ERROR(EIO); |
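The simplified xfs_qm_dqtobp() computes where a dquot lives purely from its id: the quota-file offset is id divided by dquots-per-chunk, and the byte offset within the chunk's buffer is (id % dquots-per-chunk) * sizeof(xfs_dqblk_t). A small sketch of that arithmetic, with an assumed 4096-byte chunk and an illustrative dqblk size (the real sizes come from the mount's quotainfo):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Illustrative geometry: 4096-byte chunk, 136-byte dqblk,
         * giving 30 dquots per chunk. */
        const uint32_t dqblk_size = 136;
        const uint32_t dqperchunk = 4096 / dqblk_size;
        uint32_t id = 12345;

        uint64_t fileoffset = id / dqperchunk;               /* chunk index */
        uint32_t bufoffset = (id % dqperchunk) * dqblk_size; /* byte offset */

        printf("id %u -> chunk %llu, offset %u bytes\n",
               id, (unsigned long long)fileoffset, bufoffset);
        return 0;
    }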
@@ -618,7 +600,7 @@ xfs_qm_dqread( | |||
618 | 600 | ||
619 | /* | 601 | /* |
620 | * Reservation counters are defined as reservation plus current usage | 602 | * Reservation counters are defined as reservation plus current usage |
621 | * to avoid having to add everytime. | 603 | * to avoid having to add every time. |
622 | */ | 604 | */ |
623 | dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); | 605 | dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); |
624 | dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); | 606 | dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); |
@@ -846,7 +828,7 @@ xfs_qm_dqget( | |||
846 | if (xfs_do_dqerror) { | 828 | if (xfs_do_dqerror) { |
847 | if ((xfs_dqerror_target == mp->m_ddev_targp) && | 829 | if ((xfs_dqerror_target == mp->m_ddev_targp) && |
848 | (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { | 830 | (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { |
849 | cmn_err(CE_DEBUG, "Returning error in dqget"); | 831 | xfs_debug(mp, "Returning error in dqget"); |
850 | return (EIO); | 832 | return (EIO); |
851 | } | 833 | } |
852 | } | 834 | } |
@@ -1176,18 +1158,18 @@ xfs_qm_dqflush( | |||
1176 | xfs_dquot_t *dqp, | 1158 | xfs_dquot_t *dqp, |
1177 | uint flags) | 1159 | uint flags) |
1178 | { | 1160 | { |
1179 | xfs_mount_t *mp; | 1161 | struct xfs_mount *mp = dqp->q_mount; |
1180 | xfs_buf_t *bp; | 1162 | struct xfs_buf *bp; |
1181 | xfs_disk_dquot_t *ddqp; | 1163 | struct xfs_disk_dquot *ddqp; |
1182 | int error; | 1164 | int error; |
1183 | 1165 | ||
1184 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 1166 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
1185 | ASSERT(!completion_done(&dqp->q_flush)); | 1167 | ASSERT(!completion_done(&dqp->q_flush)); |
1168 | |||
1186 | trace_xfs_dqflush(dqp); | 1169 | trace_xfs_dqflush(dqp); |
1187 | 1170 | ||
1188 | /* | 1171 | /* |
1189 | * If not dirty, or it's pinned and we are not supposed to | 1172 | * If not dirty, or it's pinned and we are not supposed to block, nada. |
1190 | * block, nada. | ||
1191 | */ | 1173 | */ |
1192 | if (!XFS_DQ_IS_DIRTY(dqp) || | 1174 | if (!XFS_DQ_IS_DIRTY(dqp) || |
1193 | (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { | 1175 | (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { |
@@ -1201,40 +1183,47 @@ xfs_qm_dqflush( | |||
1201 | * down forcibly. If that's the case we must not write this dquot | 1183 | * down forcibly. If that's the case we must not write this dquot |
1202 | * to disk, because the log record didn't make it to disk! | 1184 | * to disk, because the log record didn't make it to disk! |
1203 | */ | 1185 | */ |
1204 | if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) { | 1186 | if (XFS_FORCED_SHUTDOWN(mp)) { |
1205 | dqp->dq_flags &= ~(XFS_DQ_DIRTY); | 1187 | dqp->dq_flags &= ~XFS_DQ_DIRTY; |
1206 | xfs_dqfunlock(dqp); | 1188 | xfs_dqfunlock(dqp); |
1207 | return XFS_ERROR(EIO); | 1189 | return XFS_ERROR(EIO); |
1208 | } | 1190 | } |
1209 | 1191 | ||
1210 | /* | 1192 | /* |
1211 | * Get the buffer containing the on-disk dquot | 1193 | * Get the buffer containing the on-disk dquot |
1212 | * We don't need a transaction envelope because we know that the | ||
1213 | * the ondisk-dquot has already been allocated for. | ||
1214 | */ | 1194 | */ |
1215 | if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { | 1195 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, |
1196 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); | ||
1197 | if (error) { | ||
1216 | ASSERT(error != ENOENT); | 1198 | ASSERT(error != ENOENT); |
1217 | /* | ||
1218 | * Quotas could have gotten turned off (ESRCH) | ||
1219 | */ | ||
1220 | xfs_dqfunlock(dqp); | 1199 | xfs_dqfunlock(dqp); |
1221 | return (error); | 1200 | return error; |
1222 | } | 1201 | } |
1223 | 1202 | ||
1224 | if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), | 1203 | /* |
1225 | 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { | 1204 | * Calculate the location of the dquot inside the buffer. |
1226 | xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE); | 1205 | */ |
1206 | ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); | ||
1207 | |||
1208 | /* | ||
1209 | * A simple sanity check in case we got a corrupted dquot... | ||
1210 | */ | ||
1211 | error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, | ||
1212 | XFS_QMOPT_DOWARN, "dqflush (incore copy)"); | ||
1213 | if (error) { | ||
1214 | xfs_buf_relse(bp); | ||
1215 | xfs_dqfunlock(dqp); | ||
1216 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
1227 | return XFS_ERROR(EIO); | 1217 | return XFS_ERROR(EIO); |
1228 | } | 1218 | } |
1229 | 1219 | ||
1230 | /* This is the only portion of data that needs to persist */ | 1220 | /* This is the only portion of data that needs to persist */ |
1231 | memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t)); | 1221 | memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); |
1232 | 1222 | ||
1233 | /* | 1223 | /* |
1234 | * Clear the dirty field and remember the flush lsn for later use. | 1224 | * Clear the dirty field and remember the flush lsn for later use. |
1235 | */ | 1225 | */ |
1236 | dqp->dq_flags &= ~(XFS_DQ_DIRTY); | 1226 | dqp->dq_flags &= ~XFS_DQ_DIRTY; |
1237 | mp = dqp->q_mount; | ||
1238 | 1227 | ||
1239 | xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, | 1228 | xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, |
1240 | &dqp->q_logitem.qli_item.li_lsn); | 1229 | &dqp->q_logitem.qli_item.li_lsn); |
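After this rework, xfs_qm_dqflush() reads the buffer directly from the cached q_blkno, sanity-checks the incore copy, then copies only the persistent xfs_disk_dquot portion into the buffer before clearing the dirty flag. A stripped-down sketch of that validate-then-copy flush step (the types and the check are stand-ins, not the on-disk format):

    #include <string.h>
    #include <stdio.h>

    struct disk_dquot { unsigned id; unsigned long long bcount; };
    struct dquot {
        struct disk_dquot core;     /* the only part that persists */
        int dirty;                  /* incore-only state */
    };

    /* Stand-in sanity check: reject an obviously bad incore copy. */
    static int dqcheck(const struct disk_dquot *d, unsigned expect_id)
    {
        return d->id == expect_id ? 0 : -1;
    }

    static int dqflush(struct dquot *dqp, char *buf, size_t bufoffset)
    {
        struct disk_dquot *ddqp = (struct disk_dquot *)(buf + bufoffset);

        if (dqcheck(&dqp->core, ddqp->id))
            return -1;              /* corrupt: caller shuts down */

        memcpy(ddqp, &dqp->core, sizeof(*ddqp));    /* persist core */
        dqp->dirty = 0;
        return 0;
    }

    int main(void)
    {
        char buf[256] = {0};
        struct dquot dq = { .core = { .id = 0, .bcount = 9 }, .dirty = 1 };

        printf("flush -> %d, dirty=%d\n", dqflush(&dq, buf, 64), dq.dirty);
        return 0;
    }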
@@ -1404,8 +1393,8 @@ xfs_qm_dqpurge( | |||
1404 | */ | 1393 | */ |
1405 | error = xfs_qm_dqflush(dqp, SYNC_WAIT); | 1394 | error = xfs_qm_dqflush(dqp, SYNC_WAIT); |
1406 | if (error) | 1395 | if (error) |
1407 | xfs_fs_cmn_err(CE_WARN, mp, | 1396 | xfs_warn(mp, "%s: dquot %p flush failed", |
1408 | "xfs_qm_dqpurge: dquot %p flush failed", dqp); | 1397 | __func__, dqp); |
1409 | xfs_dqflock(dqp); | 1398 | xfs_dqflock(dqp); |
1410 | } | 1399 | } |
1411 | ASSERT(atomic_read(&dqp->q_pincount) == 0); | 1400 | ASSERT(atomic_read(&dqp->q_pincount) == 0); |
@@ -1438,36 +1427,38 @@ xfs_qm_dqpurge( | |||
1438 | void | 1427 | void |
1439 | xfs_qm_dqprint(xfs_dquot_t *dqp) | 1428 | xfs_qm_dqprint(xfs_dquot_t *dqp) |
1440 | { | 1429 | { |
1441 | cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); | 1430 | struct xfs_mount *mp = dqp->q_mount; |
1442 | cmn_err(CE_DEBUG, "---- dquotID = %d", | 1431 | |
1432 | xfs_debug(mp, "-----------KERNEL DQUOT----------------"); | ||
1433 | xfs_debug(mp, "---- dquotID = %d", | ||
1443 | (int)be32_to_cpu(dqp->q_core.d_id)); | 1434 | (int)be32_to_cpu(dqp->q_core.d_id)); |
1444 | cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); | 1435 | xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); |
1445 | cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); | 1436 | xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount); |
1446 | cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); | 1437 | xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno); |
1447 | cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); | 1438 | xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset); |
1448 | cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)", | 1439 | xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)", |
1449 | be64_to_cpu(dqp->q_core.d_blk_hardlimit), | 1440 | be64_to_cpu(dqp->q_core.d_blk_hardlimit), |
1450 | (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); | 1441 | (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); |
1451 | cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)", | 1442 | xfs_debug(mp, "---- blkslimit = %Lu (0x%x)", |
1452 | be64_to_cpu(dqp->q_core.d_blk_softlimit), | 1443 | be64_to_cpu(dqp->q_core.d_blk_softlimit), |
1453 | (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); | 1444 | (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); |
1454 | cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)", | 1445 | xfs_debug(mp, "---- inohlimit = %Lu (0x%x)", |
1455 | be64_to_cpu(dqp->q_core.d_ino_hardlimit), | 1446 | be64_to_cpu(dqp->q_core.d_ino_hardlimit), |
1456 | (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); | 1447 | (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); |
1457 | cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)", | 1448 | xfs_debug(mp, "---- inoslimit = %Lu (0x%x)", |
1458 | be64_to_cpu(dqp->q_core.d_ino_softlimit), | 1449 | be64_to_cpu(dqp->q_core.d_ino_softlimit), |
1459 | (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); | 1450 | (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); |
1460 | cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", | 1451 | xfs_debug(mp, "---- bcount = %Lu (0x%x)", |
1461 | be64_to_cpu(dqp->q_core.d_bcount), | 1452 | be64_to_cpu(dqp->q_core.d_bcount), |
1462 | (int)be64_to_cpu(dqp->q_core.d_bcount)); | 1453 | (int)be64_to_cpu(dqp->q_core.d_bcount)); |
1463 | cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", | 1454 | xfs_debug(mp, "---- icount = %Lu (0x%x)", |
1464 | be64_to_cpu(dqp->q_core.d_icount), | 1455 | be64_to_cpu(dqp->q_core.d_icount), |
1465 | (int)be64_to_cpu(dqp->q_core.d_icount)); | 1456 | (int)be64_to_cpu(dqp->q_core.d_icount)); |
1466 | cmn_err(CE_DEBUG, "---- btimer = %d", | 1457 | xfs_debug(mp, "---- btimer = %d", |
1467 | (int)be32_to_cpu(dqp->q_core.d_btimer)); | 1458 | (int)be32_to_cpu(dqp->q_core.d_btimer)); |
1468 | cmn_err(CE_DEBUG, "---- itimer = %d", | 1459 | xfs_debug(mp, "---- itimer = %d", |
1469 | (int)be32_to_cpu(dqp->q_core.d_itimer)); | 1460 | (int)be32_to_cpu(dqp->q_core.d_itimer)); |
1470 | cmn_err(CE_DEBUG, "---------------------------"); | 1461 | xfs_debug(mp, "---------------------------"); |
1471 | } | 1462 | } |
1472 | #endif | 1463 | #endif |
1473 | 1464 | ||
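The cmn_err()-to-xfs_debug()/xfs_warn() conversions throughout this diff route messages through mount-aware helpers so every line is prefixed with the filesystem it concerns. A user-space sketch of that wrapper shape (the real helpers live in the new xfs_message.c and take a struct xfs_mount; these names and levels are illustrative):

    #include <stdarg.h>
    #include <stdio.h>

    struct mount { const char *fsname; };

    /* Mount-aware logger: prefix every message with the fs name,
     * mirroring how xfs_warn()/xfs_debug() prefix the device. */
    static void fs_log(const struct mount *mp, const char *level,
                       const char *fmt, ...)
    {
        va_list ap;

        fprintf(stderr, "%s (%s): ", level, mp ? mp->fsname : "?");
        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        va_end(ap);
        fputc('\n', stderr);
    }

    #define fs_warn(mp, ...)  fs_log(mp, "WARNING", __VA_ARGS__)
    #define fs_debug(mp, ...) fs_log(mp, "DEBUG", __VA_ARGS__)

    int main(void)
    {
        struct mount m = { .fsname = "sda1" };

        fs_warn(&m, "dquot %p flush failed", (void *)0x1234);
        fs_debug(&m, "Clearing xfsstats");
        return 0;
    }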
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 2a1f3dc10a02..9e0e2fa3f2c8 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c | |||
@@ -136,9 +136,8 @@ xfs_qm_dquot_logitem_push( | |||
136 | */ | 136 | */ |
137 | error = xfs_qm_dqflush(dqp, 0); | 137 | error = xfs_qm_dqflush(dqp, 0); |
138 | if (error) | 138 | if (error) |
139 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | 139 | xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", |
140 | "xfs_qm_dquot_logitem_push: push error %d on dqp %p", | 140 | __func__, error, dqp); |
141 | error, dqp); | ||
142 | xfs_dqunlock(dqp); | 141 | xfs_dqunlock(dqp); |
143 | } | 142 | } |
144 | 143 | ||
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 9a92407109a1..b94dace4e785 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -55,14 +55,12 @@ uint ndquot; | |||
55 | kmem_zone_t *qm_dqzone; | 55 | kmem_zone_t *qm_dqzone; |
56 | kmem_zone_t *qm_dqtrxzone; | 56 | kmem_zone_t *qm_dqtrxzone; |
57 | 57 | ||
58 | static cred_t xfs_zerocr; | ||
59 | |||
60 | STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); | 58 | STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); |
61 | STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); | 59 | STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); |
62 | 60 | ||
63 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 61 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
64 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 62 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
65 | STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); | 63 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); |
66 | 64 | ||
67 | static struct shrinker xfs_qm_shaker = { | 65 | static struct shrinker xfs_qm_shaker = { |
68 | .shrink = xfs_qm_shake, | 66 | .shrink = xfs_qm_shake, |
@@ -82,7 +80,7 @@ xfs_qm_dquot_list_print( | |||
82 | int i = 0; | 80 | int i = 0; |
83 | 81 | ||
84 | list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { | 82 | list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { |
85 | cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " | 83 | xfs_debug(mp, " %d. \"%d (%s)\" " |
86 | "bcnt = %lld, icnt = %lld, refs = %d", | 84 | "bcnt = %lld, icnt = %lld, refs = %d", |
87 | i++, be32_to_cpu(dqp->q_core.d_id), | 85 | i++, be32_to_cpu(dqp->q_core.d_id), |
88 | DQFLAGTO_TYPESTR(dqp), | 86 | DQFLAGTO_TYPESTR(dqp), |
@@ -207,7 +205,7 @@ xfs_qm_destroy( | |||
207 | list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { | 205 | list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { |
208 | xfs_dqlock(dqp); | 206 | xfs_dqlock(dqp); |
209 | #ifdef QUOTADEBUG | 207 | #ifdef QUOTADEBUG |
210 | cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); | 208 | xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp); |
211 | #endif | 209 | #endif |
212 | list_del_init(&dqp->q_freelist); | 210 | list_del_init(&dqp->q_freelist); |
213 | xfs_Gqm->qm_dqfrlist_cnt--; | 211 | xfs_Gqm->qm_dqfrlist_cnt--; |
@@ -343,9 +341,7 @@ xfs_qm_mount_quotas( | |||
343 | * quotas immediately. | 341 | * quotas immediately. |
344 | */ | 342 | */ |
345 | if (mp->m_sb.sb_rextents) { | 343 | if (mp->m_sb.sb_rextents) { |
346 | cmn_err(CE_NOTE, | 344 | xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); |
347 | "Cannot turn on quotas for realtime filesystem %s", | ||
348 | mp->m_fsname); | ||
349 | mp->m_qflags = 0; | 345 | mp->m_qflags = 0; |
350 | goto write_changes; | 346 | goto write_changes; |
351 | } | 347 | } |
@@ -404,14 +400,13 @@ xfs_qm_mount_quotas( | |||
404 | * off, but the on disk superblock doesn't know that ! | 400 | * off, but the on disk superblock doesn't know that ! |
405 | */ | 401 | */ |
406 | ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); | 402 | ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); |
407 | xfs_fs_cmn_err(CE_ALERT, mp, | 403 | xfs_alert(mp, "%s: Superblock update failed!", |
408 | "XFS mount_quotas: Superblock update failed!"); | 404 | __func__); |
409 | } | 405 | } |
410 | } | 406 | } |
411 | 407 | ||
412 | if (error) { | 408 | if (error) { |
413 | xfs_fs_cmn_err(CE_WARN, mp, | 409 | xfs_warn(mp, "Failed to initialize disk quotas."); |
414 | "Failed to initialize disk quotas."); | ||
415 | return; | 410 | return; |
416 | } | 411 | } |
417 | 412 | ||
@@ -466,12 +461,10 @@ xfs_qm_dqflush_all( | |||
466 | struct xfs_quotainfo *q = mp->m_quotainfo; | 461 | struct xfs_quotainfo *q = mp->m_quotainfo; |
467 | int recl; | 462 | int recl; |
468 | struct xfs_dquot *dqp; | 463 | struct xfs_dquot *dqp; |
469 | int niters; | ||
470 | int error; | 464 | int error; |
471 | 465 | ||
472 | if (!q) | 466 | if (!q) |
473 | return 0; | 467 | return 0; |
474 | niters = 0; | ||
475 | again: | 468 | again: |
476 | mutex_lock(&q->qi_dqlist_lock); | 469 | mutex_lock(&q->qi_dqlist_lock); |
477 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { | 470 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { |
@@ -837,7 +830,7 @@ xfs_qm_dqattach_locked( | |||
837 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, | 830 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, |
838 | flags & XFS_QMOPT_DQALLOC, | 831 | flags & XFS_QMOPT_DQALLOC, |
839 | ip->i_udquot, &ip->i_gdquot) : | 832 | ip->i_udquot, &ip->i_gdquot) : |
840 | xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, | 833 | xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, |
841 | flags & XFS_QMOPT_DQALLOC, | 834 | flags & XFS_QMOPT_DQALLOC, |
842 | ip->i_udquot, &ip->i_gdquot); | 835 | ip->i_udquot, &ip->i_gdquot); |
843 | /* | 836 | /* |
@@ -1199,87 +1192,6 @@ xfs_qm_list_destroy( | |||
1199 | mutex_destroy(&(list->qh_lock)); | 1192 | mutex_destroy(&(list->qh_lock)); |
1200 | } | 1193 | } |
1201 | 1194 | ||
1202 | |||
1203 | /* | ||
1204 | * Stripped down version of dqattach. This doesn't attach, or even look at the | ||
1205 | * dquots attached to the inode. The rationale is that there won't be any | ||
1206 | * attached at the time this is called from quotacheck. | ||
1207 | */ | ||
1208 | STATIC int | ||
1209 | xfs_qm_dqget_noattach( | ||
1210 | xfs_inode_t *ip, | ||
1211 | xfs_dquot_t **O_udqpp, | ||
1212 | xfs_dquot_t **O_gdqpp) | ||
1213 | { | ||
1214 | int error; | ||
1215 | xfs_mount_t *mp; | ||
1216 | xfs_dquot_t *udqp, *gdqp; | ||
1217 | |||
1218 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
1219 | mp = ip->i_mount; | ||
1220 | udqp = NULL; | ||
1221 | gdqp = NULL; | ||
1222 | |||
1223 | if (XFS_IS_UQUOTA_ON(mp)) { | ||
1224 | ASSERT(ip->i_udquot == NULL); | ||
1225 | /* | ||
1226 | * We want the dquot allocated if it doesn't exist. | ||
1227 | */ | ||
1228 | if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER, | ||
1229 | XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, | ||
1230 | &udqp))) { | ||
1231 | /* | ||
1232 | * Shouldn't be able to turn off quotas here. | ||
1233 | */ | ||
1234 | ASSERT(error != ESRCH); | ||
1235 | ASSERT(error != ENOENT); | ||
1236 | return error; | ||
1237 | } | ||
1238 | ASSERT(udqp); | ||
1239 | } | ||
1240 | |||
1241 | if (XFS_IS_OQUOTA_ON(mp)) { | ||
1242 | ASSERT(ip->i_gdquot == NULL); | ||
1243 | if (udqp) | ||
1244 | xfs_dqunlock(udqp); | ||
1245 | error = XFS_IS_GQUOTA_ON(mp) ? | ||
1246 | xfs_qm_dqget(mp, ip, | ||
1247 | ip->i_d.di_gid, XFS_DQ_GROUP, | ||
1248 | XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, | ||
1249 | &gdqp) : | ||
1250 | xfs_qm_dqget(mp, ip, | ||
1251 | ip->i_d.di_projid, XFS_DQ_PROJ, | ||
1252 | XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, | ||
1253 | &gdqp); | ||
1254 | if (error) { | ||
1255 | if (udqp) | ||
1256 | xfs_qm_dqrele(udqp); | ||
1257 | ASSERT(error != ESRCH); | ||
1258 | ASSERT(error != ENOENT); | ||
1259 | return error; | ||
1260 | } | ||
1261 | ASSERT(gdqp); | ||
1262 | |||
1263 | /* Reacquire the locks in the right order */ | ||
1264 | if (udqp) { | ||
1265 | if (! xfs_qm_dqlock_nowait(udqp)) { | ||
1266 | xfs_dqunlock(gdqp); | ||
1267 | xfs_dqlock(udqp); | ||
1268 | xfs_dqlock(gdqp); | ||
1269 | } | ||
1270 | } | ||
1271 | } | ||
1272 | |||
1273 | *O_udqpp = udqp; | ||
1274 | *O_gdqpp = gdqp; | ||
1275 | |||
1276 | #ifdef QUOTADEBUG | ||
1277 | if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp)); | ||
1278 | if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp)); | ||
1279 | #endif | ||
1280 | return 0; | ||
1281 | } | ||
1282 | |||
1283 | /* | 1195 | /* |
1284 | * Create an inode and return with a reference already taken, but unlocked | 1196 | * Create an inode and return with a reference already taken, but unlocked |
1285 | * This is how we create quota inodes | 1197 | * This is how we create quota inodes |
@@ -1305,21 +1217,14 @@ xfs_qm_qino_alloc( | |||
1305 | return error; | 1217 | return error; |
1306 | } | 1218 | } |
1307 | 1219 | ||
1308 | if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, | 1220 | error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); |
1309 | &xfs_zerocr, 0, 1, ip, &committed))) { | 1221 | if (error) { |
1310 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | | 1222 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | |
1311 | XFS_TRANS_ABORT); | 1223 | XFS_TRANS_ABORT); |
1312 | return error; | 1224 | return error; |
1313 | } | 1225 | } |
1314 | 1226 | ||
1315 | /* | 1227 | /* |
1316 | * Keep an extra reference to this quota inode. This inode is | ||
1317 | * locked exclusively and joined to the transaction already. | ||
1318 | */ | ||
1319 | ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); | ||
1320 | IHOLD(*ip); | ||
1321 | |||
1322 | /* | ||
1323 | * Make the changes in the superblock, and log those too. | 1228 | * Make the changes in the superblock, and log those too. |
1324 | * sbfields arg may contain fields other than *QUOTINO; | 1229 | * sbfields arg may contain fields other than *QUOTINO; |
1325 | * VERSIONNUM for example. | 1230 | * VERSIONNUM for example. |
@@ -1347,7 +1252,7 @@ xfs_qm_qino_alloc( | |||
1347 | xfs_mod_sb(tp, sbfields); | 1252 | xfs_mod_sb(tp, sbfields); |
1348 | 1253 | ||
1349 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { | 1254 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { |
1350 | xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); | 1255 | xfs_alert(mp, "%s failed (error %d)!", __func__, error); |
1351 | return error; | 1256 | return error; |
1352 | } | 1257 | } |
1353 | return 0; | 1258 | return 0; |
@@ -1382,7 +1287,7 @@ xfs_qm_reset_dqcounts( | |||
1382 | * output any warnings because it's perfectly possible to | 1287 | * output any warnings because it's perfectly possible to |
1383 | * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. | 1288 | * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. |
1384 | */ | 1289 | */ |
1385 | (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, | 1290 | (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, |
1386 | "xfs_quotacheck"); | 1291 | "xfs_quotacheck"); |
1387 | ddq->d_bcount = 0; | 1292 | ddq->d_bcount = 0; |
1388 | ddq->d_icount = 0; | 1293 | ddq->d_icount = 0; |
@@ -1407,14 +1312,9 @@ xfs_qm_dqiter_bufs( | |||
1407 | { | 1312 | { |
1408 | xfs_buf_t *bp; | 1313 | xfs_buf_t *bp; |
1409 | int error; | 1314 | int error; |
1410 | int notcommitted; | ||
1411 | int incr; | ||
1412 | int type; | 1315 | int type; |
1413 | 1316 | ||
1414 | ASSERT(blkcnt > 0); | 1317 | ASSERT(blkcnt > 0); |
1415 | notcommitted = 0; | ||
1416 | incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? | ||
1417 | XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; | ||
1418 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : | 1318 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : |
1419 | (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); | 1319 | (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); |
1420 | error = 0; | 1320 | error = 0; |
@@ -1516,7 +1416,7 @@ xfs_qm_dqiterate( | |||
1516 | rablkcnt = map[i+1].br_blockcount; | 1416 | rablkcnt = map[i+1].br_blockcount; |
1517 | rablkno = map[i+1].br_startblock; | 1417 | rablkno = map[i+1].br_startblock; |
1518 | while (rablkcnt--) { | 1418 | while (rablkcnt--) { |
1519 | xfs_baread(mp->m_ddev_targp, | 1419 | xfs_buf_readahead(mp->m_ddev_targp, |
1520 | XFS_FSB_TO_DADDR(mp, rablkno), | 1420 | XFS_FSB_TO_DADDR(mp, rablkno), |
1521 | mp->m_quotainfo->qi_dqchunklen); | 1421 | mp->m_quotainfo->qi_dqchunklen); |
1522 | rablkno++; | 1422 | rablkno++; |
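The xfs_baread → xfs_buf_readahead rename keeps the same intent: kick off non-blocking reads for the dquot clusters the iteration will need next. The user-space analogue of this cache-warming step, as a hedged sketch (posix_fadvise is the standard hint; the helper name is made up):

    #include <fcntl.h>

    /*
     * Hint that [off, off + len) of fd will be read soon, mirroring
     * the readahead loop over quota buffer clusters above.
     * Returns 0 on success or an errno value.
     */
    static int prefetch_range(int fd, off_t off, off_t len)
    {
            return posix_fadvise(fd, off, len, POSIX_FADV_WILLNEED);
    }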
@@ -1546,18 +1446,34 @@ xfs_qm_dqiterate( | |||
1546 | 1446 | ||
1547 | /* | 1447 | /* |
1548 | * Called by dqusage_adjust in doing a quotacheck. | 1448 | * Called by dqusage_adjust in doing a quotacheck. |
1549 | * Given the inode, and a dquot (either USR or GRP, doesn't matter), | 1449 | * |
1550 | * this updates its incore copy as well as the buffer copy. This is | 1450 | * Given the inode, and a dquot id this updates both the incore dqout as well |
1551 | * so that once the quotacheck is done, we can just log all the buffers, | 1451 | * as the buffer copy. This is so that once the quotacheck is done, we can |
1552 | * as opposed to logging numerous updates to individual dquots. | 1452 | * just log all the buffers, as opposed to logging numerous updates to |
1453 | * individual dquots. | ||
1553 | */ | 1454 | */ |
1554 | STATIC void | 1455 | STATIC int |
1555 | xfs_qm_quotacheck_dqadjust( | 1456 | xfs_qm_quotacheck_dqadjust( |
1556 | xfs_dquot_t *dqp, | 1457 | struct xfs_inode *ip, |
1458 | xfs_dqid_t id, | ||
1459 | uint type, | ||
1557 | xfs_qcnt_t nblks, | 1460 | xfs_qcnt_t nblks, |
1558 | xfs_qcnt_t rtblks) | 1461 | xfs_qcnt_t rtblks) |
1559 | { | 1462 | { |
1560 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 1463 | struct xfs_mount *mp = ip->i_mount; |
1464 | struct xfs_dquot *dqp; | ||
1465 | int error; | ||
1466 | |||
1467 | error = xfs_qm_dqget(mp, ip, id, type, | ||
1468 | XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); | ||
1469 | if (error) { | ||
1470 | /* | ||
1471 | * Shouldn't be able to turn off quotas here. | ||
1472 | */ | ||
1473 | ASSERT(error != ESRCH); | ||
1474 | ASSERT(error != ENOENT); | ||
1475 | return error; | ||
1476 | } | ||
1561 | 1477 | ||
1562 | trace_xfs_dqadjust(dqp); | 1478 | trace_xfs_dqadjust(dqp); |
1563 | 1479 | ||
@@ -1582,11 +1498,13 @@ xfs_qm_quotacheck_dqadjust( | |||
1582 | * There are no timers for the default values set in the root dquot. | 1498 | * There are no timers for the default values set in the root dquot. |
1583 | */ | 1499 | */ |
1584 | if (dqp->q_core.d_id) { | 1500 | if (dqp->q_core.d_id) { |
1585 | xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); | 1501 | xfs_qm_adjust_dqlimits(mp, &dqp->q_core); |
1586 | xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); | 1502 | xfs_qm_adjust_dqtimers(mp, &dqp->q_core); |
1587 | } | 1503 | } |
1588 | 1504 | ||
1589 | dqp->dq_flags |= XFS_DQ_DIRTY; | 1505 | dqp->dq_flags |= XFS_DQ_DIRTY; |
1506 | xfs_qm_dqput(dqp); | ||
1507 | return 0; | ||
1590 | } | 1508 | } |
1591 | 1509 | ||
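The refactored xfs_qm_quotacheck_dqadjust now takes (inode, id, type), looks up and locks the dquot itself, and drops it before returning, so callers no longer pass pre-locked dquots around. The shape of that refactor in a self-contained toy (hypothetical names, nothing here is XFS code):

    /* Toy dquot table keyed by id; purely illustrative. */
    struct dquot { int id; long bcount; long icount; };
    static struct dquot table[16];

    static struct dquot *dq_get(int id)      /* lookup-or-create */
    {
            struct dquot *dqp = &table[id & 15];
            dqp->id = id;
            return dqp;
    }

    static void dq_put(struct dquot *dqp) { (void)dqp; /* release */ }

    /*
     * Mirrors the new helper: resolve the dquot from the id, adjust
     * the counts, release before returning.
     */
    static int dq_adjust(int id, long nblks)
    {
            struct dquot *dqp = dq_get(id);

            if (!dqp)                /* the real lookup can fail */
                    return -1;
            dqp->bcount += nblks;
            dqp->icount += 1;
            dq_put(dqp);
            return 0;
    }

Centralising the get/put pair inside the helper is what lets the caller below shed its per-quota-type cleanup branches.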
1592 | STATIC int | 1510 | STATIC int |
@@ -1629,8 +1547,7 @@ xfs_qm_dqusage_adjust( | |||
1629 | int *res) /* result code value */ | 1547 | int *res) /* result code value */ |
1630 | { | 1548 | { |
1631 | xfs_inode_t *ip; | 1549 | xfs_inode_t *ip; |
1632 | xfs_dquot_t *udqp, *gdqp; | 1550 | xfs_qcnt_t nblks, rtblks = 0; |
1633 | xfs_qcnt_t nblks, rtblks; | ||
1634 | int error; | 1551 | int error; |
1635 | 1552 | ||
1636 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1553 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
@@ -1650,51 +1567,24 @@ xfs_qm_dqusage_adjust( | |||
1650 | * the case in all other instances. It's OK that we do this because | 1567 | * the case in all other instances. It's OK that we do this because |
1651 | * quotacheck is done only at mount time. | 1568 | * quotacheck is done only at mount time. |
1652 | */ | 1569 | */ |
1653 | if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { | 1570 | error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); |
1571 | if (error) { | ||
1654 | *res = BULKSTAT_RV_NOTHING; | 1572 | *res = BULKSTAT_RV_NOTHING; |
1655 | return error; | 1573 | return error; |
1656 | } | 1574 | } |
1657 | 1575 | ||
1658 | /* | 1576 | ASSERT(ip->i_delayed_blks == 0); |
1659 | * Obtain the locked dquots. In case of an error (eg. allocation | ||
1660 | * fails for ENOSPC), we return the negative of the error number | ||
1661 | * to bulkstat, so that it can get propagated to quotacheck() and | ||
1662 | * making us disable quotas for the file system. | ||
1663 | */ | ||
1664 | if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) { | ||
1665 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1666 | IRELE(ip); | ||
1667 | *res = BULKSTAT_RV_GIVEUP; | ||
1668 | return error; | ||
1669 | } | ||
1670 | 1577 | ||
1671 | rtblks = 0; | 1578 | if (XFS_IS_REALTIME_INODE(ip)) { |
1672 | if (! XFS_IS_REALTIME_INODE(ip)) { | ||
1673 | nblks = (xfs_qcnt_t)ip->i_d.di_nblocks; | ||
1674 | } else { | ||
1675 | /* | 1579 | /* |
1676 | * Walk thru the extent list and count the realtime blocks. | 1580 | * Walk thru the extent list and count the realtime blocks. |
1677 | */ | 1581 | */ |
1678 | if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { | 1582 | error = xfs_qm_get_rtblks(ip, &rtblks); |
1679 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1583 | if (error) |
1680 | IRELE(ip); | 1584 | goto error0; |
1681 | if (udqp) | ||
1682 | xfs_qm_dqput(udqp); | ||
1683 | if (gdqp) | ||
1684 | xfs_qm_dqput(gdqp); | ||
1685 | *res = BULKSTAT_RV_GIVEUP; | ||
1686 | return error; | ||
1687 | } | ||
1688 | nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; | ||
1689 | } | 1585 | } |
1690 | ASSERT(ip->i_delayed_blks == 0); | ||
1691 | 1586 | ||
1692 | /* | 1587 | nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; |
1693 | * We can't release the inode while holding its dquot locks. | ||
1694 | * The inode can go into inactive and might try to acquire the dquotlocks. | ||
1695 | * So, just unlock here and do a vn_rele at the end. | ||
1696 | */ | ||
1697 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1698 | 1588 | ||
1699 | /* | 1589 | /* |
1700 | * Add the (disk blocks and inode) resources occupied by this | 1590 | * Add the (disk blocks and inode) resources occupied by this |
@@ -1709,26 +1599,36 @@ xfs_qm_dqusage_adjust( | |||
1709 | * and quotaoffs don't race. (Quotachecks happen at mount time only). | 1599 | * and quotaoffs don't race. (Quotachecks happen at mount time only). |
1710 | */ | 1600 | */ |
1711 | if (XFS_IS_UQUOTA_ON(mp)) { | 1601 | if (XFS_IS_UQUOTA_ON(mp)) { |
1712 | ASSERT(udqp); | 1602 | error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, |
1713 | xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); | 1603 | XFS_DQ_USER, nblks, rtblks); |
1714 | xfs_qm_dqput(udqp); | 1604 | if (error) |
1605 | goto error0; | ||
1715 | } | 1606 | } |
1716 | if (XFS_IS_OQUOTA_ON(mp)) { | 1607 | |
1717 | ASSERT(gdqp); | 1608 | if (XFS_IS_GQUOTA_ON(mp)) { |
1718 | xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); | 1609 | error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, |
1719 | xfs_qm_dqput(gdqp); | 1610 | XFS_DQ_GROUP, nblks, rtblks); |
1611 | if (error) | ||
1612 | goto error0; | ||
1720 | } | 1613 | } |
1721 | /* | ||
1722 | * Now release the inode. This will send it to 'inactive', and | ||
1723 | * possibly even free blocks. | ||
1724 | */ | ||
1725 | IRELE(ip); | ||
1726 | 1614 | ||
1727 | /* | 1615 | if (XFS_IS_PQUOTA_ON(mp)) { |
1728 | * Goto next inode. | 1616 | error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), |
1729 | */ | 1617 | XFS_DQ_PROJ, nblks, rtblks); |
1618 | if (error) | ||
1619 | goto error0; | ||
1620 | } | ||
1621 | |||
1622 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1623 | IRELE(ip); | ||
1730 | *res = BULKSTAT_RV_DIDONE; | 1624 | *res = BULKSTAT_RV_DIDONE; |
1731 | return 0; | 1625 | return 0; |
1626 | |||
1627 | error0: | ||
1628 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1629 | IRELE(ip); | ||
1630 | *res = BULKSTAT_RV_GIVEUP; | ||
1631 | return error; | ||
1732 | } | 1632 | } |
1733 | 1633 | ||
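The rewritten xfs_qm_dqusage_adjust funnels every failure through one error0 label that unlocks and releases the inode, replacing four duplicated cleanup sequences. The idiom, reduced to a minimal compilable sketch (names hypothetical):

    struct resource { int locked; };

    static int step(int n) { return n == 2 ? -1 : 0; }  /* stand-in work */

    static int do_work(struct resource *r)
    {
            int error;

            r->locked = 1;                  /* "xfs_ilock" analogue */

            error = step(1);
            if (error)
                    goto error0;
            error = step(2);
            if (error)
                    goto error0;

            r->locked = 0;                  /* success path */
            return 0;

    error0:
            r->locked = 0;                  /* single cleanup site */
            return error;
    }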
1734 | /* | 1634 | /* |
@@ -1759,7 +1659,7 @@ xfs_qm_quotacheck( | |||
1759 | */ | 1659 | */ |
1760 | ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); | 1660 | ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); |
1761 | 1661 | ||
1762 | cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); | 1662 | xfs_notice(mp, "Quotacheck needed: Please wait."); |
1763 | 1663 | ||
1764 | /* | 1664 | /* |
1765 | * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset | 1665 | * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset |
@@ -1837,9 +1737,9 @@ xfs_qm_quotacheck( | |||
1837 | 1737 | ||
1838 | error_return: | 1738 | error_return: |
1839 | if (error) { | 1739 | if (error) { |
1840 | cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): " | 1740 | xfs_warn(mp, |
1841 | "Disabling quotas.", | 1741 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", |
1842 | mp->m_fsname, error); | 1742 | error); |
1843 | /* | 1743 | /* |
1844 | * We must turn off quotas. | 1744 | * We must turn off quotas. |
1845 | */ | 1745 | */ |
@@ -1847,12 +1747,11 @@ xfs_qm_quotacheck( | |||
1847 | ASSERT(xfs_Gqm != NULL); | 1747 | ASSERT(xfs_Gqm != NULL); |
1848 | xfs_qm_destroy_quotainfo(mp); | 1748 | xfs_qm_destroy_quotainfo(mp); |
1849 | if (xfs_mount_reset_sbqflags(mp)) { | 1749 | if (xfs_mount_reset_sbqflags(mp)) { |
1850 | cmn_err(CE_WARN, "XFS quotacheck %s: " | 1750 | xfs_warn(mp, |
1851 | "Failed to reset quota flags.", mp->m_fsname); | 1751 | "Quotacheck: Failed to reset quota flags."); |
1852 | } | 1752 | } |
1853 | } else { | 1753 | } else |
1854 | cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); | 1754 | xfs_notice(mp, "Quotacheck: Done."); |
1855 | } | ||
1856 | return (error); | 1755 | return (error); |
1857 | } | 1756 | } |
1858 | 1757 | ||
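The cmn_err conversions in this hunk all follow one rule: xfs_notice()/xfs_warn()/xfs_alert() take the mount and prefix the filesystem name themselves, so the hand-written "XFS ... %s" + mp->m_fsname boilerplate disappears. A hedged user-space sketch of that wrapper style (the real helpers live in the new xfs_message.c; this only shows the shape):

    #include <stdarg.h>
    #include <stdio.h>

    struct mount { const char *fsname; };

    /* Prefix every message with the filesystem identity, once. */
    static void fs_notice(const struct mount *mp, const char *fmt, ...)
    {
            va_list ap;

            if (mp)
                    fprintf(stderr, "XFS (%s): ", mp->fsname);
            va_start(ap, fmt);
            vfprintf(stderr, fmt, ap);
            va_end(ap);
            fputc('\n', stderr);
    }

Usage then matches the new call sites, e.g. fs_notice(&m, "Quotacheck needed: Please wait.").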
@@ -1946,12 +1845,14 @@ xfs_qm_dqreclaim_one(void) | |||
1946 | xfs_dquot_t *dqpout; | 1845 | xfs_dquot_t *dqpout; |
1947 | xfs_dquot_t *dqp; | 1846 | xfs_dquot_t *dqp; |
1948 | int restarts; | 1847 | int restarts; |
1848 | int startagain; | ||
1949 | 1849 | ||
1950 | restarts = 0; | 1850 | restarts = 0; |
1951 | dqpout = NULL; | 1851 | dqpout = NULL; |
1952 | 1852 | ||
1953 | /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ | 1853 | /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ |
1954 | startagain: | 1854 | again: |
1855 | startagain = 0; | ||
1955 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); | 1856 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); |
1956 | 1857 | ||
1957 | list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { | 1858 | list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { |
@@ -1968,13 +1869,10 @@ startagain: | |||
1968 | ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); | 1869 | ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); |
1969 | 1870 | ||
1970 | trace_xfs_dqreclaim_want(dqp); | 1871 | trace_xfs_dqreclaim_want(dqp); |
1971 | |||
1972 | xfs_dqunlock(dqp); | ||
1973 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1974 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | ||
1975 | return NULL; | ||
1976 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); | 1872 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); |
1977 | goto startagain; | 1873 | restarts++; |
1874 | startagain = 1; | ||
1875 | goto dqunlock; | ||
1978 | } | 1876 | } |
1979 | 1877 | ||
1980 | /* | 1878 | /* |
@@ -1989,23 +1887,20 @@ startagain: | |||
1989 | ASSERT(list_empty(&dqp->q_mplist)); | 1887 | ASSERT(list_empty(&dqp->q_mplist)); |
1990 | list_del_init(&dqp->q_freelist); | 1888 | list_del_init(&dqp->q_freelist); |
1991 | xfs_Gqm->qm_dqfrlist_cnt--; | 1889 | xfs_Gqm->qm_dqfrlist_cnt--; |
1992 | xfs_dqunlock(dqp); | ||
1993 | dqpout = dqp; | 1890 | dqpout = dqp; |
1994 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); | 1891 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); |
1995 | break; | 1892 | goto dqunlock; |
1996 | } | 1893 | } |
1997 | 1894 | ||
1998 | ASSERT(dqp->q_hash); | 1895 | ASSERT(dqp->q_hash); |
1999 | ASSERT(!list_empty(&dqp->q_mplist)); | 1896 | ASSERT(!list_empty(&dqp->q_mplist)); |
2000 | 1897 | ||
2001 | /* | 1898 | /* |
2002 | * Try to grab the flush lock. If this dquot is in the process of | 1899 | * Try to grab the flush lock. If this dquot is in the process |
2003 | * getting flushed to disk, we don't want to reclaim it. | 1900 | * of getting flushed to disk, we don't want to reclaim it. |
2004 | */ | 1901 | */ |
2005 | if (!xfs_dqflock_nowait(dqp)) { | 1902 | if (!xfs_dqflock_nowait(dqp)) |
2006 | xfs_dqunlock(dqp); | 1903 | goto dqunlock; |
2007 | continue; | ||
2008 | } | ||
2009 | 1904 | ||
2010 | /* | 1905 | /* |
2011 | * We have the flush lock so we know that this is not in the | 1906 | * We have the flush lock so we know that this is not in the |
@@ -2024,11 +1919,10 @@ startagain: | |||
2024 | */ | 1919 | */ |
2025 | error = xfs_qm_dqflush(dqp, 0); | 1920 | error = xfs_qm_dqflush(dqp, 0); |
2026 | if (error) { | 1921 | if (error) { |
2027 | xfs_fs_cmn_err(CE_WARN, mp, | 1922 | xfs_warn(mp, "%s: dquot %p flush failed", |
2028 | "xfs_qm_dqreclaim: dquot %p flush failed", dqp); | 1923 | __func__, dqp); |
2029 | } | 1924 | } |
2030 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | 1925 | goto dqunlock; |
2031 | continue; | ||
2032 | } | 1926 | } |
2033 | 1927 | ||
2034 | /* | 1928 | /* |
@@ -2050,13 +1944,8 @@ startagain: | |||
2050 | */ | 1944 | */ |
2051 | if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { | 1945 | if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { |
2052 | restarts++; | 1946 | restarts++; |
2053 | mutex_unlock(&dqp->q_hash->qh_lock); | 1947 | startagain = 1; |
2054 | xfs_dqfunlock(dqp); | 1948 | goto qhunlock; |
2055 | xfs_dqunlock(dqp); | ||
2056 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
2057 | if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS) | ||
2058 | return NULL; | ||
2059 | goto startagain; | ||
2060 | } | 1949 | } |
2061 | 1950 | ||
2062 | ASSERT(dqp->q_nrefs == 0); | 1951 | ASSERT(dqp->q_nrefs == 0); |
@@ -2069,14 +1958,20 @@ startagain: | |||
2069 | xfs_Gqm->qm_dqfrlist_cnt--; | 1958 | xfs_Gqm->qm_dqfrlist_cnt--; |
2070 | dqpout = dqp; | 1959 | dqpout = dqp; |
2071 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); | 1960 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); |
1961 | qhunlock: | ||
2072 | mutex_unlock(&dqp->q_hash->qh_lock); | 1962 | mutex_unlock(&dqp->q_hash->qh_lock); |
2073 | dqfunlock: | 1963 | dqfunlock: |
2074 | xfs_dqfunlock(dqp); | 1964 | xfs_dqfunlock(dqp); |
1965 | dqunlock: | ||
2075 | xfs_dqunlock(dqp); | 1966 | xfs_dqunlock(dqp); |
2076 | if (dqpout) | 1967 | if (dqpout) |
2077 | break; | 1968 | break; |
2078 | if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | 1969 | if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) |
2079 | return NULL; | 1970 | break; |
1971 | if (startagain) { | ||
1972 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1973 | goto again; | ||
1974 | } | ||
2080 | } | 1975 | } |
2081 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | 1976 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
2082 | return dqpout; | 1977 | return dqpout; |
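The reclaim loop above was restructured around two ideas: a startagain flag records where the old code jumped straight back to the top, and stacked exit labels (qhunlock → dqfunlock → dqunlock) drop each lock exactly once in reverse acquisition order. The stacked-label part in a minimal pthreads sketch (illustrative names):

    #include <pthread.h>

    static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;   /* taken first */
    static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t c = PTHREAD_MUTEX_INITIALIZER;   /* taken last */

    static int try_one(int fail_at)
    {
            int error = 0;

            pthread_mutex_lock(&a);
            if (fail_at == 1) { error = -1; goto out_a; }

            pthread_mutex_lock(&b);
            if (fail_at == 2) { error = -1; goto out_b; }

            pthread_mutex_lock(&c);
            /* ... work with all three locks held ... */
            pthread_mutex_unlock(&c);
    out_b:
            pthread_mutex_unlock(&b);
    out_a:
            pthread_mutex_unlock(&a);
            return error;
    }

Each early exit jumps to the label matching the deepest lock it actually holds, so no unlock is missed or doubled.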
@@ -2114,10 +2009,10 @@ xfs_qm_shake_freelist( | |||
2114 | STATIC int | 2009 | STATIC int |
2115 | xfs_qm_shake( | 2010 | xfs_qm_shake( |
2116 | struct shrinker *shrink, | 2011 | struct shrinker *shrink, |
2117 | int nr_to_scan, | 2012 | struct shrink_control *sc) |
2118 | gfp_t gfp_mask) | ||
2119 | { | 2013 | { |
2120 | int ndqused, nfree, n; | 2014 | int ndqused, nfree, n; |
2015 | gfp_t gfp_mask = sc->gfp_mask; | ||
2121 | 2016 | ||
2122 | if (!kmem_shake_allow(gfp_mask)) | 2017 | if (!kmem_shake_allow(gfp_mask)) |
2123 | return 0; | 2018 | return 0; |
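The xfs_qm_shake change tracks a VFS API change: the shrinker callback now receives a single struct shrink_control instead of separate nr_to_scan and gfp_mask arguments. A portable analogue of that design choice (the struct name and fields below mimic, but are not, the kernel's):

    /* Bundle the scan parameters so the API can grow without churn. */
    struct shrink_control_like {
            unsigned long nr_to_scan;
            unsigned int  gfp_mask;
    };

    static int shake(const struct shrink_control_like *sc)
    {
            unsigned long n = sc->nr_to_scan;
            /* ... reclaim up to n cached objects, honouring gfp_mask ... */
            return (int)n;
    }

Passing one extensible struct means future fields reach every shrinker without another signature change.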
@@ -2202,7 +2097,7 @@ xfs_qm_write_sb_changes( | |||
2202 | int error; | 2097 | int error; |
2203 | 2098 | ||
2204 | #ifdef QUOTADEBUG | 2099 | #ifdef QUOTADEBUG |
2205 | cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname); | 2100 | xfs_notice(mp, "Writing superblock quota changes"); |
2206 | #endif | 2101 | #endif |
2207 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | 2102 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); |
2208 | if ((error = xfs_trans_reserve(tp, 0, | 2103 | if ((error = xfs_trans_reserve(tp, 0, |
@@ -2224,7 +2119,7 @@ xfs_qm_write_sb_changes( | |||
2224 | 2119 | ||
2225 | 2120 | ||
2226 | /* | 2121 | /* |
2227 | * Given an inode, a uid and gid (from cred_t) make sure that we have | 2122 | * Given an inode, a uid, gid and prid, make sure that we have |
2228 | * allocated relevant dquot(s) on disk, and that we won't exceed inode | 2123 | * allocated relevant dquot(s) on disk, and that we won't exceed inode |
2229 | * quotas by creating this file. | 2124 | * quotas by creating this file. |
2230 | * This also attaches dquot(s) to the given inode after locking it, | 2125 | * This also attaches dquot(s) to the given inode after locking it, |
@@ -2332,7 +2227,7 @@ xfs_qm_vop_dqalloc( | |||
2332 | xfs_dqunlock(gq); | 2227 | xfs_dqunlock(gq); |
2333 | } | 2228 | } |
2334 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { | 2229 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { |
2335 | if (ip->i_d.di_projid != prid) { | 2230 | if (xfs_get_projid(ip) != prid) { |
2336 | xfs_iunlock(ip, lockflags); | 2231 | xfs_iunlock(ip, lockflags); |
2337 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, | 2232 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, |
2338 | XFS_DQ_PROJ, | 2233 | XFS_DQ_PROJ, |
@@ -2454,7 +2349,7 @@ xfs_qm_vop_chown_reserve( | |||
2454 | } | 2349 | } |
2455 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { | 2350 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { |
2456 | if (XFS_IS_PQUOTA_ON(ip->i_mount) && | 2351 | if (XFS_IS_PQUOTA_ON(ip->i_mount) && |
2457 | ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id)) | 2352 | xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) |
2458 | prjflags = XFS_QMOPT_ENOSPC; | 2353 | prjflags = XFS_QMOPT_ENOSPC; |
2459 | 2354 | ||
2460 | if (prjflags || | 2355 | if (prjflags || |
@@ -2558,7 +2453,7 @@ xfs_qm_vop_create_dqattach( | |||
2558 | ip->i_gdquot = gdqp; | 2453 | ip->i_gdquot = gdqp; |
2559 | ASSERT(XFS_IS_OQUOTA_ON(mp)); | 2454 | ASSERT(XFS_IS_OQUOTA_ON(mp)); |
2560 | ASSERT((XFS_IS_GQUOTA_ON(mp) ? | 2455 | ASSERT((XFS_IS_GQUOTA_ON(mp) ? |
2561 | ip->i_d.di_gid : ip->i_d.di_projid) == | 2456 | ip->i_d.di_gid : xfs_get_projid(ip)) == |
2562 | be32_to_cpu(gdqp->q_core.d_id)); | 2457 | be32_to_cpu(gdqp->q_core.d_id)); |
2563 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); | 2458 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); |
2564 | } | 2459 | } |
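The recurring ip->i_d.di_projid → xfs_get_projid(ip) substitution reflects the project ID being split into 16-bit high and low halves on disk elsewhere in this merge; the accessor recombines them. A sketch of the presumed recombination (hedged — check xfs_inode.h in this tree for the actual helper):

    /* Recombine a split 32-bit id from its assumed hi/lo halves. */
    static inline unsigned int get_projid(unsigned short hi, unsigned short lo)
    {
            return ((unsigned int)hi << 16) | lo;
    }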
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index c9446f1c726d..567b29b9f1b3 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
@@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone; | |||
65 | * block in the dquot/xqm code. | 65 | * block in the dquot/xqm code. |
66 | */ | 66 | */ |
67 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 | 67 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 |
68 | /* | ||
69 | * When doing a quotacheck, we log dquot clusters of this many FSBs at most | ||
70 | * in a single transaction. We don't want to ask for too huge a log reservation. | ||
71 | */ | ||
72 | #define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 | ||
73 | 68 | ||
74 | typedef xfs_dqhash_t xfs_dqlist_t; | 69 | typedef xfs_dqhash_t xfs_dqlist_t; |
75 | 70 | ||
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index bea02d786c5d..a0a829addca9 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c | |||
@@ -81,7 +81,7 @@ xfs_qm_statvfs( | |||
81 | xfs_mount_t *mp = ip->i_mount; | 81 | xfs_mount_t *mp = ip->i_mount; |
82 | xfs_dquot_t *dqp; | 82 | xfs_dquot_t *dqp; |
83 | 83 | ||
84 | if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { | 84 | if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { |
85 | xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); | 85 | xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); |
86 | xfs_qm_dqput(dqp); | 86 | xfs_qm_dqput(dqp); |
87 | } | 87 | } |
@@ -119,8 +119,7 @@ xfs_qm_newmount( | |||
119 | (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || | 119 | (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || |
120 | (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && | 120 | (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && |
121 | xfs_dev_is_read_only(mp, "changing quota state")) { | 121 | xfs_dev_is_read_only(mp, "changing quota state")) { |
122 | cmn_err(CE_WARN, | 122 | xfs_warn(mp, "please mount with%s%s%s%s.", |
123 | "XFS: please mount with%s%s%s%s.", | ||
124 | (!quotaondisk ? "out quota" : ""), | 123 | (!quotaondisk ? "out quota" : ""), |
125 | (uquotaondisk ? " usrquota" : ""), | 124 | (uquotaondisk ? " usrquota" : ""), |
126 | (pquotaondisk ? " prjquota" : ""), | 125 | (pquotaondisk ? " prjquota" : ""), |
@@ -135,7 +134,7 @@ xfs_qm_newmount( | |||
135 | */ | 134 | */ |
136 | if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { | 135 | if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { |
137 | /* | 136 | /* |
138 | * If an error occured, qm_mount_quotas code | 137 | * If an error occurred, qm_mount_quotas code |
139 | * has already disabled quotas. So, just finish | 138 | * has already disabled quotas. So, just finish |
140 | * mounting, and get on with the boring life | 139 | * mounting, and get on with the boring life |
141 | * without disk quotas. | 140 | * without disk quotas. |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 45e5849df238..2dadb15d5ca9 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -41,12 +41,6 @@ | |||
41 | #include "xfs_qm.h" | 41 | #include "xfs_qm.h" |
42 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
43 | 43 | ||
44 | #ifdef DEBUG | ||
45 | # define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args) | ||
46 | #else | ||
47 | # define qdprintk(s, args...) do { } while (0) | ||
48 | #endif | ||
49 | |||
50 | STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); | 44 | STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); |
51 | STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, | 45 | STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, |
52 | uint); | 46 | uint); |
@@ -178,7 +172,7 @@ xfs_qm_scall_quotaoff( | |||
178 | /* | 172 | /* |
179 | * Next we make the changes in the quota flag in the mount struct. | 173 | * Next we make the changes in the quota flag in the mount struct. |
180 | * This isn't protected by a particular lock directly, because we | 174 | * This isn't protected by a particular lock directly, because we |
181 | * don't want to take a mrlock everytime we depend on quotas being on. | 175 | * don't want to take a mrlock every time we depend on quotas being on. |
182 | */ | 176 | */ |
183 | mp->m_qflags &= ~(flags); | 177 | mp->m_qflags &= ~(flags); |
184 | 178 | ||
@@ -276,7 +270,7 @@ xfs_qm_scall_trunc_qfile( | |||
276 | goto out_unlock; | 270 | goto out_unlock; |
277 | } | 271 | } |
278 | 272 | ||
279 | xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 273 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
280 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 274 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
281 | 275 | ||
282 | out_unlock: | 276 | out_unlock: |
@@ -294,7 +288,8 @@ xfs_qm_scall_trunc_qfiles( | |||
294 | int error = 0, error2 = 0; | 288 | int error = 0, error2 = 0; |
295 | 289 | ||
296 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { | 290 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { |
297 | qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); | 291 | xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", |
292 | __func__, flags, mp->m_qflags); | ||
298 | return XFS_ERROR(EINVAL); | 293 | return XFS_ERROR(EINVAL); |
299 | } | 294 | } |
300 | 295 | ||
@@ -318,20 +313,19 @@ xfs_qm_scall_quotaon( | |||
318 | { | 313 | { |
319 | int error; | 314 | int error; |
320 | uint qf; | 315 | uint qf; |
321 | uint accflags; | ||
322 | __int64_t sbflags; | 316 | __int64_t sbflags; |
323 | 317 | ||
324 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); | 318 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); |
325 | /* | 319 | /* |
326 | * Switching on quota accounting must be done at mount time. | 320 | * Switching on quota accounting must be done at mount time. |
327 | */ | 321 | */ |
328 | accflags = flags & XFS_ALL_QUOTA_ACCT; | ||
329 | flags &= ~(XFS_ALL_QUOTA_ACCT); | 322 | flags &= ~(XFS_ALL_QUOTA_ACCT); |
330 | 323 | ||
331 | sbflags = 0; | 324 | sbflags = 0; |
332 | 325 | ||
333 | if (flags == 0) { | 326 | if (flags == 0) { |
334 | qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags); | 327 | xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", |
328 | __func__, mp->m_qflags); | ||
335 | return XFS_ERROR(EINVAL); | 329 | return XFS_ERROR(EINVAL); |
336 | } | 330 | } |
337 | 331 | ||
@@ -352,12 +346,13 @@ xfs_qm_scall_quotaon( | |||
352 | (flags & XFS_GQUOTA_ACCT) == 0 && | 346 | (flags & XFS_GQUOTA_ACCT) == 0 && |
353 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && | 347 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && |
354 | (flags & XFS_OQUOTA_ENFD))) { | 348 | (flags & XFS_OQUOTA_ENFD))) { |
355 | qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", | 349 | xfs_debug(mp, |
356 | flags, mp->m_sb.sb_qflags); | 350 | "%s: Can't enforce without acct, flags=%x sbflags=%x\n", |
351 | __func__, flags, mp->m_sb.sb_qflags); | ||
357 | return XFS_ERROR(EINVAL); | 352 | return XFS_ERROR(EINVAL); |
358 | } | 353 | } |
359 | /* | 354 | /* |
360 | * If everything's upto-date incore, then don't waste time. | 355 | * If everything's up-to-date incore, then don't waste time. |
361 | */ | 356 | */ |
362 | if ((mp->m_qflags & flags) == flags) | 357 | if ((mp->m_qflags & flags) == flags) |
363 | return XFS_ERROR(EEXIST); | 358 | return XFS_ERROR(EEXIST); |
@@ -541,7 +536,7 @@ xfs_qm_scall_setqlim( | |||
541 | q->qi_bsoftlimit = soft; | 536 | q->qi_bsoftlimit = soft; |
542 | } | 537 | } |
543 | } else { | 538 | } else { |
544 | qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); | 539 | xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); |
545 | } | 540 | } |
546 | hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? | 541 | hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? |
547 | (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : | 542 | (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : |
@@ -557,7 +552,7 @@ xfs_qm_scall_setqlim( | |||
557 | q->qi_rtbsoftlimit = soft; | 552 | q->qi_rtbsoftlimit = soft; |
558 | } | 553 | } |
559 | } else { | 554 | } else { |
560 | qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); | 555 | xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); |
561 | } | 556 | } |
562 | 557 | ||
563 | hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? | 558 | hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? |
@@ -574,7 +569,7 @@ xfs_qm_scall_setqlim( | |||
574 | q->qi_isoftlimit = soft; | 569 | q->qi_isoftlimit = soft; |
575 | } | 570 | } |
576 | } else { | 571 | } else { |
577 | qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); | 572 | xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); |
578 | } | 573 | } |
579 | 574 | ||
580 | /* | 575 | /* |
@@ -875,21 +870,14 @@ xfs_dqrele_inode( | |||
875 | struct xfs_perag *pag, | 870 | struct xfs_perag *pag, |
876 | int flags) | 871 | int flags) |
877 | { | 872 | { |
878 | int error; | ||
879 | |||
880 | /* skip quota inodes */ | 873 | /* skip quota inodes */ |
881 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || | 874 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || |
882 | ip == ip->i_mount->m_quotainfo->qi_gquotaip) { | 875 | ip == ip->i_mount->m_quotainfo->qi_gquotaip) { |
883 | ASSERT(ip->i_udquot == NULL); | 876 | ASSERT(ip->i_udquot == NULL); |
884 | ASSERT(ip->i_gdquot == NULL); | 877 | ASSERT(ip->i_gdquot == NULL); |
885 | read_unlock(&pag->pag_ici_lock); | ||
886 | return 0; | 878 | return 0; |
887 | } | 879 | } |
888 | 880 | ||
889 | error = xfs_sync_inode_valid(ip, pag); | ||
890 | if (error) | ||
891 | return error; | ||
892 | |||
893 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 881 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
894 | if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { | 882 | if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { |
895 | xfs_qm_dqrele(ip->i_udquot); | 883 | xfs_qm_dqrele(ip->i_udquot); |
@@ -900,8 +888,6 @@ xfs_dqrele_inode( | |||
900 | ip->i_gdquot = NULL; | 888 | ip->i_gdquot = NULL; |
901 | } | 889 | } |
902 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 890 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
903 | |||
904 | IRELE(ip); | ||
905 | return 0; | 891 | return 0; |
906 | } | 892 | } |
907 | 893 | ||
@@ -918,8 +904,7 @@ xfs_qm_dqrele_all_inodes( | |||
918 | uint flags) | 904 | uint flags) |
919 | { | 905 | { |
920 | ASSERT(mp->m_quotainfo); | 906 | ASSERT(mp->m_quotainfo); |
921 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, | 907 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); |
922 | XFS_ICI_NO_TAG, 0, NULL); | ||
923 | } | 908 | } |
924 | 909 | ||
925 | /*------------------------------------------------------------------------*/ | 910 | /*------------------------------------------------------------------------*/ |
@@ -949,10 +934,11 @@ struct mutex qcheck_lock; | |||
949 | #define DQTEST_LIST_PRINT(l, NXT, title) \ | 934 | #define DQTEST_LIST_PRINT(l, NXT, title) \ |
950 | { \ | 935 | { \ |
951 | xfs_dqtest_t *dqp; int i = 0;\ | 936 | xfs_dqtest_t *dqp; int i = 0;\ |
952 | cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ | 937 | xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \ |
953 | for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ | 938 | for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ |
954 | dqp = (xfs_dqtest_t *)dqp->NXT) { \ | 939 | dqp = (xfs_dqtest_t *)dqp->NXT) { \ |
955 | cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ | 940 | xfs_debug(dqp->q_mount, \ |
941 | " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ | ||
956 | ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ | 942 | ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ |
957 | dqp->d_bcount, dqp->d_icount); } \ | 943 | dqp->d_bcount, dqp->d_icount); } \ |
958 | } | 944 | } |
@@ -976,16 +962,17 @@ xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) | |||
976 | } | 962 | } |
977 | STATIC void | 963 | STATIC void |
978 | xfs_qm_dqtest_print( | 964 | xfs_qm_dqtest_print( |
979 | xfs_dqtest_t *d) | 965 | struct xfs_mount *mp, |
966 | struct dqtest *d) | ||
980 | { | 967 | { |
981 | cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); | 968 | xfs_debug(mp, "-----------DQTEST DQUOT----------------"); |
982 | cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); | 969 | xfs_debug(mp, "---- dquot ID = %d", d->d_id); |
983 | cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); | 970 | xfs_debug(mp, "---- fs = 0x%p", d->q_mount); |
984 | cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", | 971 | xfs_debug(mp, "---- bcount = %Lu (0x%x)", |
985 | d->d_bcount, (int)d->d_bcount); | 972 | d->d_bcount, (int)d->d_bcount); |
986 | cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", | 973 | xfs_debug(mp, "---- icount = %Lu (0x%x)", |
987 | d->d_icount, (int)d->d_icount); | 974 | d->d_icount, (int)d->d_icount); |
988 | cmn_err(CE_DEBUG, "---------------------------"); | 975 | xfs_debug(mp, "---------------------------"); |
989 | } | 976 | } |
990 | 977 | ||
991 | STATIC void | 978 | STATIC void |
@@ -999,12 +986,14 @@ xfs_qm_dqtest_failed( | |||
999 | { | 986 | { |
1000 | qmtest_nfails++; | 987 | qmtest_nfails++; |
1001 | if (error) | 988 | if (error) |
1002 | cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s", | 989 | xfs_debug(dqp->q_mount, |
1003 | d->d_id, error, reason); | 990 | "quotacheck failed id=%d, err=%d\nreason: %s", |
991 | d->d_id, error, reason); | ||
1004 | else | 992 | else |
1005 | cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]", | 993 | xfs_debug(dqp->q_mount, |
1006 | d->d_id, reason, (int)a, (int)b); | 994 | "quotacheck failed id=%d (%s) [%d != %d]", |
1007 | xfs_qm_dqtest_print(d); | 995 | d->d_id, reason, (int)a, (int)b); |
996 | xfs_qm_dqtest_print(dqp->q_mount, d); | ||
1008 | if (dqp) | 997 | if (dqp) |
1009 | xfs_qm_dqprint(dqp); | 998 | xfs_qm_dqprint(dqp); |
1010 | } | 999 | } |
@@ -1031,9 +1020,9 @@ xfs_dqtest_cmp2( | |||
1031 | be64_to_cpu(dqp->q_core.d_bcount) >= | 1020 | be64_to_cpu(dqp->q_core.d_bcount) >= |
1032 | be64_to_cpu(dqp->q_core.d_blk_softlimit)) { | 1021 | be64_to_cpu(dqp->q_core.d_blk_softlimit)) { |
1033 | if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { | 1022 | if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { |
1034 | cmn_err(CE_DEBUG, | 1023 | xfs_debug(dqp->q_mount, |
1035 | "%d [%s] [0x%p] BLK TIMER NOT STARTED", | 1024 | "%d [%s] BLK TIMER NOT STARTED", |
1036 | d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); | 1025 | d->d_id, DQFLAGTO_TYPESTR(d)); |
1037 | err++; | 1026 | err++; |
1038 | } | 1027 | } |
1039 | } | 1028 | } |
@@ -1041,16 +1030,16 @@ xfs_dqtest_cmp2( | |||
1041 | be64_to_cpu(dqp->q_core.d_icount) >= | 1030 | be64_to_cpu(dqp->q_core.d_icount) >= |
1042 | be64_to_cpu(dqp->q_core.d_ino_softlimit)) { | 1031 | be64_to_cpu(dqp->q_core.d_ino_softlimit)) { |
1043 | if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { | 1032 | if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { |
1044 | cmn_err(CE_DEBUG, | 1033 | xfs_debug(dqp->q_mount, |
1045 | "%d [%s] [0x%p] INO TIMER NOT STARTED", | 1034 | "%d [%s] INO TIMER NOT STARTED", |
1046 | d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); | 1035 | d->d_id, DQFLAGTO_TYPESTR(d)); |
1047 | err++; | 1036 | err++; |
1048 | } | 1037 | } |
1049 | } | 1038 | } |
1050 | #ifdef QUOTADEBUG | 1039 | #ifdef QUOTADEBUG |
1051 | if (!err) { | 1040 | if (!err) { |
1052 | cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", | 1041 | xfs_debug(dqp->q_mount, "%d [%s] qchecked", |
1053 | d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); | 1042 | d->d_id, DQFLAGTO_TYPESTR(d)); |
1054 | } | 1043 | } |
1055 | #endif | 1044 | #endif |
1056 | return (err); | 1045 | return (err); |
@@ -1147,8 +1136,8 @@ xfs_qm_internalqcheck_adjust( | |||
1147 | 1136 | ||
1148 | if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { | 1137 | if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { |
1149 | *res = BULKSTAT_RV_NOTHING; | 1138 | *res = BULKSTAT_RV_NOTHING; |
1150 | qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n", | 1139 | xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n", |
1151 | (unsigned long long) ino, | 1140 | __func__, (unsigned long long) ino, |
1152 | (unsigned long long) mp->m_sb.sb_uquotino, | 1141 | (unsigned long long) mp->m_sb.sb_uquotino, |
1153 | (unsigned long long) mp->m_sb.sb_gquotino); | 1142 | (unsigned long long) mp->m_sb.sb_gquotino); |
1154 | return XFS_ERROR(EINVAL); | 1143 | return XFS_ERROR(EINVAL); |
@@ -1175,7 +1164,7 @@ xfs_qm_internalqcheck_adjust( | |||
1175 | } | 1164 | } |
1176 | xfs_qm_internalqcheck_get_dquots(mp, | 1165 | xfs_qm_internalqcheck_get_dquots(mp, |
1177 | (xfs_dqid_t) ip->i_d.di_uid, | 1166 | (xfs_dqid_t) ip->i_d.di_uid, |
1178 | (xfs_dqid_t) ip->i_d.di_projid, | 1167 | (xfs_dqid_t) xfs_get_projid(ip), |
1179 | (xfs_dqid_t) ip->i_d.di_gid, | 1168 | (xfs_dqid_t) ip->i_d.di_gid, |
1180 | &ud, &gd); | 1169 | &ud, &gd); |
1181 | if (XFS_IS_UQUOTA_ON(mp)) { | 1170 | if (XFS_IS_UQUOTA_ON(mp)) { |
@@ -1233,12 +1222,12 @@ xfs_qm_internalqcheck( | |||
1233 | xfs_qm_internalqcheck_adjust, | 1222 | xfs_qm_internalqcheck_adjust, |
1234 | 0, NULL, &done); | 1223 | 0, NULL, &done); |
1235 | if (error) { | 1224 | if (error) { |
1236 | cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); | 1225 | xfs_debug(mp, "Bulkstat returned error 0x%x", error); |
1237 | break; | 1226 | break; |
1238 | } | 1227 | } |
1239 | } while (!done); | 1228 | } while (!done); |
1240 | 1229 | ||
1241 | cmn_err(CE_DEBUG, "Checking results against system dquots"); | 1230 | xfs_debug(mp, "Checking results against system dquots"); |
1242 | for (i = 0; i < qmtest_hashmask; i++) { | 1231 | for (i = 0; i < qmtest_hashmask; i++) { |
1243 | xfs_dqtest_t *d, *n; | 1232 | xfs_dqtest_t *d, *n; |
1244 | xfs_dqhash_t *h; | 1233 | xfs_dqhash_t *h; |
@@ -1256,10 +1245,10 @@ xfs_qm_internalqcheck( | |||
1256 | } | 1245 | } |
1257 | 1246 | ||
1258 | if (qmtest_nfails) { | 1247 | if (qmtest_nfails) { |
1259 | cmn_err(CE_DEBUG, "******** quotacheck failed ********"); | 1248 | xfs_debug(mp, "******** quotacheck failed ********"); |
1260 | cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails); | 1249 | xfs_debug(mp, "failures = %d", qmtest_nfails); |
1261 | } else { | 1250 | } else { |
1262 | cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); | 1251 | xfs_debug(mp, "******** quotacheck successful! ********"); |
1263 | } | 1252 | } |
1264 | kmem_free(qmtest_udqtab); | 1253 | kmem_free(qmtest_udqtab); |
1265 | kmem_free(qmtest_gdqtab); | 1254 | kmem_free(qmtest_gdqtab); |
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 7de91d1b75c0..2a3648731331 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c | |||
@@ -643,8 +643,9 @@ xfs_trans_dqresv( | |||
643 | (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && | 643 | (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && |
644 | (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { | 644 | (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { |
645 | #ifdef QUOTADEBUG | 645 | #ifdef QUOTADEBUG |
646 | cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld" | 646 | xfs_debug(mp, |
647 | " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit); | 647 | "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?", |
648 | nblks, *resbcountp, hardlimit); | ||
648 | #endif | 649 | #endif |
649 | if (nblks > 0) { | 650 | if (nblks > 0) { |
650 | /* | 651 | /* |
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c deleted file mode 100644 index 975aa10e1a47..000000000000 --- a/fs/xfs/support/debug.c +++ /dev/null | |||
@@ -1,115 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include <xfs.h> | ||
19 | #include "debug.h" | ||
20 | |||
21 | /* xfs_mount.h drags a lot of crap in, sorry.. */ | ||
22 | #include "xfs_sb.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | ||
26 | #include "xfs_error.h" | ||
27 | |||
28 | static char message[1024]; /* keep it off the stack */ | ||
29 | static DEFINE_SPINLOCK(xfs_err_lock); | ||
30 | |||
31 | /* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */ | ||
32 | #define XFS_MAX_ERR_LEVEL 7 | ||
33 | #define XFS_ERR_MASK ((1 << 3) - 1) | ||
34 | static const char * const err_level[XFS_MAX_ERR_LEVEL+1] = | ||
35 | {KERN_EMERG, KERN_ALERT, KERN_CRIT, | ||
36 | KERN_ERR, KERN_WARNING, KERN_NOTICE, | ||
37 | KERN_INFO, KERN_DEBUG}; | ||
38 | |||
39 | void | ||
40 | cmn_err(register int level, char *fmt, ...) | ||
41 | { | ||
42 | char *fp = fmt; | ||
43 | int len; | ||
44 | ulong flags; | ||
45 | va_list ap; | ||
46 | |||
47 | level &= XFS_ERR_MASK; | ||
48 | if (level > XFS_MAX_ERR_LEVEL) | ||
49 | level = XFS_MAX_ERR_LEVEL; | ||
50 | spin_lock_irqsave(&xfs_err_lock,flags); | ||
51 | va_start(ap, fmt); | ||
52 | if (*fmt == '!') fp++; | ||
53 | len = vsnprintf(message, sizeof(message), fp, ap); | ||
54 | if (len >= sizeof(message)) | ||
55 | len = sizeof(message) - 1; | ||
56 | if (message[len-1] == '\n') | ||
57 | message[len-1] = 0; | ||
58 | printk("%s%s\n", err_level[level], message); | ||
59 | va_end(ap); | ||
60 | spin_unlock_irqrestore(&xfs_err_lock,flags); | ||
61 | BUG_ON(level == CE_PANIC); | ||
62 | } | ||
63 | |||
64 | void | ||
65 | xfs_fs_vcmn_err( | ||
66 | int level, | ||
67 | struct xfs_mount *mp, | ||
68 | char *fmt, | ||
69 | va_list ap) | ||
70 | { | ||
71 | unsigned long flags; | ||
72 | int len = 0; | ||
73 | |||
74 | level &= XFS_ERR_MASK; | ||
75 | if (level > XFS_MAX_ERR_LEVEL) | ||
76 | level = XFS_MAX_ERR_LEVEL; | ||
77 | |||
78 | spin_lock_irqsave(&xfs_err_lock,flags); | ||
79 | |||
80 | if (mp) { | ||
81 | len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname); | ||
82 | |||
83 | /* | ||
84 | * Skip the printk if we can't print anything useful | ||
85 | * due to an over-long device name. | ||
86 | */ | ||
87 | if (len >= sizeof(message)) | ||
88 | goto out; | ||
89 | } | ||
90 | |||
91 | len = vsnprintf(message + len, sizeof(message) - len, fmt, ap); | ||
92 | if (len >= sizeof(message)) | ||
93 | len = sizeof(message) - 1; | ||
94 | if (message[len-1] == '\n') | ||
95 | message[len-1] = 0; | ||
96 | |||
97 | printk("%s%s\n", err_level[level], message); | ||
98 | out: | ||
99 | spin_unlock_irqrestore(&xfs_err_lock,flags); | ||
100 | |||
101 | BUG_ON(level == CE_PANIC); | ||
102 | } | ||
103 | |||
104 | void | ||
105 | assfail(char *expr, char *file, int line) | ||
106 | { | ||
107 | printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line); | ||
108 | BUG(); | ||
109 | } | ||
110 | |||
111 | void | ||
112 | xfs_hex_dump(void *p, int length) | ||
113 | { | ||
114 | print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); | ||
115 | } | ||
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h deleted file mode 100644 index d2d20462fd4f..000000000000 --- a/fs/xfs/support/debug.h +++ /dev/null | |||
@@ -1,54 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_SUPPORT_DEBUG_H__ | ||
19 | #define __XFS_SUPPORT_DEBUG_H__ | ||
20 | |||
21 | #include <stdarg.h> | ||
22 | |||
23 | #define CE_DEBUG 7 /* debug */ | ||
24 | #define CE_CONT 6 /* continuation */ | ||
25 | #define CE_NOTE 5 /* notice */ | ||
26 | #define CE_WARN 4 /* warning */ | ||
27 | #define CE_ALERT 1 /* alert */ | ||
28 | #define CE_PANIC 0 /* panic */ | ||
29 | |||
30 | extern void cmn_err(int, char *, ...) | ||
31 | __attribute__ ((format (printf, 2, 3))); | ||
32 | extern void assfail(char *expr, char *f, int l); | ||
33 | |||
34 | #define ASSERT_ALWAYS(expr) \ | ||
35 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
36 | |||
37 | #ifndef DEBUG | ||
38 | #define ASSERT(expr) ((void)0) | ||
39 | |||
40 | #ifndef STATIC | ||
41 | # define STATIC static noinline | ||
42 | #endif | ||
43 | |||
44 | #else /* DEBUG */ | ||
45 | |||
46 | #define ASSERT(expr) \ | ||
47 | (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) | ||
48 | |||
49 | #ifndef STATIC | ||
50 | # define STATIC noinline | ||
51 | #endif | ||
52 | |||
53 | #endif /* DEBUG */ | ||
54 | #endif /* __XFS_SUPPORT_DEBUG_H__ */ | ||
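The deleted debug.h carried the classic compile-out assertion: in DEBUG builds ASSERT() expands to a check that calls assfail(), otherwise to ((void)0). The same pattern in plain C, with unlikely() dropped for portability:

    #include <stdio.h>
    #include <stdlib.h>

    static void assfail(const char *expr, const char *file, int line)
    {
            fprintf(stderr, "Assertion failed: %s, file: %s, line: %d\n",
                    expr, file, line);
            abort();
    }

    #ifdef DEBUG
    #define ASSERT(expr) \
            ((expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
    #else
    #define ASSERT(expr) ((void)0)   /* compiles to nothing in release */
    #endif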
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 0135e2a669d7..11dd72070cbb 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h | |||
@@ -42,7 +42,7 @@ struct xfs_acl { | |||
42 | #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) | 42 | #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) |
43 | 43 | ||
44 | #ifdef CONFIG_XFS_POSIX_ACL | 44 | #ifdef CONFIG_XFS_POSIX_ACL |
45 | extern int xfs_check_acl(struct inode *inode, int mask); | 45 | extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags); |
46 | extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); | 46 | extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); |
47 | extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); | 47 | extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); |
48 | extern int xfs_acl_chmod(struct inode *inode); | 48 | extern int xfs_acl_chmod(struct inode *inode); |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 4917d4eed4ed..6530769a999b 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -187,7 +187,9 @@ struct xfs_busy_extent { | |||
187 | xfs_agnumber_t agno; | 187 | xfs_agnumber_t agno; |
188 | xfs_agblock_t bno; | 188 | xfs_agblock_t bno; |
189 | xfs_extlen_t length; | 189 | xfs_extlen_t length; |
190 | xlog_tid_t tid; /* transaction that created this */ | 190 | unsigned int flags; |
191 | #define XFS_ALLOC_BUSY_DISCARDED 0x01 /* undergoing a discard op. */ | ||
192 | #define XFS_ALLOC_BUSY_SKIP_DISCARD 0x02 /* do not discard */ | ||
191 | }; | 193 | }; |
192 | 194 | ||
193 | /* | 195 | /* |
@@ -227,9 +229,18 @@ typedef struct xfs_perag { | |||
227 | 229 | ||
228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 230 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
229 | 231 | ||
230 | rwlock_t pag_ici_lock; /* incore inode lock */ | 232 | spinlock_t pag_ici_lock; /* incore inode cache lock */ |
231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ | 233 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ |
232 | int pag_ici_reclaimable; /* reclaimable inodes */ | 234 | int pag_ici_reclaimable; /* reclaimable inodes */ |
235 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ | ||
236 | unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ | ||
237 | |||
238 | /* buffer cache index */ | ||
239 | spinlock_t pag_buf_lock; /* lock for pag_buf_tree */ | ||
240 | struct rb_root pag_buf_tree; /* ordered tree of active buffers */ | ||
241 | |||
242 | /* for rcu-safe freeing */ | ||
243 | struct rcu_head rcu_head; | ||
233 | #endif | 244 | #endif |
234 | int pagb_count; /* pagb slots in use */ | 245 | int pagb_count; /* pagb slots in use */ |
235 | } xfs_perag_t; | 246 | } xfs_perag_t; |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index af168faccc7a..95862bbff56b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -41,23 +41,13 @@ | |||
41 | #define XFSA_FIXUP_BNO_OK 1 | 41 | #define XFSA_FIXUP_BNO_OK 1 |
42 | #define XFSA_FIXUP_CNT_OK 2 | 42 | #define XFSA_FIXUP_CNT_OK 2 |
43 | 43 | ||
44 | static int | ||
45 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
46 | xfs_agblock_t bno, xfs_extlen_t len); | ||
47 | |||
48 | /* | ||
49 | * Prototypes for per-ag allocation routines | ||
50 | */ | ||
51 | |||
52 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); | 44 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); |
53 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); | 45 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); |
54 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); | 46 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); |
55 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, | 47 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, |
56 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); | 48 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); |
57 | 49 | STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *, | |
58 | /* | 50 | xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *); |
59 | * Internal functions. | ||
60 | */ | ||
61 | 51 | ||
62 | /* | 52 | /* |
63 | * Lookup the record equal to [bno, len] in the btree given by cur. | 53 | * Lookup the record equal to [bno, len] in the btree given by cur. |
@@ -94,7 +84,7 @@ xfs_alloc_lookup_ge( | |||
94 | * Lookup the first record less than or equal to [bno, len] | 84 | * Lookup the first record less than or equal to [bno, len] |
95 | * in the btree given by cur. | 85 | * in the btree given by cur. |
96 | */ | 86 | */ |
97 | STATIC int /* error */ | 87 | int /* error */ |
98 | xfs_alloc_lookup_le( | 88 | xfs_alloc_lookup_le( |
99 | struct xfs_btree_cur *cur, /* btree cursor */ | 89 | struct xfs_btree_cur *cur, /* btree cursor */ |
100 | xfs_agblock_t bno, /* starting block of extent */ | 90 | xfs_agblock_t bno, /* starting block of extent */ |
@@ -127,7 +117,7 @@ xfs_alloc_update( | |||
127 | /* | 117 | /* |
128 | * Get the data from the pointed-to record. | 118 | * Get the data from the pointed-to record. |
129 | */ | 119 | */ |
130 | STATIC int /* error */ | 120 | int /* error */ |
131 | xfs_alloc_get_rec( | 121 | xfs_alloc_get_rec( |
132 | struct xfs_btree_cur *cur, /* btree cursor */ | 122 | struct xfs_btree_cur *cur, /* btree cursor */ |
133 | xfs_agblock_t *bno, /* output: starting block of extent */ | 123 | xfs_agblock_t *bno, /* output: starting block of extent */ |
@@ -151,27 +141,28 @@ xfs_alloc_get_rec( | |||
151 | */ | 141 | */ |
152 | STATIC void | 142 | STATIC void |
153 | xfs_alloc_compute_aligned( | 143 | xfs_alloc_compute_aligned( |
144 | xfs_alloc_arg_t *args, /* allocation argument structure */ | ||
154 | xfs_agblock_t foundbno, /* starting block in found extent */ | 145 | xfs_agblock_t foundbno, /* starting block in found extent */ |
155 | xfs_extlen_t foundlen, /* length in found extent */ | 146 | xfs_extlen_t foundlen, /* length in found extent */ |
156 | xfs_extlen_t alignment, /* alignment for allocation */ | ||
157 | xfs_extlen_t minlen, /* minimum length for allocation */ | ||
158 | xfs_agblock_t *resbno, /* result block number */ | 147 | xfs_agblock_t *resbno, /* result block number */ |
159 | xfs_extlen_t *reslen) /* result length */ | 148 | xfs_extlen_t *reslen) /* result length */ |
160 | { | 149 | { |
161 | xfs_agblock_t bno; | 150 | xfs_agblock_t bno; |
162 | xfs_extlen_t diff; | ||
163 | xfs_extlen_t len; | 151 | xfs_extlen_t len; |
164 | 152 | ||
165 | if (alignment > 1 && foundlen >= minlen) { | 153 | /* Trim busy sections out of found extent */ |
166 | bno = roundup(foundbno, alignment); | 154 | xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len); |
167 | diff = bno - foundbno; | 155 | |
168 | len = diff >= foundlen ? 0 : foundlen - diff; | 156 | if (args->alignment > 1 && len >= args->minlen) { |
157 | xfs_agblock_t aligned_bno = roundup(bno, args->alignment); | ||
158 | xfs_extlen_t diff = aligned_bno - bno; | ||
159 | |||
160 | *resbno = aligned_bno; | ||
161 | *reslen = diff >= len ? 0 : len - diff; | ||
169 | } else { | 162 | } else { |
170 | bno = foundbno; | 163 | *resbno = bno; |
171 | len = foundlen; | 164 | *reslen = len; |
172 | } | 165 | } |
173 | *resbno = bno; | ||
174 | *reslen = len; | ||
175 | } | 166 | } |
176 | 167 | ||
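The new xfs_alloc_compute_aligned first trims busy blocks out of the found extent, then rounds the start up to the requested alignment and shrinks the usable length by the same amount. The rounding arithmetic in isolation, as a runnable check (standalone helper, not XFS code):

    #include <assert.h>

    /* Round bno up to a multiple of align; return the usable length. */
    static unsigned long align_extent(unsigned long bno, unsigned long len,
                                      unsigned long align,
                                      unsigned long *out_bno)
    {
            unsigned long aligned = (bno + align - 1) / align * align;
            unsigned long diff = aligned - bno;

            *out_bno = aligned;
            return diff >= len ? 0 : len - diff;
    }

    int main(void)
    {
            unsigned long bno;

            /* extent [7, 17) with alignment 4 trims to [8, 17): 9 blocks */
            assert(align_extent(7, 10, 4, &bno) == 9 && bno == 8);
            return 0;
    }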
177 | /* | 168 | /* |
@@ -285,7 +276,6 @@ xfs_alloc_fix_minleft( | |||
285 | return 1; | 276 | return 1; |
286 | agf = XFS_BUF_TO_AGF(args->agbp); | 277 | agf = XFS_BUF_TO_AGF(args->agbp); |
287 | diff = be32_to_cpu(agf->agf_freeblks) | 278 | diff = be32_to_cpu(agf->agf_freeblks) |
288 | + be32_to_cpu(agf->agf_flcount) | ||
289 | - args->len - args->minleft; | 279 | - args->len - args->minleft; |
290 | if (diff >= 0) | 280 | if (diff >= 0) |
291 | return 1; | 281 | return 1; |
@@ -468,6 +458,27 @@ xfs_alloc_read_agfl( | |||
468 | return 0; | 458 | return 0; |
469 | } | 459 | } |
470 | 460 | ||
461 | STATIC int | ||
462 | xfs_alloc_update_counters( | ||
463 | struct xfs_trans *tp, | ||
464 | struct xfs_perag *pag, | ||
465 | struct xfs_buf *agbp, | ||
466 | long len) | ||
467 | { | ||
468 | struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); | ||
469 | |||
470 | pag->pagf_freeblks += len; | ||
471 | be32_add_cpu(&agf->agf_freeblks, len); | ||
472 | |||
473 | xfs_trans_agblocks_delta(tp, len); | ||
474 | if (unlikely(be32_to_cpu(agf->agf_freeblks) > | ||
475 | be32_to_cpu(agf->agf_length))) | ||
476 | return EFSCORRUPTED; | ||
477 | |||
478 | xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); | ||
479 | return 0; | ||
480 | } | ||
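xfs_alloc_update_counters centralises the free-block accounting: it applies one signed delta to both the in-memory per-AG count and the on-disk AGF, and reports corruption if the free count ever exceeds the AG length. The invariant in toy form (hedged sketch; EIO stands in for EFSCORRUPTED):

    #include <errno.h>

    struct ag { long freeblks; long length; };

    /* Apply a signed delta and verify the basic sanity invariant. */
    static int update_counters(struct ag *ag, long len)
    {
            ag->freeblks += len;
            if (ag->freeblks < 0 || ag->freeblks > ag->length)
                    return EIO;      /* stand-in for EFSCORRUPTED */
            return 0;
    }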
481 | |||
471 | /* | 482 | /* |
472 | * Allocation group level functions. | 483 | * Allocation group level functions. |
473 | */ | 484 | */ |
@@ -509,49 +520,36 @@ xfs_alloc_ag_vextent( | |||
509 | ASSERT(0); | 520 | ASSERT(0); |
510 | /* NOTREACHED */ | 521 | /* NOTREACHED */ |
511 | } | 522 | } |
512 | if (error) | 523 | |
524 | if (error || args->agbno == NULLAGBLOCK) | ||
513 | return error; | 525 | return error; |
514 | /* | ||
515 | * If the allocation worked, need to change the agf structure | ||
516 | * (and log it), and the superblock. | ||
517 | */ | ||
518 | if (args->agbno != NULLAGBLOCK) { | ||
519 | xfs_agf_t *agf; /* allocation group freelist header */ | ||
520 | long slen = (long)args->len; | ||
521 | 526 | ||
522 | ASSERT(args->len >= args->minlen && args->len <= args->maxlen); | 527 | ASSERT(args->len >= args->minlen); |
523 | ASSERT(!(args->wasfromfl) || !args->isfl); | 528 | ASSERT(args->len <= args->maxlen); |
524 | ASSERT(args->agbno % args->alignment == 0); | 529 | ASSERT(!args->wasfromfl || !args->isfl); |
525 | if (!(args->wasfromfl)) { | 530 | ASSERT(args->agbno % args->alignment == 0); |
526 | 531 | ||
527 | agf = XFS_BUF_TO_AGF(args->agbp); | 532 | if (!args->wasfromfl) { |
528 | be32_add_cpu(&agf->agf_freeblks, -(args->len)); | 533 | error = xfs_alloc_update_counters(args->tp, args->pag, |
529 | xfs_trans_agblocks_delta(args->tp, | 534 | args->agbp, |
530 | -((long)(args->len))); | 535 | -((long)(args->len))); |
531 | args->pag->pagf_freeblks -= args->len; | 536 | if (error) |
532 | ASSERT(be32_to_cpu(agf->agf_freeblks) <= | 537 | return error; |
533 | be32_to_cpu(agf->agf_length)); | 538 | |
534 | xfs_alloc_log_agf(args->tp, args->agbp, | 539 | ASSERT(!xfs_alloc_busy_search(args->mp, args->agno, |
535 | XFS_AGF_FREEBLKS); | 540 | args->agbno, args->len)); |
536 | /* | ||
537 | * Search the busylist for these blocks and mark the | ||
538 | * transaction as synchronous if blocks are found. This | ||
539 | * avoids the need to block due to a synchronous log | ||
540 | * force to ensure correct ordering as the synchronous | ||
541 | * transaction will guarantee that for us. | ||
542 | */ | ||
543 | if (xfs_alloc_busy_search(args->mp, args->agno, | ||
544 | args->agbno, args->len)) | ||
545 | xfs_trans_set_sync(args->tp); | ||
546 | } | ||
547 | if (!args->isfl) | ||
548 | xfs_trans_mod_sb(args->tp, | ||
549 | args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : | ||
550 | XFS_TRANS_SB_FDBLOCKS, -slen); | ||
551 | XFS_STATS_INC(xs_allocx); | ||
552 | XFS_STATS_ADD(xs_allocb, args->len); | ||
553 | } | 541 | } |
554 | return 0; | 542 | |
543 | if (!args->isfl) { | ||
544 | xfs_trans_mod_sb(args->tp, args->wasdel ? | ||
545 | XFS_TRANS_SB_RES_FDBLOCKS : | ||
546 | XFS_TRANS_SB_FDBLOCKS, | ||
547 | -((long)(args->len))); | ||
548 | } | ||
549 | |||
550 | XFS_STATS_INC(xs_allocx); | ||
551 | XFS_STATS_ADD(xs_allocb, args->len); | ||
552 | return error; | ||
555 | } | 553 | } |
556 | 554 | ||
557 | /* | 555 | /* |
@@ -566,72 +564,77 @@ xfs_alloc_ag_vextent_exact( | |||
566 | { | 564 | { |
567 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ | 565 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ |
568 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ | 566 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ |
569 | xfs_agblock_t end; /* end of allocated extent */ | ||
570 | int error; | 567 | int error; |
571 | xfs_agblock_t fbno; /* start block of found extent */ | 568 | xfs_agblock_t fbno; /* start block of found extent */ |
572 | xfs_agblock_t fend; /* end block of found extent */ | ||
573 | xfs_extlen_t flen; /* length of found extent */ | 569 | xfs_extlen_t flen; /* length of found extent */ |
570 | xfs_agblock_t tbno; /* start block of trimmed extent */ | ||
571 | xfs_extlen_t tlen; /* length of trimmed extent */ | ||
572 | xfs_agblock_t tend; /* end block of trimmed extent */ | ||
573 | xfs_agblock_t end; /* end of allocated extent */ | ||
574 | int i; /* success/failure of operation */ | 574 | int i; /* success/failure of operation */ |
575 | xfs_agblock_t maxend; /* end of maximal extent */ | ||
576 | xfs_agblock_t minend; /* end of minimal extent */ | ||
577 | xfs_extlen_t rlen; /* length of returned extent */ | 575 | xfs_extlen_t rlen; /* length of returned extent */ |
578 | 576 | ||
579 | ASSERT(args->alignment == 1); | 577 | ASSERT(args->alignment == 1); |
578 | |||
580 | /* | 579 | /* |
581 | * Allocate/initialize a cursor for the by-number freespace btree. | 580 | * Allocate/initialize a cursor for the by-number freespace btree. |
582 | */ | 581 | */ |
583 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 582 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
584 | args->agno, XFS_BTNUM_BNO); | 583 | args->agno, XFS_BTNUM_BNO); |
584 | |||
585 | /* | 585 | /* |
586 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). | 586 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). |
587 | * Look for the closest free block <= bno, it must contain bno | 587 | * Look for the closest free block <= bno, it must contain bno |
588 | * if any free block does. | 588 | * if any free block does. |
589 | */ | 589 | */ |
590 | if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) | 590 | error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); |
591 | if (error) | ||
591 | goto error0; | 592 | goto error0; |
592 | if (!i) { | 593 | if (!i) |
593 | /* | 594 | goto not_found; |
594 | * Didn't find it, return null. | 595 | |
595 | */ | ||
596 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
597 | args->agbno = NULLAGBLOCK; | ||
598 | return 0; | ||
599 | } | ||
600 | /* | 596 | /* |
601 | * Grab the freespace record. | 597 | * Grab the freespace record. |
602 | */ | 598 | */ |
603 | if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) | 599 | error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); |
600 | if (error) | ||
604 | goto error0; | 601 | goto error0; |
605 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 602 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
606 | ASSERT(fbno <= args->agbno); | 603 | ASSERT(fbno <= args->agbno); |
607 | minend = args->agbno + args->minlen; | 604 | |
608 | maxend = args->agbno + args->maxlen; | ||
609 | fend = fbno + flen; | ||
610 | /* | 605 | /* |
611 | * Give up if the freespace isn't long enough for the minimum request. | 606 | * Check for overlapping busy extents. |
612 | */ | 607 | */ |
613 | if (fend < minend) { | 608 | xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen); |
614 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 609 | |
615 | args->agbno = NULLAGBLOCK; | ||
616 | return 0; | ||
617 | } | ||
618 | /* | 610 | /* |
619 | * End of extent will be smaller of the freespace end and the | 611 | * Give up if the start of the extent is busy, or the freespace isn't |
620 | * maximal requested end. | 612 | * long enough for the minimum request. |
621 | */ | 613 | */ |
622 | end = XFS_AGBLOCK_MIN(fend, maxend); | 614 | if (tbno > args->agbno) |
615 | goto not_found; | ||
616 | if (tlen < args->minlen) | ||
617 | goto not_found; | ||
618 | tend = tbno + tlen; | ||
619 | if (tend < args->agbno + args->minlen) | ||
620 | goto not_found; | ||
621 | |||
623 | /* | 622 | /* |
623 | * End of extent will be smaller of the freespace end and the | ||
624 | * maximal requested end. | ||
625 | * | ||
624 | * Fix the length according to mod and prod if given. | 626 | * Fix the length according to mod and prod if given. |
625 | */ | 627 | */ |
628 | end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); | ||
626 | args->len = end - args->agbno; | 629 | args->len = end - args->agbno; |
627 | xfs_alloc_fix_len(args); | 630 | xfs_alloc_fix_len(args); |
628 | if (!xfs_alloc_fix_minleft(args)) { | 631 | if (!xfs_alloc_fix_minleft(args)) |
629 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 632 | goto not_found; |
630 | return 0; | 633 | |
631 | } | ||
632 | rlen = args->len; | 634 | rlen = args->len; |
633 | ASSERT(args->agbno + rlen <= fend); | 635 | ASSERT(args->agbno + rlen <= tend); |
634 | end = args->agbno + rlen; | 636 | end = args->agbno + rlen; |
637 | |||
635 | /* | 638 | /* |
636 | * We are allocating agbno for rlen [agbno .. end] | 639 | * We are allocating agbno for rlen [agbno .. end] |
637 | * Allocate/initialize a cursor for the by-size btree. | 640 | * Allocate/initialize a cursor for the by-size btree. |
@@ -640,16 +643,25 @@ xfs_alloc_ag_vextent_exact( | |||
640 | args->agno, XFS_BTNUM_CNT); | 643 | args->agno, XFS_BTNUM_CNT); |
641 | ASSERT(args->agbno + args->len <= | 644 | ASSERT(args->agbno + args->len <= |
642 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 645 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
643 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, | 646 | error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, |
644 | args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { | 647 | args->len, XFSA_FIXUP_BNO_OK); |
648 | if (error) { | ||
645 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); | 649 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); |
646 | goto error0; | 650 | goto error0; |
647 | } | 651 | } |
652 | |||
648 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 653 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); |
649 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 654 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
650 | 655 | ||
651 | trace_xfs_alloc_exact_done(args); | ||
652 | args->wasfromfl = 0; | 656 | args->wasfromfl = 0; |
657 | trace_xfs_alloc_exact_done(args); | ||
658 | return 0; | ||
659 | |||
660 | not_found: | ||
661 | /* Didn't find it, return null. */ | ||
662 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
663 | args->agbno = NULLAGBLOCK; | ||
664 | trace_xfs_alloc_exact_notfound(args); | ||
653 | return 0; | 665 | return 0; |
654 | 666 | ||
655 | error0: | 667 | error0: |
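The rewritten exact allocator now trims the found freespace against busy extents before deciding whether the request fits. A self-contained sketch of the three not_found checks above, under the assumption that [tbno, tbno+tlen) is the trimmed unbusy range (local names only):

    #include <stdbool.h>
    #include <stdint.h>

    static bool exact_request_fits(uint32_t agbno, uint32_t minlen,
                                   uint32_t tbno, uint32_t tlen)
    {
        if (tbno > agbno)                   /* the start of the request is busy */
            return false;
        if (tlen < minlen)                  /* trimmed range too short overall */
            return false;
        if (tbno + tlen < agbno + minlen)   /* the end of the request is busy */
            return false;
        return true;
    }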
@@ -659,6 +671,94 @@ error0: | |||
659 | } | 671 | } |
660 | 672 | ||
661 | /* | 673 | /* |
674 | * Search the btree in a given direction via the search cursor and compare | ||
675 | * the records found against the good extent we've already found. | ||
676 | */ | ||
677 | STATIC int | ||
678 | xfs_alloc_find_best_extent( | ||
679 | struct xfs_alloc_arg *args, /* allocation argument structure */ | ||
680 | struct xfs_btree_cur **gcur, /* good cursor */ | ||
681 | struct xfs_btree_cur **scur, /* searching cursor */ | ||
682 | xfs_agblock_t gdiff, /* difference for search comparison */ | ||
683 | xfs_agblock_t *sbno, /* extent found by search */ | ||
684 | xfs_extlen_t *slen, /* extent length */ | ||
685 | xfs_agblock_t *sbnoa, /* aligned extent found by search */ | ||
686 | xfs_extlen_t *slena, /* aligned extent length */ | ||
687 | int dir) /* 0 = search right, 1 = search left */ | ||
688 | { | ||
689 | xfs_agblock_t new; | ||
690 | xfs_agblock_t sdiff; | ||
691 | int error; | ||
692 | int i; | ||
693 | |||
694 | /* The good extent is perfect, no need to search. */ | ||
695 | if (!gdiff) | ||
696 | goto out_use_good; | ||
697 | |||
698 | /* | ||
699 | * Look until we find a better one, run out of space or run off the end. | ||
700 | */ | ||
701 | do { | ||
702 | error = xfs_alloc_get_rec(*scur, sbno, slen, &i); | ||
703 | if (error) | ||
704 | goto error0; | ||
705 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
706 | xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); | ||
707 | |||
708 | /* | ||
709 | * The good extent is closer than this one. | ||
710 | */ | ||
711 | if (!dir) { | ||
712 | if (*sbnoa >= args->agbno + gdiff) | ||
713 | goto out_use_good; | ||
714 | } else { | ||
715 | if (*sbnoa <= args->agbno - gdiff) | ||
716 | goto out_use_good; | ||
717 | } | ||
718 | |||
719 | /* | ||
720 | * Same distance, compare length and pick the best. | ||
721 | */ | ||
722 | if (*slena >= args->minlen) { | ||
723 | args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); | ||
724 | xfs_alloc_fix_len(args); | ||
725 | |||
726 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | ||
727 | args->alignment, *sbnoa, | ||
728 | *slena, &new); | ||
729 | |||
730 | /* | ||
731 | * Choose closer size and invalidate other cursor. | ||
732 | */ | ||
733 | if (sdiff < gdiff) | ||
734 | goto out_use_search; | ||
735 | goto out_use_good; | ||
736 | } | ||
737 | |||
738 | if (!dir) | ||
739 | error = xfs_btree_increment(*scur, 0, &i); | ||
740 | else | ||
741 | error = xfs_btree_decrement(*scur, 0, &i); | ||
742 | if (error) | ||
743 | goto error0; | ||
744 | } while (i); | ||
745 | |||
746 | out_use_good: | ||
747 | xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); | ||
748 | *scur = NULL; | ||
749 | return 0; | ||
750 | |||
751 | out_use_search: | ||
752 | xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); | ||
753 | *gcur = NULL; | ||
754 | return 0; | ||
755 | |||
756 | error0: | ||
757 | /* caller invalidates cursors */ | ||
758 | return error; | ||
759 | } | ||
760 | |||
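xfs_alloc_find_best_extent() replaces the two near-duplicate search loops that follow with a single direction-parameterized walk. A rough standalone model of its stopping rule and comparison, with hypothetical names:

    #include <stdbool.h>

    /* 1 = switch to the candidate, 0 = keep the good extent,
     * -1 = candidate too small to judge, keep walking. */
    static int compare_candidate(unsigned target, unsigned good_diff,
                                 unsigned cand_bno, unsigned cand_len,
                                 unsigned minlen, bool search_right)
    {
        /* Candidate start provably farther away than the good extent: stop. */
        if (search_right ? cand_bno >= target + good_diff
                         : cand_bno <= target - good_diff)
            return 0;
        if (cand_len >= minlen) {
            unsigned cand_diff = cand_bno > target ? cand_bno - target
                                                   : target - cand_bno;
            return cand_diff < good_diff ? 1 : 0;
        }
        return -1;
    }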
761 | /* | ||
662 | * Allocate a variable extent near bno in the allocation group agno. | 762 | * Allocate a variable extent near bno in the allocation group agno. |
663 | * Extent's length (returned in len) will be between minlen and maxlen, | 763 | * Extent's length (returned in len) will be between minlen and maxlen, |
664 | * and of the form k * prod + mod unless there's nothing that large. | 764 | * and of the form k * prod + mod unless there's nothing that large. |
@@ -687,6 +787,7 @@ xfs_alloc_ag_vextent_near( | |||
687 | xfs_extlen_t ltlena; /* aligned ... */ | 787 | xfs_extlen_t ltlena; /* aligned ... */ |
688 | xfs_agblock_t ltnew; /* useful start bno of left side */ | 788 | xfs_agblock_t ltnew; /* useful start bno of left side */ |
689 | xfs_extlen_t rlen; /* length of returned extent */ | 789 | xfs_extlen_t rlen; /* length of returned extent */ |
790 | int forced = 0; | ||
690 | #if defined(DEBUG) && defined(__KERNEL__) | 791 | #if defined(DEBUG) && defined(__KERNEL__) |
691 | /* | 792 | /* |
692 | * Randomly don't execute the first algorithm. | 793 | * Randomly don't execute the first algorithm. |
@@ -695,13 +796,20 @@ xfs_alloc_ag_vextent_near( | |||
695 | 796 | ||
696 | dofirst = random32() & 1; | 797 | dofirst = random32() & 1; |
697 | #endif | 798 | #endif |
799 | |||
800 | restart: | ||
801 | bno_cur_lt = NULL; | ||
802 | bno_cur_gt = NULL; | ||
803 | ltlen = 0; | ||
804 | gtlena = 0; | ||
805 | ltlena = 0; | ||
806 | |||
698 | /* | 807 | /* |
699 | * Get a cursor for the by-size btree. | 808 | * Get a cursor for the by-size btree. |
700 | */ | 809 | */ |
701 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 810 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
702 | args->agno, XFS_BTNUM_CNT); | 811 | args->agno, XFS_BTNUM_CNT); |
703 | ltlen = 0; | 812 | |
704 | bno_cur_lt = bno_cur_gt = NULL; | ||
705 | /* | 813 | /* |
706 | * See if there are any free extents as big as maxlen. | 814 | * See if there are any free extents as big as maxlen. |
707 | */ | 815 | */ |
@@ -717,11 +825,13 @@ xfs_alloc_ag_vextent_near( | |||
717 | goto error0; | 825 | goto error0; |
718 | if (i == 0 || ltlen == 0) { | 826 | if (i == 0 || ltlen == 0) { |
719 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 827 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
828 | trace_xfs_alloc_near_noentry(args); | ||
720 | return 0; | 829 | return 0; |
721 | } | 830 | } |
722 | ASSERT(i == 1); | 831 | ASSERT(i == 1); |
723 | } | 832 | } |
724 | args->wasfromfl = 0; | 833 | args->wasfromfl = 0; |
834 | |||
725 | /* | 835 | /* |
726 | * First algorithm. | 836 | * First algorithm. |
727 | * If the requested extent is large wrt the freespaces available | 837 | * If the requested extent is large wrt the freespaces available |
@@ -775,8 +885,8 @@ xfs_alloc_ag_vextent_near( | |||
775 | if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) | 885 | if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) |
776 | goto error0; | 886 | goto error0; |
777 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 887 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
778 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, | 888 | xfs_alloc_compute_aligned(args, ltbno, ltlen, |
779 | args->minlen, &ltbnoa, &ltlena); | 889 | &ltbnoa, &ltlena); |
780 | if (ltlena < args->minlen) | 890 | if (ltlena < args->minlen) |
781 | continue; | 891 | continue; |
782 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 892 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
@@ -785,7 +895,7 @@ xfs_alloc_ag_vextent_near( | |||
785 | if (args->len < blen) | 895 | if (args->len < blen) |
786 | continue; | 896 | continue; |
787 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 897 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
788 | args->alignment, ltbno, ltlen, &ltnew); | 898 | args->alignment, ltbnoa, ltlena, &ltnew); |
789 | if (ltnew != NULLAGBLOCK && | 899 | if (ltnew != NULLAGBLOCK && |
790 | (args->len > blen || ltdiff < bdiff)) { | 900 | (args->len > blen || ltdiff < bdiff)) { |
791 | bdiff = ltdiff; | 901 | bdiff = ltdiff; |
@@ -896,8 +1006,8 @@ xfs_alloc_ag_vextent_near( | |||
896 | if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) | 1006 | if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) |
897 | goto error0; | 1007 | goto error0; |
898 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1008 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
899 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, | 1009 | xfs_alloc_compute_aligned(args, ltbno, ltlen, |
900 | args->minlen, &ltbnoa, &ltlena); | 1010 | &ltbnoa, &ltlena); |
901 | if (ltlena >= args->minlen) | 1011 | if (ltlena >= args->minlen) |
902 | break; | 1012 | break; |
903 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) | 1013 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) |
@@ -912,8 +1022,8 @@ xfs_alloc_ag_vextent_near( | |||
912 | if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) | 1022 | if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) |
913 | goto error0; | 1023 | goto error0; |
914 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1024 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
915 | xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, | 1025 | xfs_alloc_compute_aligned(args, gtbno, gtlen, |
916 | args->minlen, &gtbnoa, &gtlena); | 1026 | &gtbnoa, &gtlena); |
917 | if (gtlena >= args->minlen) | 1027 | if (gtlena >= args->minlen) |
918 | break; | 1028 | break; |
919 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) | 1029 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) |
@@ -925,211 +1035,62 @@ xfs_alloc_ag_vextent_near( | |||
925 | } | 1035 | } |
926 | } | 1036 | } |
927 | } while (bno_cur_lt || bno_cur_gt); | 1037 | } while (bno_cur_lt || bno_cur_gt); |
1038 | |||
928 | /* | 1039 | /* |
929 | * Got both cursors still active, need to find better entry. | 1040 | * Got both cursors still active, need to find better entry. |
930 | */ | 1041 | */ |
931 | if (bno_cur_lt && bno_cur_gt) { | 1042 | if (bno_cur_lt && bno_cur_gt) { |
932 | /* | ||
933 | * Left side is long enough, look for a right side entry. | ||
934 | */ | ||
935 | if (ltlena >= args->minlen) { | 1043 | if (ltlena >= args->minlen) { |
936 | /* | 1044 | /* |
937 | * Fix up the length. | 1045 | * Left side is good, look for a right side entry. |
938 | */ | 1046 | */ |
939 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1047 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
940 | xfs_alloc_fix_len(args); | 1048 | xfs_alloc_fix_len(args); |
941 | rlen = args->len; | 1049 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
942 | ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, | 1050 | args->alignment, ltbnoa, ltlena, &ltnew); |
943 | args->alignment, ltbno, ltlen, &ltnew); | 1051 | |
944 | /* | 1052 | error = xfs_alloc_find_best_extent(args, |
945 | * Not perfect. | 1053 | &bno_cur_lt, &bno_cur_gt, |
946 | */ | 1054 | ltdiff, &gtbno, &gtlen, |
947 | if (ltdiff) { | 1055 | &gtbnoa, &gtlena, |
948 | /* | 1056 | 0 /* search right */); |
949 | * Look until we find a better one, run out of | 1057 | } else { |
950 | * space, or run off the end. | 1058 | ASSERT(gtlena >= args->minlen); |
951 | */ | 1059 | |
952 | while (bno_cur_lt && bno_cur_gt) { | ||
953 | if ((error = xfs_alloc_get_rec( | ||
954 | bno_cur_gt, &gtbno, | ||
955 | &gtlen, &i))) | ||
956 | goto error0; | ||
957 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
958 | xfs_alloc_compute_aligned(gtbno, gtlen, | ||
959 | args->alignment, args->minlen, | ||
960 | &gtbnoa, &gtlena); | ||
961 | /* | ||
962 | * The left one is clearly better. | ||
963 | */ | ||
964 | if (gtbnoa >= args->agbno + ltdiff) { | ||
965 | xfs_btree_del_cursor( | ||
966 | bno_cur_gt, | ||
967 | XFS_BTREE_NOERROR); | ||
968 | bno_cur_gt = NULL; | ||
969 | break; | ||
970 | } | ||
971 | /* | ||
972 | * If we reach a big enough entry, | ||
973 | * compare the two and pick the best. | ||
974 | */ | ||
975 | if (gtlena >= args->minlen) { | ||
976 | args->len = | ||
977 | XFS_EXTLEN_MIN(gtlena, | ||
978 | args->maxlen); | ||
979 | xfs_alloc_fix_len(args); | ||
980 | rlen = args->len; | ||
981 | gtdiff = xfs_alloc_compute_diff( | ||
982 | args->agbno, rlen, | ||
983 | args->alignment, | ||
984 | gtbno, gtlen, &gtnew); | ||
985 | /* | ||
986 | * Right side is better. | ||
987 | */ | ||
988 | if (gtdiff < ltdiff) { | ||
989 | xfs_btree_del_cursor( | ||
990 | bno_cur_lt, | ||
991 | XFS_BTREE_NOERROR); | ||
992 | bno_cur_lt = NULL; | ||
993 | } | ||
994 | /* | ||
995 | * Left side is better. | ||
996 | */ | ||
997 | else { | ||
998 | xfs_btree_del_cursor( | ||
999 | bno_cur_gt, | ||
1000 | XFS_BTREE_NOERROR); | ||
1001 | bno_cur_gt = NULL; | ||
1002 | } | ||
1003 | break; | ||
1004 | } | ||
1005 | /* | ||
1006 | * Fell off the right end. | ||
1007 | */ | ||
1008 | if ((error = xfs_btree_increment( | ||
1009 | bno_cur_gt, 0, &i))) | ||
1010 | goto error0; | ||
1011 | if (!i) { | ||
1012 | xfs_btree_del_cursor( | ||
1013 | bno_cur_gt, | ||
1014 | XFS_BTREE_NOERROR); | ||
1015 | bno_cur_gt = NULL; | ||
1016 | break; | ||
1017 | } | ||
1018 | } | ||
1019 | } | ||
1020 | /* | ||
1021 | * The left side is perfect, trash the right side. | ||
1022 | */ | ||
1023 | else { | ||
1024 | xfs_btree_del_cursor(bno_cur_gt, | ||
1025 | XFS_BTREE_NOERROR); | ||
1026 | bno_cur_gt = NULL; | ||
1027 | } | ||
1028 | } | ||
1029 | /* | ||
1030 | * It's the right side that was found first, look left. | ||
1031 | */ | ||
1032 | else { | ||
1033 | /* | 1060 | /* |
1034 | * Fix up the length. | 1061 | * Right side is good, look for a left side entry. |
1035 | */ | 1062 | */ |
1036 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1063 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
1037 | xfs_alloc_fix_len(args); | 1064 | xfs_alloc_fix_len(args); |
1038 | rlen = args->len; | 1065 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1039 | gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, | 1066 | args->alignment, gtbnoa, gtlena, &gtnew); |
1040 | args->alignment, gtbno, gtlen, &gtnew); | 1067 | |
1041 | /* | 1068 | error = xfs_alloc_find_best_extent(args, |
1042 | * Right side entry isn't perfect. | 1069 | &bno_cur_gt, &bno_cur_lt, |
1043 | */ | 1070 | gtdiff, &ltbno, &ltlen, |
1044 | if (gtdiff) { | 1071 | &ltbnoa, &ltlena, |
1045 | /* | 1072 | 1 /* search left */); |
1046 | * Look until we find a better one, run out of | ||
1047 | * space, or run off the end. | ||
1048 | */ | ||
1049 | while (bno_cur_lt && bno_cur_gt) { | ||
1050 | if ((error = xfs_alloc_get_rec( | ||
1051 | bno_cur_lt, &ltbno, | ||
1052 | &ltlen, &i))) | ||
1053 | goto error0; | ||
1054 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1055 | xfs_alloc_compute_aligned(ltbno, ltlen, | ||
1056 | args->alignment, args->minlen, | ||
1057 | &ltbnoa, &ltlena); | ||
1058 | /* | ||
1059 | * The right one is clearly better. | ||
1060 | */ | ||
1061 | if (ltbnoa <= args->agbno - gtdiff) { | ||
1062 | xfs_btree_del_cursor( | ||
1063 | bno_cur_lt, | ||
1064 | XFS_BTREE_NOERROR); | ||
1065 | bno_cur_lt = NULL; | ||
1066 | break; | ||
1067 | } | ||
1068 | /* | ||
1069 | * If we reach a big enough entry, | ||
1070 | * compare the two and pick the best. | ||
1071 | */ | ||
1072 | if (ltlena >= args->minlen) { | ||
1073 | args->len = XFS_EXTLEN_MIN( | ||
1074 | ltlena, args->maxlen); | ||
1075 | xfs_alloc_fix_len(args); | ||
1076 | rlen = args->len; | ||
1077 | ltdiff = xfs_alloc_compute_diff( | ||
1078 | args->agbno, rlen, | ||
1079 | args->alignment, | ||
1080 | ltbno, ltlen, &ltnew); | ||
1081 | /* | ||
1082 | * Left side is better. | ||
1083 | */ | ||
1084 | if (ltdiff < gtdiff) { | ||
1085 | xfs_btree_del_cursor( | ||
1086 | bno_cur_gt, | ||
1087 | XFS_BTREE_NOERROR); | ||
1088 | bno_cur_gt = NULL; | ||
1089 | } | ||
1090 | /* | ||
1091 | * Right side is better. | ||
1092 | */ | ||
1093 | else { | ||
1094 | xfs_btree_del_cursor( | ||
1095 | bno_cur_lt, | ||
1096 | XFS_BTREE_NOERROR); | ||
1097 | bno_cur_lt = NULL; | ||
1098 | } | ||
1099 | break; | ||
1100 | } | ||
1101 | /* | ||
1102 | * Fell off the left end. | ||
1103 | */ | ||
1104 | if ((error = xfs_btree_decrement( | ||
1105 | bno_cur_lt, 0, &i))) | ||
1106 | goto error0; | ||
1107 | if (!i) { | ||
1108 | xfs_btree_del_cursor(bno_cur_lt, | ||
1109 | XFS_BTREE_NOERROR); | ||
1110 | bno_cur_lt = NULL; | ||
1111 | break; | ||
1112 | } | ||
1113 | } | ||
1114 | } | ||
1115 | /* | ||
1116 | * The right side is perfect, trash the left side. | ||
1117 | */ | ||
1118 | else { | ||
1119 | xfs_btree_del_cursor(bno_cur_lt, | ||
1120 | XFS_BTREE_NOERROR); | ||
1121 | bno_cur_lt = NULL; | ||
1122 | } | ||
1123 | } | 1073 | } |
1074 | |||
1075 | if (error) | ||
1076 | goto error0; | ||
1124 | } | 1077 | } |
1078 | |||
1125 | /* | 1079 | /* |
1126 | * If we couldn't get anything, give up. | 1080 | * If we couldn't get anything, give up. |
1127 | */ | 1081 | */ |
1128 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { | 1082 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { |
1083 | if (!forced++) { | ||
1084 | trace_xfs_alloc_near_busy(args); | ||
1085 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1086 | goto restart; | ||
1087 | } | ||
1088 | |||
1129 | trace_xfs_alloc_size_neither(args); | 1089 | trace_xfs_alloc_size_neither(args); |
1130 | args->agbno = NULLAGBLOCK; | 1090 | args->agbno = NULLAGBLOCK; |
1131 | return 0; | 1091 | return 0; |
1132 | } | 1092 | } |
1093 | |||
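The new forced flag gives the near allocator exactly one synchronous log force before reporting failure: busy extents only become allocatable once the transaction that freed them reaches the log. An illustrative reduction of that pattern, with stub helpers standing in for the real kernel calls:

    #define NULLAGBLOCK ((unsigned)-1)

    struct toy_args { unsigned agbno; };

    int  toy_try_alloc(struct toy_args *a);  /* hypothetical: one allocation pass */
    void toy_log_force_sync(void);           /* stands in for xfs_log_force(mp, XFS_LOG_SYNC) */

    int toy_alloc_with_retry(struct toy_args *a)
    {
        int forced = 0;
    restart:
        if (toy_try_alloc(a) == 0 && a->agbno != NULLAGBLOCK)
            return 0;                   /* got an extent */
        if (!forced++) {
            toy_log_force_sync();       /* unbusy extents whose free is now on disk */
            goto restart;               /* one retry only */
        }
        a->agbno = NULLAGBLOCK;         /* genuinely nothing available */
        return 0;
    }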
1133 | /* | 1094 | /* |
1134 | * At this point we have selected a freespace entry, either to the | 1095 | * At this point we have selected a freespace entry, either to the |
1135 | * left or to the right. If it's on the right, copy all the | 1096 | * left or to the right. If it's on the right, copy all the |
@@ -1146,6 +1107,7 @@ xfs_alloc_ag_vextent_near( | |||
1146 | j = 1; | 1107 | j = 1; |
1147 | } else | 1108 | } else |
1148 | j = 0; | 1109 | j = 0; |
1110 | |||
1149 | /* | 1111 | /* |
1150 | * Fix up the length and compute the useful address. | 1112 | * Fix up the length and compute the useful address. |
1151 | */ | 1113 | */ |
@@ -1158,12 +1120,13 @@ xfs_alloc_ag_vextent_near( | |||
1158 | return 0; | 1120 | return 0; |
1159 | } | 1121 | } |
1160 | rlen = args->len; | 1122 | rlen = args->len; |
1161 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, | 1123 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, |
1162 | ltlen, &ltnew); | 1124 | ltbnoa, ltlena, &ltnew); |
1163 | ASSERT(ltnew >= ltbno); | 1125 | ASSERT(ltnew >= ltbno); |
1164 | ASSERT(ltnew + rlen <= ltbno + ltlen); | 1126 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); |
1165 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 1127 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
1166 | args->agbno = ltnew; | 1128 | args->agbno = ltnew; |
1129 | |||
1167 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, | 1130 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, |
1168 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) | 1131 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) |
1169 | goto error0; | 1132 | goto error0; |
@@ -1206,26 +1169,35 @@ xfs_alloc_ag_vextent_size( | |||
1206 | int i; /* temp status variable */ | 1169 | int i; /* temp status variable */ |
1207 | xfs_agblock_t rbno; /* returned block number */ | 1170 | xfs_agblock_t rbno; /* returned block number */ |
1208 | xfs_extlen_t rlen; /* length of returned extent */ | 1171 | xfs_extlen_t rlen; /* length of returned extent */ |
1172 | int forced = 0; | ||
1209 | 1173 | ||
1174 | restart: | ||
1210 | /* | 1175 | /* |
1211 | * Allocate and initialize a cursor for the by-size btree. | 1176 | * Allocate and initialize a cursor for the by-size btree. |
1212 | */ | 1177 | */ |
1213 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 1178 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
1214 | args->agno, XFS_BTNUM_CNT); | 1179 | args->agno, XFS_BTNUM_CNT); |
1215 | bno_cur = NULL; | 1180 | bno_cur = NULL; |
1181 | |||
1216 | /* | 1182 | /* |
1217 | * Look for an entry >= maxlen+alignment-1 blocks. | 1183 | * Look for an entry >= maxlen+alignment-1 blocks. |
1218 | */ | 1184 | */ |
1219 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, | 1185 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, |
1220 | args->maxlen + args->alignment - 1, &i))) | 1186 | args->maxlen + args->alignment - 1, &i))) |
1221 | goto error0; | 1187 | goto error0; |
1188 | |||
1222 | /* | 1189 | /* |
1223 | * If none, then pick up the last entry in the tree unless the | 1190 | * If none or we have busy extents that we cannot allocate from, then |
1224 | * tree is empty. | 1191 | * we have to settle for a smaller extent. In the case that there are |
1192 | * no large extents, this will return the last entry in the tree unless | ||
1193 | * the tree is empty. In the case that there are only busy large | ||
1194 | * extents, this will return the largest small extent unless there | ||
1195 | * are no smaller extents available. | ||
1225 | */ | 1196 | */ |
1226 | if (!i) { | 1197 | if (!i || forced > 1) { |
1227 | if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, | 1198 | error = xfs_alloc_ag_vextent_small(args, cnt_cur, |
1228 | &flen, &i))) | 1199 | &fbno, &flen, &i); |
1200 | if (error) | ||
1229 | goto error0; | 1201 | goto error0; |
1230 | if (i == 0 || flen == 0) { | 1202 | if (i == 0 || flen == 0) { |
1231 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1203 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
@@ -1233,23 +1205,56 @@ xfs_alloc_ag_vextent_size( | |||
1233 | return 0; | 1205 | return 0; |
1234 | } | 1206 | } |
1235 | ASSERT(i == 1); | 1207 | ASSERT(i == 1); |
1208 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); | ||
1209 | } else { | ||
1210 | /* | ||
1211 | * Search for a non-busy extent that is large enough. | ||
1212 | * If we are at low space, don't check, or if we fall off | ||
1213 | * the end of the btree, turn off the busy check and | ||
1214 | * restart. | ||
1215 | */ | ||
1216 | for (;;) { | ||
1217 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); | ||
1218 | if (error) | ||
1219 | goto error0; | ||
1220 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1221 | |||
1222 | xfs_alloc_compute_aligned(args, fbno, flen, | ||
1223 | &rbno, &rlen); | ||
1224 | |||
1225 | if (rlen >= args->maxlen) | ||
1226 | break; | ||
1227 | |||
1228 | error = xfs_btree_increment(cnt_cur, 0, &i); | ||
1229 | if (error) | ||
1230 | goto error0; | ||
1231 | if (i == 0) { | ||
1232 | /* | ||
1233 | * Our only valid extents must have been busy. | ||
1234 | * Make it unbusy by forcing the log out and | ||
1235 | * retrying. If we've been here before, forcing | ||
1236 | * the log isn't making the extents available, | ||
1237 | * which means they have probably been freed in | ||
1238 | * this transaction. In that case, we have to | ||
1239 | * give up on them and we'll attempt a minlen | ||
1240 | * allocation the next time around. | ||
1241 | */ | ||
1242 | xfs_btree_del_cursor(cnt_cur, | ||
1243 | XFS_BTREE_NOERROR); | ||
1244 | trace_xfs_alloc_size_busy(args); | ||
1245 | if (!forced++) | ||
1246 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1247 | goto restart; | ||
1248 | } | ||
1249 | } | ||
1236 | } | 1250 | } |
1237 | /* | 1251 | |
1238 | * There's a freespace as big as maxlen+alignment-1, get it. | ||
1239 | */ | ||
1240 | else { | ||
1241 | if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) | ||
1242 | goto error0; | ||
1243 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1244 | } | ||
1245 | /* | 1252 | /* |
1246 | * In the first case above, we got the last entry in the | 1253 | * In the first case above, we got the last entry in the |
1247 | * by-size btree. Now we check to see if the space hits maxlen | 1254 | * by-size btree. Now we check to see if the space hits maxlen |
1248 | * once aligned; if not, we search left for something better. | 1255 | * once aligned; if not, we search left for something better. |
1249 | * This can't happen in the second case above. | 1256 | * This can't happen in the second case above. |
1250 | */ | 1257 | */ |
1251 | xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, | ||
1252 | &rbno, &rlen); | ||
1253 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1258 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1254 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1259 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || |
1255 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); | 1260 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); |
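Taken together, the size allocator's retry logic escalates in three steps: the first failure forces the log and restarts, and a second failure (forced > 1) routes the next restart through the small-extent fallback. A compact sketch under those assumptions, again with stub helpers:

    struct toy_args2 { unsigned agbno; };

    int  toy_find_unbusy_maxlen(struct toy_args2 *a); /* hypothetical: 0 on success */
    int  toy_alloc_small(struct toy_args2 *a);        /* hypothetical minlen fallback */
    void toy_log_force(void);                         /* stands in for xfs_log_force() */

    int toy_size_alloc(struct toy_args2 *a)
    {
        int forced = 0;
    restart:
        if (forced > 1)
            return toy_alloc_small(a);  /* give up on busy extents entirely */
        if (toy_find_unbusy_maxlen(a) == 0)
            return 0;                   /* clean maxlen extent found */
        if (!forced++)
            toy_log_force();            /* push the pending frees to disk once */
        goto restart;
    }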
@@ -1274,8 +1279,8 @@ xfs_alloc_ag_vextent_size( | |||
1274 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1279 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1275 | if (flen < bestrlen) | 1280 | if (flen < bestrlen) |
1276 | break; | 1281 | break; |
1277 | xfs_alloc_compute_aligned(fbno, flen, args->alignment, | 1282 | xfs_alloc_compute_aligned(args, fbno, flen, |
1278 | args->minlen, &rbno, &rlen); | 1283 | &rbno, &rlen); |
1279 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1284 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1280 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1285 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || |
1281 | (rlen <= flen && rbno + rlen <= fbno + flen), | 1286 | (rlen <= flen && rbno + rlen <= fbno + flen), |
@@ -1303,13 +1308,19 @@ xfs_alloc_ag_vextent_size( | |||
1303 | * Fix up the length. | 1308 | * Fix up the length. |
1304 | */ | 1309 | */ |
1305 | args->len = rlen; | 1310 | args->len = rlen; |
1306 | xfs_alloc_fix_len(args); | 1311 | if (rlen < args->minlen) { |
1307 | if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { | 1312 | if (!forced++) { |
1308 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1313 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1309 | trace_xfs_alloc_size_nominleft(args); | 1314 | trace_xfs_alloc_size_busy(args); |
1310 | args->agbno = NULLAGBLOCK; | 1315 | xfs_log_force(args->mp, XFS_LOG_SYNC); |
1311 | return 0; | 1316 | goto restart; |
1317 | } | ||
1318 | goto out_nominleft; | ||
1312 | } | 1319 | } |
1320 | xfs_alloc_fix_len(args); | ||
1321 | |||
1322 | if (!xfs_alloc_fix_minleft(args)) | ||
1323 | goto out_nominleft; | ||
1313 | rlen = args->len; | 1324 | rlen = args->len; |
1314 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); | 1325 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); |
1315 | /* | 1326 | /* |
@@ -1339,6 +1350,12 @@ error0: | |||
1339 | if (bno_cur) | 1350 | if (bno_cur) |
1340 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); | 1351 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); |
1341 | return error; | 1352 | return error; |
1353 | |||
1354 | out_nominleft: | ||
1355 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1356 | trace_xfs_alloc_size_nominleft(args); | ||
1357 | args->agbno = NULLAGBLOCK; | ||
1358 | return 0; | ||
1342 | } | 1359 | } |
1343 | 1360 | ||
1344 | /* | 1361 | /* |
@@ -1378,6 +1395,9 @@ xfs_alloc_ag_vextent_small( | |||
1378 | if (error) | 1395 | if (error) |
1379 | goto error0; | 1396 | goto error0; |
1380 | if (fbno != NULLAGBLOCK) { | 1397 | if (fbno != NULLAGBLOCK) { |
1398 | xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1, | ||
1399 | args->userdata); | ||
1400 | |||
1381 | if (args->userdata) { | 1401 | if (args->userdata) { |
1382 | xfs_buf_t *bp; | 1402 | xfs_buf_t *bp; |
1383 | 1403 | ||
@@ -1453,6 +1473,7 @@ xfs_free_ag_extent( | |||
1453 | xfs_mount_t *mp; /* mount point struct for filesystem */ | 1473 | xfs_mount_t *mp; /* mount point struct for filesystem */ |
1454 | xfs_agblock_t nbno; /* new starting block of freespace */ | 1474 | xfs_agblock_t nbno; /* new starting block of freespace */ |
1455 | xfs_extlen_t nlen; /* new length of freespace */ | 1475 | xfs_extlen_t nlen; /* new length of freespace */ |
1476 | xfs_perag_t *pag; /* per allocation group data */ | ||
1456 | 1477 | ||
1457 | mp = tp->t_mountp; | 1478 | mp = tp->t_mountp; |
1458 | /* | 1479 | /* |
@@ -1651,45 +1672,23 @@ xfs_free_ag_extent( | |||
1651 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1672 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1652 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1673 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1653 | cnt_cur = NULL; | 1674 | cnt_cur = NULL; |
1675 | |||
1654 | /* | 1676 | /* |
1655 | * Update the freespace totals in the ag and superblock. | 1677 | * Update the freespace totals in the ag and superblock. |
1656 | */ | 1678 | */ |
1657 | { | 1679 | pag = xfs_perag_get(mp, agno); |
1658 | xfs_agf_t *agf; | 1680 | error = xfs_alloc_update_counters(tp, pag, agbp, len); |
1659 | xfs_perag_t *pag; /* per allocation group data */ | 1681 | xfs_perag_put(pag); |
1660 | 1682 | if (error) | |
1661 | pag = xfs_perag_get(mp, agno); | 1683 | goto error0; |
1662 | pag->pagf_freeblks += len; | ||
1663 | xfs_perag_put(pag); | ||
1664 | 1684 | ||
1665 | agf = XFS_BUF_TO_AGF(agbp); | 1685 | if (!isfl) |
1666 | be32_add_cpu(&agf->agf_freeblks, len); | 1686 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); |
1667 | xfs_trans_agblocks_delta(tp, len); | 1687 | XFS_STATS_INC(xs_freex); |
1668 | XFS_WANT_CORRUPTED_GOTO( | 1688 | XFS_STATS_ADD(xs_freeb, len); |
1669 | be32_to_cpu(agf->agf_freeblks) <= | ||
1670 | be32_to_cpu(agf->agf_length), | ||
1671 | error0); | ||
1672 | xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); | ||
1673 | if (!isfl) | ||
1674 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); | ||
1675 | XFS_STATS_INC(xs_freex); | ||
1676 | XFS_STATS_ADD(xs_freeb, len); | ||
1677 | } | ||
1678 | 1689 | ||
1679 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); | 1690 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); |
1680 | 1691 | ||
1681 | /* | ||
1682 | * Since blocks move to the free list without the coordination | ||
1683 | * used in xfs_bmap_finish, we can't allow block to be available | ||
1684 | * for reallocation and non-transaction writing (user data) | ||
1685 | * until we know that the transaction that moved it to the free | ||
1686 | * list is permanently on disk. We track the blocks by declaring | ||
1687 | * these blocks as "busy"; the busy list is maintained on a per-ag | ||
1688 | * basis and each transaction records which entries should be removed | ||
1689 | * when the iclog commits to disk. If a busy block is allocated, | ||
1690 | * the iclog is pushed up to the LSN that freed the block. | ||
1691 | */ | ||
1692 | xfs_alloc_busy_insert(tp, agno, bno, len); | ||
1693 | return 0; | 1692 | return 0; |
1694 | 1693 | ||
1695 | error0: | 1694 | error0: |
@@ -1984,21 +1983,6 @@ xfs_alloc_get_freelist( | |||
1984 | xfs_alloc_log_agf(tp, agbp, logflags); | 1983 | xfs_alloc_log_agf(tp, agbp, logflags); |
1985 | *bnop = bno; | 1984 | *bnop = bno; |
1986 | 1985 | ||
1987 | /* | ||
1988 | * As blocks are freed, they are added to the per-ag busy list and | ||
1989 | * remain there until the freeing transaction is committed to disk. | ||
1990 | * Now that we have allocated blocks, this list must be searched to see | ||
1991 | * if a block is being reused. If one is, then the freeing transaction | ||
1992 | * must be pushed to disk before this transaction. | ||
1993 | * | ||
1994 | * We do this by setting the current transaction to a sync transaction | ||
1995 | * which guarantees that the freeing transaction is on disk before this | ||
1996 | * transaction. This is done instead of a synchronous log force here so | ||
1997 | * that we don't sit and wait with the AGF locked in the transaction | ||
1998 | * during the log force. | ||
1999 | */ | ||
2000 | if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) | ||
2001 | xfs_trans_set_sync(tp); | ||
2002 | return 0; | 1986 | return 0; |
2003 | } | 1987 | } |
2004 | 1988 | ||
@@ -2456,131 +2440,54 @@ xfs_free_extent( | |||
2456 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); | 2440 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); |
2457 | args.tp = tp; | 2441 | args.tp = tp; |
2458 | args.mp = tp->t_mountp; | 2442 | args.mp = tp->t_mountp; |
2443 | |||
2444 | /* | ||
2445 | * validate that the block number is legal - this enables us to detect | ||
2446 | * and handle a silent filesystem corruption rather than crashing. | ||
2447 | */ | ||
2459 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); | 2448 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); |
2460 | ASSERT(args.agno < args.mp->m_sb.sb_agcount); | 2449 | if (args.agno >= args.mp->m_sb.sb_agcount) |
2450 | return EFSCORRUPTED; | ||
2451 | |||
2461 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); | 2452 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); |
2453 | if (args.agbno >= args.mp->m_sb.sb_agblocks) | ||
2454 | return EFSCORRUPTED; | ||
2455 | |||
2462 | args.pag = xfs_perag_get(args.mp, args.agno); | 2456 | args.pag = xfs_perag_get(args.mp, args.agno); |
2463 | if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) | 2457 | ASSERT(args.pag); |
2458 | |||
2459 | error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); | ||
2460 | if (error) | ||
2464 | goto error0; | 2461 | goto error0; |
2465 | #ifdef DEBUG | 2462 | |
2466 | ASSERT(args.agbp != NULL); | 2463 | /* validate the extent size is legal now that we have the agf locked */ |
2467 | ASSERT((args.agbno + len) <= | 2464 | if (args.agbno + len > |
2468 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); | 2465 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { |
2469 | #endif | 2466 | error = EFSCORRUPTED; |
2467 | goto error0; | ||
2468 | } | ||
2469 | |||
2470 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); | 2470 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); |
2471 | if (!error) | ||
2472 | xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0); | ||
2471 | error0: | 2473 | error0: |
2472 | xfs_perag_put(args.pag); | 2474 | xfs_perag_put(args.pag); |
2473 | return error; | 2475 | return error; |
2474 | } | 2476 | } |
2475 | 2477 | ||
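The hunk above replaces DEBUG-only ASSERTs with unconditional range checks that return EFSCORRUPTED, so a corrupt block number degrades gracefully on production kernels instead of crashing. The same checks as a standalone sketch:

    #include <stdint.h>

    #define EFSCORRUPTED 990                /* stand-in error code */

    struct toy_sb { uint32_t agcount, agblocks; };

    static int toy_validate_block(const struct toy_sb *sb,
                                  uint32_t agno, uint32_t agbno)
    {
        if (agno >= sb->agcount)            /* AG number out of range */
            return EFSCORRUPTED;
        if (agbno >= sb->agblocks)          /* block beyond the AG size */
            return EFSCORRUPTED;
        return 0;
    }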
2476 | |||
2477 | /* | ||
2478 | * AG Busy list management | ||
2479 | * The busy list contains block ranges that have been freed but whose | ||
2480 | * transactions have not yet hit disk. If any block listed in a busy | ||
2481 | * list is reused, the transaction that freed it must be forced to disk | ||
2482 | * before continuing to use the block. | ||
2483 | * | ||
2484 | * xfs_alloc_busy_insert - add to the per-ag busy list | ||
2485 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list | ||
2486 | * xfs_alloc_busy_search - search for a busy extent | ||
2487 | */ | ||
2488 | |||
2489 | /* | ||
2490 | * Insert a new extent into the busy tree. | ||
2491 | * | ||
2492 | * The busy extent tree is indexed by the start block of the busy extent. | ||
2493 | * there can be multiple overlapping ranges in the busy extent tree but only | ||
2494 | * ever one entry at a given start block. The reason for this is that | ||
2495 | * multi-block extents can be freed, then smaller chunks of that extent | ||
2496 | * allocated and freed again before the first transaction commit is on disk. | ||
2497 | * If the exact same start block is freed a second time, we have to wait for | ||
2498 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
2499 | * There are two main cases we have to handle here. | ||
2500 | * | ||
2501 | * The first case is a transaction that triggers a "free - allocate - free" | ||
2502 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
2503 | * to the freelist, then allocated from the free list, then freed again. In | ||
2504 | * this case, the second extent free is what triggers the duplicate and as | ||
2505 | * such the transaction IDs should match. Because the extent was allocated in | ||
2506 | * this transaction, the transaction must be marked as synchronous. This is | ||
2507 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
2508 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
2509 | * This serves to catch violations of the second case quite effectively. | ||
2510 | * | ||
2511 | * The second case is where the free/alloc/free occur in different | ||
2512 | * transactions. In this case, the thread freeing the extent the second time | ||
2513 | * can't mark the extent busy immediately because it is already tracked in a | ||
2514 | * transaction that may be committing. When the log commit for the existing | ||
2515 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
2516 | * allow the second busy insert to continue using that busy extent structure, | ||
2517 | * it can be freed before this transaction is safely in the log. Hence our | ||
2518 | * only option in this case is to force the log to remove the existing busy | ||
2519 | * extent from the list before we insert the new one with the current | ||
2520 | * transaction ID. | ||
2521 | * | ||
2522 | * The problem we are trying to avoid in the free-alloc-free in separate | ||
2523 | * transactions is most easily described with a timeline: | ||
2524 | * | ||
2525 | * Thread 1        Thread 2        Thread 3        xfslogd | ||
2526 | *   xact alloc | ||
2527 | *   free X | ||
2528 | *   mark busy | ||
2529 | *   commit xact | ||
2530 | *   free xact | ||
2531 | *                  xact alloc | ||
2532 | *                  alloc X | ||
2533 | *                  busy search | ||
2534 | *                  mark xact sync | ||
2535 | *                  commit xact | ||
2536 | *                  free xact | ||
2537 | *                  force log | ||
2538 | *                  checkpoint starts | ||
2539 | *                  .... | ||
2540 | *                                  xact alloc | ||
2541 | *                                  free X | ||
2542 | *                                  mark busy | ||
2543 | *                                  finds match | ||
2544 | *                                  *** KABOOM! *** | ||
2545 | *                                  .... | ||
2546 | *                                                  log IO completes | ||
2547 | *                                                  unbusy X | ||
2548 | *                  checkpoint completes | ||
2549 | * | ||
2550 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
2551 | * the checkpoint completes, and the busy extent it matched will have been | ||
2552 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
2553 | * | ||
2554 | * However, to ensure this matching process is robust, we need to use the | ||
2555 | * transaction ID for identifying the transaction, as delayed logging results in | ||
2556 | * the busy extent and transaction lifecycles being different. i.e. the busy | ||
2557 | * extent is active for a lot longer than the transaction. Hence the | ||
2558 | * transaction structure can be freed and reallocated, and can then mark the same | ||
2559 | * extent busy again in the new transaction. In this case the new transaction | ||
2560 | * will have a different tid but can have the same address, and hence we need | ||
2561 | * to check against the tid. | ||
2562 | * | ||
2563 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
2564 | * first freed in the current checkpoint sequence. This, however, requires the | ||
2565 | * ability to pin the current checkpoint in memory until this transaction | ||
2566 | * commits to ensure that both the original free and the current one combine | ||
2567 | * logically into the one checkpoint. If the checkpoint sequences are | ||
2568 | * different, however, we still need to wait on a log force. | ||
2569 | */ | ||
2570 | void | 2478 | void |
2571 | xfs_alloc_busy_insert( | 2479 | xfs_alloc_busy_insert( |
2572 | struct xfs_trans *tp, | 2480 | struct xfs_trans *tp, |
2573 | xfs_agnumber_t agno, | 2481 | xfs_agnumber_t agno, |
2574 | xfs_agblock_t bno, | 2482 | xfs_agblock_t bno, |
2575 | xfs_extlen_t len) | 2483 | xfs_extlen_t len, |
2484 | unsigned int flags) | ||
2576 | { | 2485 | { |
2577 | struct xfs_busy_extent *new; | 2486 | struct xfs_busy_extent *new; |
2578 | struct xfs_busy_extent *busyp; | 2487 | struct xfs_busy_extent *busyp; |
2579 | struct xfs_perag *pag; | 2488 | struct xfs_perag *pag; |
2580 | struct rb_node **rbp; | 2489 | struct rb_node **rbp; |
2581 | struct rb_node *parent; | 2490 | struct rb_node *parent = NULL; |
2582 | int match; | ||
2583 | |||
2584 | 2491 | ||
2585 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); | 2492 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
2586 | if (!new) { | 2493 | if (!new) { |
@@ -2589,7 +2496,7 @@ xfs_alloc_busy_insert( | |||
2589 | * block, make this a synchronous transaction to ensure that | 2496 | * block, make this a synchronous transaction to ensure that |
2590 | * the block is not reused before this transaction commits. | 2497 | * the block is not reused before this transaction commits. |
2591 | */ | 2498 | */ |
2592 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | 2499 | trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len); |
2593 | xfs_trans_set_sync(tp); | 2500 | xfs_trans_set_sync(tp); |
2594 | return; | 2501 | return; |
2595 | } | 2502 | } |
@@ -2597,66 +2504,29 @@ xfs_alloc_busy_insert( | |||
2597 | new->agno = agno; | 2504 | new->agno = agno; |
2598 | new->bno = bno; | 2505 | new->bno = bno; |
2599 | new->length = len; | 2506 | new->length = len; |
2600 | new->tid = xfs_log_get_trans_ident(tp); | ||
2601 | |||
2602 | INIT_LIST_HEAD(&new->list); | 2507 | INIT_LIST_HEAD(&new->list); |
2508 | new->flags = flags; | ||
2603 | 2509 | ||
2604 | /* trace before insert to be able to see failed inserts */ | 2510 | /* trace before insert to be able to see failed inserts */ |
2605 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); | 2511 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); |
2606 | 2512 | ||
2607 | pag = xfs_perag_get(tp->t_mountp, new->agno); | 2513 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
2608 | restart: | ||
2609 | spin_lock(&pag->pagb_lock); | 2514 | spin_lock(&pag->pagb_lock); |
2610 | rbp = &pag->pagb_tree.rb_node; | 2515 | rbp = &pag->pagb_tree.rb_node; |
2611 | parent = NULL; | 2516 | while (*rbp) { |
2612 | busyp = NULL; | ||
2613 | match = 0; | ||
2614 | while (*rbp && match >= 0) { | ||
2615 | parent = *rbp; | 2517 | parent = *rbp; |
2616 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | 2518 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); |
2617 | 2519 | ||
2618 | if (new->bno < busyp->bno) { | 2520 | if (new->bno < busyp->bno) { |
2619 | /* may overlap, but exact start block is lower */ | ||
2620 | rbp = &(*rbp)->rb_left; | 2521 | rbp = &(*rbp)->rb_left; |
2621 | if (new->bno + new->length > busyp->bno) | 2522 | ASSERT(new->bno + new->length <= busyp->bno); |
2622 | match = busyp->tid == new->tid ? 1 : -1; | ||
2623 | } else if (new->bno > busyp->bno) { | 2523 | } else if (new->bno > busyp->bno) { |
2624 | /* may overlap, but exact start block is higher */ | ||
2625 | rbp = &(*rbp)->rb_right; | 2524 | rbp = &(*rbp)->rb_right; |
2626 | if (bno < busyp->bno + busyp->length) | 2525 | ASSERT(bno >= busyp->bno + busyp->length); |
2627 | match = busyp->tid == new->tid ? 1 : -1; | ||
2628 | } else { | 2526 | } else { |
2629 | match = busyp->tid == new->tid ? 1 : -1; | 2527 | ASSERT(0); |
2630 | break; | ||
2631 | } | 2528 | } |
2632 | } | 2529 | } |
2633 | if (match < 0) { | ||
2634 | /* overlap marked busy in different transaction */ | ||
2635 | spin_unlock(&pag->pagb_lock); | ||
2636 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
2637 | goto restart; | ||
2638 | } | ||
2639 | if (match > 0) { | ||
2640 | /* | ||
2641 | * overlap marked busy in same transaction. Update if exact | ||
2642 | * start block match, otherwise combine the busy extents into | ||
2643 | * a single range. | ||
2644 | */ | ||
2645 | if (busyp->bno == new->bno) { | ||
2646 | busyp->length = max(busyp->length, new->length); | ||
2647 | spin_unlock(&pag->pagb_lock); | ||
2648 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
2649 | xfs_perag_put(pag); | ||
2650 | kmem_free(new); | ||
2651 | return; | ||
2652 | } | ||
2653 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2654 | new->length = max(busyp->bno + busyp->length, | ||
2655 | new->bno + new->length) - | ||
2656 | min(busyp->bno, new->bno); | ||
2657 | new->bno = min(busyp->bno, new->bno); | ||
2658 | } else | ||
2659 | busyp = NULL; | ||
2660 | 2530 | ||
2661 | rb_link_node(&new->rb_node, parent, rbp); | 2531 | rb_link_node(&new->rb_node, parent, rbp); |
2662 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | 2532 | rb_insert_color(&new->rb_node, &pag->pagb_tree); |
@@ -2664,7 +2534,6 @@ restart: | |||
2664 | list_add(&new->list, &tp->t_busy); | 2534 | list_add(&new->list, &tp->t_busy); |
2665 | spin_unlock(&pag->pagb_lock); | 2535 | spin_unlock(&pag->pagb_lock); |
2666 | xfs_perag_put(pag); | 2536 | xfs_perag_put(pag); |
2667 | kmem_free(busyp); | ||
2668 | } | 2537 | } |
2669 | 2538 | ||
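Because allocation now trims around busy ranges (xfs_alloc_busy_trim/reuse below), overlapping busy extents can no longer be inserted, and the insert reduces to a plain ordered-tree descent with ASSERTs replacing the old overlap handling. The shape of that walk on a toy binary tree, links assumed NULL-initialized by the caller:

    struct toy_node {
        unsigned bno, len;
        struct toy_node *left, *right;
    };

    static void toy_busy_insert_node(struct toy_node **rbp, struct toy_node *new)
    {
        while (*rbp) {
            struct toy_node *busyp = *rbp;

            if (new->bno < busyp->bno)
                rbp = &busyp->left;     /* new must end before busyp starts */
            else
                rbp = &busyp->right;    /* new must start after busyp ends;
                                         * equal starts cannot occur */
        }
        *rbp = new;                     /* rb_link_node()/rb_insert_color() in the kernel */
    }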
2670 | /* | 2539 | /* |
@@ -2676,7 +2545,7 @@ restart: | |||
2676 | * will require a synchronous transaction, but it can still be | 2545 | * will require a synchronous transaction, but it can still be |
2677 | * used to distinguish between a partial or exact match. | 2546 | * used to distinguish between a partial or exact match. |
2678 | */ | 2547 | */ |
2679 | static int | 2548 | int |
2680 | xfs_alloc_busy_search( | 2549 | xfs_alloc_busy_search( |
2681 | struct xfs_mount *mp, | 2550 | struct xfs_mount *mp, |
2682 | xfs_agnumber_t agno, | 2551 | xfs_agnumber_t agno, |
@@ -2713,31 +2582,466 @@ xfs_alloc_busy_search( | |||
2713 | } | 2582 | } |
2714 | } | 2583 | } |
2715 | spin_unlock(&pag->pagb_lock); | 2584 | spin_unlock(&pag->pagb_lock); |
2716 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
2717 | xfs_perag_put(pag); | 2585 | xfs_perag_put(pag); |
2718 | return match; | 2586 | return match; |
2719 | } | 2587 | } |
2720 | 2588 | ||
2589 | /* | ||
2590 | * The found free extent [fbno, fend] overlaps part or all of the given busy | ||
2591 | * extent. If the overlap covers the beginning, the end, or all of the busy | ||
2592 | * extent, the overlapping portion can be made unbusy and used for the | ||
2593 | * allocation. We can't split a busy extent because we can't modify a | ||
2594 | * transaction/CIL context busy list, but we can update an entry's block | ||
2595 | * number or length. | ||
2596 | * | ||
2597 | * Returns true if the extent can safely be reused, or false if the search | ||
2598 | * needs to be restarted. | ||
2599 | */ | ||
2600 | STATIC bool | ||
2601 | xfs_alloc_busy_update_extent( | ||
2602 | struct xfs_mount *mp, | ||
2603 | struct xfs_perag *pag, | ||
2604 | struct xfs_busy_extent *busyp, | ||
2605 | xfs_agblock_t fbno, | ||
2606 | xfs_extlen_t flen, | ||
2607 | bool userdata) | ||
2608 | { | ||
2609 | xfs_agblock_t fend = fbno + flen; | ||
2610 | xfs_agblock_t bbno = busyp->bno; | ||
2611 | xfs_agblock_t bend = bbno + busyp->length; | ||
2612 | |||
2613 | /* | ||
2614 | * This extent is currently being discarded. Give the thread | ||
2615 | * performing the discard a chance to mark the extent unbusy | ||
2616 | * and retry. | ||
2617 | */ | ||
2618 | if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) { | ||
2619 | spin_unlock(&pag->pagb_lock); | ||
2620 | delay(1); | ||
2621 | spin_lock(&pag->pagb_lock); | ||
2622 | return false; | ||
2623 | } | ||
2624 | |||
2625 | /* | ||
2626 | * If there is a busy extent overlapping a user allocation, we have | ||
2627 | * no choice but to force the log and retry the search. | ||
2628 | * | ||
2629 | * Fortunately this does not happen during normal operation, but | ||
2630 | * only if the filesystem is very low on space and has to dip into | ||
2631 | * the AGFL for normal allocations. | ||
2632 | */ | ||
2633 | if (userdata) | ||
2634 | goto out_force_log; | ||
2635 | |||
2636 | if (bbno < fbno && bend > fend) { | ||
2637 | /* | ||
2638 | * Case 1: | ||
2639 | *    bbno           bend | ||
2640 | *    +BBBBBBBBBBBBBBBBB+ | ||
2641 | *        +---------+ | ||
2642 | *        fbno   fend | ||
2643 | */ | ||
2644 | |||
2645 | /* | ||
2646 | * We would have to split the busy extent to be able to track | ||
2647 | * it correctly, which we cannot do because we would have to | ||
2648 | * modify the list of busy extents attached to the transaction | ||
2649 | * or CIL context, which is immutable. | ||
2650 | * | ||
2651 | * Force out the log to clear the busy extent and retry the | ||
2652 | * search. | ||
2653 | */ | ||
2654 | goto out_force_log; | ||
2655 | } else if (bbno >= fbno && bend <= fend) { | ||
2656 | /* | ||
2657 | * Case 2: | ||
2658 | *    bbno           bend | ||
2659 | *    +BBBBBBBBBBBBBBBBB+ | ||
2660 | *    +-----------------+ | ||
2661 | *    fbno           fend | ||
2662 | * | ||
2663 | * Case 3: | ||
2664 | *    bbno           bend | ||
2665 | *    +BBBBBBBBBBBBBBBBB+ | ||
2666 | * +--------------------------+ | ||
2667 | * fbno                    fend | ||
2668 | * | ||
2669 | * Case 4: | ||
2670 | *    bbno           bend | ||
2671 | *    +BBBBBBBBBBBBBBBBB+ | ||
2672 | *    +--------------------------+ | ||
2673 | *    fbno                    fend | ||
2674 | * | ||
2675 | * Case 5: | ||
2676 | *    bbno           bend | ||
2677 | *    +BBBBBBBBBBBBBBBBB+ | ||
2678 | * +-----------------------------------+ | ||
2679 | * fbno                             fend | ||
2680 | * | ||
2681 | */ | ||
2682 | |||
2683 | /* | ||
2684 | * The busy extent is fully covered by the extent we are | ||
2685 | * allocating, and can simply be removed from the rbtree. | ||
2686 | * However we cannot remove it from the immutable list | ||
2687 | * tracking busy extents in the transaction or CIL context, | ||
2688 | * so set the length to zero to mark it invalid. | ||
2689 | * | ||
2690 | * We also need to restart the busy extent search from the | ||
2691 | * tree root, because erasing the node can rearrange the | ||
2692 | * tree topology. | ||
2693 | */ | ||
2694 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2695 | busyp->length = 0; | ||
2696 | return false; | ||
2697 | } else if (fend < bend) { | ||
2698 | /* | ||
2699 | * Case 6: | ||
2700 | *              bbno           bend | ||
2701 | *              +BBBBBBBBBBBBBBBBB+ | ||
2702 | *              +---------+ | ||
2703 | *              fbno   fend | ||
2704 | * | ||
2705 | * Case 7: | ||
2706 | *             bbno           bend | ||
2707 | *             +BBBBBBBBBBBBBBBBB+ | ||
2708 | *    +------------------+ | ||
2709 | *    fbno            fend | ||
2710 | * | ||
2711 | */ | ||
2712 | busyp->bno = fend; | ||
2713 | } else if (bbno < fbno) { | ||
2714 | /* | ||
2715 | * Case 8: | ||
2716 | *    bbno           bend | ||
2717 | *    +BBBBBBBBBBBBBBBBB+ | ||
2718 | *        +-------------+ | ||
2719 | *        fbno       fend | ||
2720 | * | ||
2721 | * Case 9: | ||
2722 | *    bbno           bend | ||
2723 | *    +BBBBBBBBBBBBBBBBB+ | ||
2724 | *        +----------------------+ | ||
2725 | *        fbno                fend | ||
2726 | */ | ||
2727 | busyp->length = fbno - busyp->bno; | ||
2728 | } else { | ||
2729 | ASSERT(0); | ||
2730 | } | ||
2731 | |||
2732 | trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen); | ||
2733 | return true; | ||
2734 | |||
2735 | out_force_log: | ||
2736 | spin_unlock(&pag->pagb_lock); | ||
2737 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
2738 | trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen); | ||
2739 | spin_lock(&pag->pagb_lock); | ||
2740 | return false; | ||
2741 | } | ||
2742 | |||
2743 | |||
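
The nine diagrams in xfs_alloc_busy_update_extent() collapse into four outcomes driven purely by endpoint comparisons. A minimal userspace C sketch of that classification (names and types are illustrative, not the kernel API; the userdata early exit is omitted):

#include <stdio.h>

enum busy_action {
	BUSY_FORCE_LOG,		/* case 1: a split would be needed */
	BUSY_REMOVE,		/* cases 2-5: busy extent fully covered */
	BUSY_TRIM_FRONT,	/* cases 6-7: busy extent start moves to fend */
	BUSY_TRIM_TAIL,		/* cases 8-9: busy length becomes fbno - bbno */
};

/* [fbno, fend) is being allocated, [bbno, bend) is busy; they overlap. */
static enum busy_action
classify(unsigned fbno, unsigned fend, unsigned bbno, unsigned bend)
{
	if (bbno < fbno && bend > fend)
		return BUSY_FORCE_LOG;
	if (bbno >= fbno && bend <= fend)
		return BUSY_REMOVE;
	if (fend < bend)
		return BUSY_TRIM_FRONT;
	return BUSY_TRIM_TAIL;
}

int main(void)
{
	printf("%d\n", classify(10, 20, 5, 30));	/* 0: force log */
	printf("%d\n", classify(5, 30, 10, 20));	/* 1: remove */
	return 0;
}
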
2744 | /* | ||
2745 | * For a given extent [fbno, flen], make sure we can reuse it safely. | ||
2746 | */ | ||
2721 | void | 2747 | void |
2722 | xfs_alloc_busy_clear( | 2748 | xfs_alloc_busy_reuse( |
2723 | struct xfs_mount *mp, | 2749 | struct xfs_mount *mp, |
2724 | struct xfs_busy_extent *busyp) | 2750 | xfs_agnumber_t agno, |
2751 | xfs_agblock_t fbno, | ||
2752 | xfs_extlen_t flen, | ||
2753 | bool userdata) | ||
2725 | { | 2754 | { |
2726 | struct xfs_perag *pag; | 2755 | struct xfs_perag *pag; |
2756 | struct rb_node *rbp; | ||
2727 | 2757 | ||
2728 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, | 2758 | ASSERT(flen > 0); |
2729 | busyp->length); | ||
2730 | 2759 | ||
2731 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, | 2760 | pag = xfs_perag_get(mp, agno); |
2732 | busyp->length) == 1); | 2761 | spin_lock(&pag->pagb_lock); |
2762 | restart: | ||
2763 | rbp = pag->pagb_tree.rb_node; | ||
2764 | while (rbp) { | ||
2765 | struct xfs_busy_extent *busyp = | ||
2766 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
2767 | xfs_agblock_t bbno = busyp->bno; | ||
2768 | xfs_agblock_t bend = bbno + busyp->length; | ||
2733 | 2769 | ||
2734 | list_del_init(&busyp->list); | 2770 | if (fbno + flen <= bbno) { |
2771 | rbp = rbp->rb_left; | ||
2772 | continue; | ||
2773 | } else if (fbno >= bend) { | ||
2774 | rbp = rbp->rb_right; | ||
2775 | continue; | ||
2776 | } | ||
2735 | 2777 | ||
2736 | pag = xfs_perag_get(mp, busyp->agno); | 2778 | if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen, |
2737 | spin_lock(&pag->pagb_lock); | 2779 | userdata)) |
2738 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | 2780 | goto restart; |
2781 | } | ||
2739 | spin_unlock(&pag->pagb_lock); | 2782 | spin_unlock(&pag->pagb_lock); |
2740 | xfs_perag_put(pag); | 2783 | xfs_perag_put(pag); |
2784 | } | ||
2785 | |||
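
Because busy extents never overlap each other, the tree walk in xfs_alloc_busy_reuse() can pick a subtree from a single range comparison. A userspace sketch with a plain binary search tree standing in for the kernel rbtree (all names illustrative):

#include <stdio.h>
#include <stddef.h>

struct node {
	unsigned bno, len;
	struct node *left, *right;
};

static struct node *
find_overlap(struct node *n, unsigned fbno, unsigned flen)
{
	while (n) {
		if (fbno + flen <= n->bno)
			n = n->left;	/* query entirely below this node */
		else if (fbno >= n->bno + n->len)
			n = n->right;	/* query entirely above this node */
		else
			return n;	/* the ranges intersect */
	}
	return NULL;
}

int main(void)
{
	struct node busy = { 100, 50, NULL, NULL };

	printf("%s\n", find_overlap(&busy, 120, 10) ? "busy" : "free");
	printf("%s\n", find_overlap(&busy, 10, 20) ? "busy" : "free");
	return 0;
}
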
2786 | /* | ||
2787 | * For a given extent [fbno, flen], search the busy extent list to find a | ||
2788 | * subset of the extent that is not busy. If *rlen is smaller than | ||
2789 | * args->minlen no suitable extent could be found, and the higher level | ||
2790 | * code needs to force out the log and retry the allocation. | ||
2791 | */ | ||
2792 | STATIC void | ||
2793 | xfs_alloc_busy_trim( | ||
2794 | struct xfs_alloc_arg *args, | ||
2795 | xfs_agblock_t bno, | ||
2796 | xfs_extlen_t len, | ||
2797 | xfs_agblock_t *rbno, | ||
2798 | xfs_extlen_t *rlen) | ||
2799 | { | ||
2800 | xfs_agblock_t fbno; | ||
2801 | xfs_extlen_t flen; | ||
2802 | struct rb_node *rbp; | ||
2803 | |||
2804 | ASSERT(len > 0); | ||
2805 | |||
2806 | spin_lock(&args->pag->pagb_lock); | ||
2807 | restart: | ||
2808 | fbno = bno; | ||
2809 | flen = len; | ||
2810 | rbp = args->pag->pagb_tree.rb_node; | ||
2811 | while (rbp && flen >= args->minlen) { | ||
2812 | struct xfs_busy_extent *busyp = | ||
2813 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
2814 | xfs_agblock_t fend = fbno + flen; | ||
2815 | xfs_agblock_t bbno = busyp->bno; | ||
2816 | xfs_agblock_t bend = bbno + busyp->length; | ||
2817 | |||
2818 | if (fend <= bbno) { | ||
2819 | rbp = rbp->rb_left; | ||
2820 | continue; | ||
2821 | } else if (fbno >= bend) { | ||
2822 | rbp = rbp->rb_right; | ||
2823 | continue; | ||
2824 | } | ||
2825 | |||
2826 | /* | ||
2827 | * If this is a metadata allocation, try to reuse the busy | ||
2828 | * extent instead of trimming the allocation. | ||
2829 | */ | ||
2830 | if (!args->userdata && | ||
2831 | !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) { | ||
2832 | if (!xfs_alloc_busy_update_extent(args->mp, args->pag, | ||
2833 | busyp, fbno, flen, | ||
2834 | false)) | ||
2835 | goto restart; | ||
2836 | continue; | ||
2837 | } | ||
2838 | |||
2839 | if (bbno <= fbno) { | ||
2840 | /* start overlap */ | ||
2741 | 2841 | ||
2842 | /* | ||
2843 | * Case 1: | ||
2844 | * bbno bend | ||
2845 | * +BBBBBBBBBBBBBBBBB+ | ||
2846 | * +---------+ | ||
2847 | * fbno fend | ||
2848 | * | ||
2849 | * Case 2: | ||
2850 | * bbno bend | ||
2851 | * +BBBBBBBBBBBBBBBBB+ | ||
2852 | * +-------------+ | ||
2853 | * fbno fend | ||
2854 | * | ||
2855 | * Case 3: | ||
2856 | * bbno bend | ||
2857 | * +BBBBBBBBBBBBBBBBB+ | ||
2858 | * +-------------+ | ||
2859 | * fbno fend | ||
2860 | * | ||
2861 | * Case 4: | ||
2862 | * bbno bend | ||
2863 | * +BBBBBBBBBBBBBBBBB+ | ||
2864 | * +-----------------+ | ||
2865 | * fbno fend | ||
2866 | * | ||
2867 | * No unbusy region in extent, return failure. | ||
2868 | */ | ||
2869 | if (fend <= bend) | ||
2870 | goto fail; | ||
2871 | |||
2872 | /* | ||
2873 | * Case 5: | ||
2874 | * bbno bend | ||
2875 | * +BBBBBBBBBBBBBBBBB+ | ||
2876 | * +----------------------+ | ||
2877 | * fbno fend | ||
2878 | * | ||
2879 | * Case 6: | ||
2880 | * bbno bend | ||
2881 | * +BBBBBBBBBBBBBBBBB+ | ||
2882 | * +--------------------------+ | ||
2883 | * fbno fend | ||
2884 | * | ||
2885 | * Needs to be trimmed to: | ||
2886 | * +-------+ | ||
2887 | * fbno fend | ||
2888 | */ | ||
2889 | fbno = bend; | ||
2890 | } else if (bend >= fend) { | ||
2891 | /* end overlap */ | ||
2892 | |||
2893 | /* | ||
2894 | * Case 7: | ||
2895 | * bbno bend | ||
2896 | * +BBBBBBBBBBBBBBBBB+ | ||
2897 | * +------------------+ | ||
2898 | * fbno fend | ||
2899 | * | ||
2900 | * Case 8: | ||
2901 | * bbno bend | ||
2902 | * +BBBBBBBBBBBBBBBBB+ | ||
2903 | * +--------------------------+ | ||
2904 | * fbno fend | ||
2905 | * | ||
2906 | * Needs to be trimmed to: | ||
2907 | * +-------+ | ||
2908 | * fbno fend | ||
2909 | */ | ||
2910 | fend = bbno; | ||
2911 | } else { | ||
2912 | /* middle overlap */ | ||
2913 | |||
2914 | /* | ||
2915 | * Case 9: | ||
2916 | * bbno bend | ||
2917 | * +BBBBBBBBBBBBBBBBB+ | ||
2918 | * +-----------------------------------+ | ||
2919 | * fbno fend | ||
2920 | * | ||
2921 | * Can be trimmed to: | ||
2922 | * +-------+ OR +-------+ | ||
2923 | * fbno fend fbno fend | ||
2924 | * | ||
2925 | * Backward allocation leads to significant | ||
2926 | * fragmentation of directories, which degrades | ||
2927 | * directory performance; we therefore always want to	| ||
2928 | * choose the option that produces forward allocation | ||
2929 | * patterns. | ||
2930 | * Preferring the lower bno extent will make the next | ||
2931 | * request use "fend" as the start of the next | ||
2932 | * allocation; if the segment is no longer busy at | ||
2933 | * that point, we'll get a contiguous allocation, but | ||
2934 | * even if it is still busy, we will get a forward | ||
2935 | * allocation. | ||
2936 | * We try to avoid choosing the segment at "bend", | ||
2937 | * because that can lead to the next allocation | ||
2938 | * taking the segment at "fbno", which would be a | ||
2939 | * backward allocation. We only use the segment at | ||
2940 | * "fbno" if it is much larger than the current | ||
2941 | * requested size, because in that case there's a | ||
2942 | * good chance subsequent allocations will be | ||
2943 | * contiguous. | ||
2944 | */ | ||
2945 | if (bbno - fbno >= args->maxlen) { | ||
2946 | /* left candidate fits perfect */ | ||
2947 | fend = bbno; | ||
2948 | } else if (fend - bend >= args->maxlen * 4) { | ||
2949 | /* right candidate has enough free space */ | ||
2950 | fbno = bend; | ||
2951 | } else if (bbno - fbno >= args->minlen) { | ||
2952 | /* left candidate fits minimum requirement */ | ||
2953 | fend = bbno; | ||
2954 | } else { | ||
2955 | goto fail; | ||
2956 | } | ||
2957 | } | ||
2958 | |||
2959 | flen = fend - fbno; | ||
2960 | } | ||
2961 | spin_unlock(&args->pag->pagb_lock); | ||
2962 | |||
2963 | if (fbno != bno || flen != len) { | ||
2964 | trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, | ||
2965 | fbno, flen); | ||
2966 | } | ||
2967 | *rbno = fbno; | ||
2968 | *rlen = flen; | ||
2969 | return; | ||
2970 | fail: | ||
2971 | /* | ||
2972 | * Return a zero extent length as a failure indication. All callers	| ||
2973 | * re-check if the trimmed extent satisfies the minlen requirement. | ||
2974 | */ | ||
2975 | spin_unlock(&args->pag->pagb_lock); | ||
2976 | trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0); | ||
2977 | *rbno = fbno; | ||
2978 | *rlen = 0; | ||
2979 | } | ||
2980 | |||
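
The middle-overlap heuristic above (case 9) splits the free extent [fbno, fend) around the busy extent [bbno, bend) into a left candidate [fbno, bbno) and a right candidate [bend, fend), preferring forward allocation. A userspace sketch of the selection (names and the simplified return convention are illustrative only):

#include <stdio.h>

/* Returns 1 and sets *rbno/*rlen on success, 0 when neither side fits. */
static int
pick_candidate(unsigned fbno, unsigned fend, unsigned bbno, unsigned bend,
	       unsigned minlen, unsigned maxlen,
	       unsigned *rbno, unsigned *rlen)
{
	if (bbno - fbno >= maxlen) {
		*rbno = fbno;			/* left side fits the request */
		*rlen = bbno - fbno;
	} else if (fend - bend >= maxlen * 4) {
		*rbno = bend;			/* right side is much larger */
		*rlen = fend - bend;
	} else if (bbno - fbno >= minlen) {
		*rbno = fbno;			/* left side meets minlen */
		*rlen = bbno - fbno;
	} else {
		return 0;			/* force the log and retry */
	}
	return 1;
}

int main(void)
{
	unsigned rbno, rlen;

	if (pick_candidate(0, 100, 10, 90, 4, 16, &rbno, &rlen))
		printf("use [%u, %u)\n", rbno, rbno + rlen);
	return 0;
}
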
2981 | static void | ||
2982 | xfs_alloc_busy_clear_one( | ||
2983 | struct xfs_mount *mp, | ||
2984 | struct xfs_perag *pag, | ||
2985 | struct xfs_busy_extent *busyp) | ||
2986 | { | ||
2987 | if (busyp->length) { | ||
2988 | trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno, | ||
2989 | busyp->length); | ||
2990 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2991 | } | ||
2992 | |||
2993 | list_del_init(&busyp->list); | ||
2742 | kmem_free(busyp); | 2994 | kmem_free(busyp); |
2743 | } | 2995 | } |
2996 | |||
2997 | /* | ||
2998 | * Remove all extents on the passed-in list from the busy extents tree.	| ||
2999 | * If do_discard is set, skip extents that need to be discarded, and mark	| ||
3000 | * these as undergoing a discard operation instead. | ||
3001 | */ | ||
3002 | void | ||
3003 | xfs_alloc_busy_clear( | ||
3004 | struct xfs_mount *mp, | ||
3005 | struct list_head *list, | ||
3006 | bool do_discard) | ||
3007 | { | ||
3008 | struct xfs_busy_extent *busyp, *n; | ||
3009 | struct xfs_perag *pag = NULL; | ||
3010 | xfs_agnumber_t agno = NULLAGNUMBER; | ||
3011 | |||
3012 | list_for_each_entry_safe(busyp, n, list, list) { | ||
3013 | if (busyp->agno != agno) { | ||
3014 | if (pag) { | ||
3015 | spin_unlock(&pag->pagb_lock); | ||
3016 | xfs_perag_put(pag); | ||
3017 | } | ||
3018 | pag = xfs_perag_get(mp, busyp->agno); | ||
3019 | spin_lock(&pag->pagb_lock); | ||
3020 | agno = busyp->agno; | ||
3021 | } | ||
3022 | |||
3023 | if (do_discard && busyp->length && | ||
3024 | !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD)) | ||
3025 | busyp->flags = XFS_ALLOC_BUSY_DISCARDED; | ||
3026 | else | ||
3027 | xfs_alloc_busy_clear_one(mp, pag, busyp); | ||
3028 | } | ||
3029 | |||
3030 | if (pag) { | ||
3031 | spin_unlock(&pag->pagb_lock); | ||
3032 | xfs_perag_put(pag); | ||
3033 | } | ||
3034 | } | ||
3035 | |||
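
Because the caller sorts the list by AG first (see xfs_alloc_busy_sort() in the header below), the loop in xfs_alloc_busy_clear() only changes hands on the per-AG lock at AG boundaries, not once per extent. A sketch of that batching pattern with plain arrays and a stub lock (all names illustrative):

#include <stdio.h>

#define NULLAG	(~0u)

struct ext { unsigned agno, bno; };

static void lock_ag(unsigned agno)   { printf("lock AG %u\n", agno); }
static void unlock_ag(unsigned agno) { printf("unlock AG %u\n", agno); }

static void clear_all(struct ext *list, int n)
{
	unsigned agno = NULLAG;
	int i;

	for (i = 0; i < n; i++) {
		if (list[i].agno != agno) {
			if (agno != NULLAG)
				unlock_ag(agno);	/* drop previous AG */
			agno = list[i].agno;
			lock_ag(agno);			/* take new AG */
		}
		printf("  clear extent at %u\n", list[i].bno);
	}
	if (agno != NULLAG)
		unlock_ag(agno);
}

int main(void)
{
	struct ext list[] = { {0, 5}, {0, 9}, {2, 1} };	/* sorted by agno */

	clear_all(list, 3);
	return 0;
}
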
3036 | /* | ||
3037 | * Callback for list_sort to sort busy extents by the AG they reside in. | ||
3038 | */ | ||
3039 | int | ||
3040 | xfs_busy_extent_ag_cmp( | ||
3041 | void *priv, | ||
3042 | struct list_head *a, | ||
3043 | struct list_head *b) | ||
3044 | { | ||
3045 | return container_of(a, struct xfs_busy_extent, list)->agno - | ||
3046 | container_of(b, struct xfs_busy_extent, list)->agno; | ||
3047 | } | ||
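
The comparator only needs to group extents by AG so that xfs_alloc_busy_clear() switches locks at AG boundaries; ordering within an AG is irrelevant. A userspace equivalent using qsort(3), assuming agno stays well below INT_MAX so the subtraction cannot overflow (illustrative struct, not the kernel one):

#include <stdio.h>
#include <stdlib.h>

struct busy { unsigned agno; };

static int ag_cmp(const void *a, const void *b)
{
	/* safe while agno values stay well below INT_MAX */
	return (int)((const struct busy *)a)->agno -
	       (int)((const struct busy *)b)->agno;
}

int main(void)
{
	struct busy v[] = { {3}, {0}, {2}, {0} };
	int i;

	qsort(v, 4, sizeof(v[0]), ag_cmp);
	for (i = 0; i < 4; i++)
		printf("%u ", v[i].agno);	/* 0 0 2 3 */
	printf("\n");
	return 0;
}
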
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 895009a97271..2f52b924be79 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #define __XFS_ALLOC_H__ | 19 | #define __XFS_ALLOC_H__ |
20 | 20 | ||
21 | struct xfs_buf; | 21 | struct xfs_buf; |
22 | struct xfs_btree_cur; | ||
22 | struct xfs_mount; | 23 | struct xfs_mount; |
23 | struct xfs_perag; | 24 | struct xfs_perag; |
24 | struct xfs_trans; | 25 | struct xfs_trans; |
@@ -74,6 +75,22 @@ typedef unsigned int xfs_alloctype_t; | |||
74 | #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) | 75 | #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) |
75 | 76 | ||
76 | /* | 77 | /* |
78 | * When deciding how much space to allocate out of an AG, we limit the | ||
79 | * maximum allocation size to the size of the AG. However, we cannot use all the	| ||
80 | * blocks in the AG - some are permanently used by metadata. These | ||
81 | * blocks are generally: | ||
82 | * - the AG superblock, AGF, AGI and AGFL | ||
83 | * - the AGF (bno and cnt) and AGI btree root blocks | ||
84 | * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits | ||
85 | * | ||
86 | * The AG headers are sector sized, so the amount of space they take up is | ||
87 | * dependent on filesystem geometry. The others are all single blocks. | ||
88 | */ | ||
89 | #define XFS_ALLOC_AG_MAX_USABLE(mp) \ | ||
90 | ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) | ||
91 | |||
92 | |||
93 | /* | ||
77 | * Argument structure for xfs_alloc routines. | 94 | * Argument structure for xfs_alloc routines. |
78 | * This is turned into a structure to avoid having 20 arguments passed | 95 | * This is turned into a structure to avoid having 20 arguments passed |
79 | * down several levels of the stack. | 96 | * down several levels of the stack. |
@@ -118,15 +135,29 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, | |||
118 | struct xfs_perag *pag); | 135 | struct xfs_perag *pag); |
119 | 136 | ||
120 | #ifdef __KERNEL__ | 137 | #ifdef __KERNEL__ |
138 | void | ||
139 | xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, | ||
140 | xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); | ||
121 | 141 | ||
122 | void | 142 | void |
123 | xfs_alloc_busy_insert(xfs_trans_t *tp, | 143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list, |
124 | xfs_agnumber_t agno, | 144 | bool do_discard); |
125 | xfs_agblock_t bno, | 145 | |
126 | xfs_extlen_t len); | 146 | int |
147 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
148 | xfs_agblock_t bno, xfs_extlen_t len); | ||
127 | 149 | ||
128 | void | 150 | void |
129 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); | 151 | xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, |
152 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); | ||
153 | |||
154 | int | ||
155 | xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b); | ||
156 | |||
157 | static inline void xfs_alloc_busy_sort(struct list_head *list) | ||
158 | { | ||
159 | list_sort(NULL, list, xfs_busy_extent_ag_cmp); | ||
160 | } | ||
130 | 161 | ||
131 | #endif /* __KERNEL__ */ | 162 | #endif /* __KERNEL__ */ |
132 | 163 | ||
@@ -205,4 +236,18 @@ xfs_free_extent( | |||
205 | xfs_fsblock_t bno, /* starting block number of extent */ | 236 | xfs_fsblock_t bno, /* starting block number of extent */ |
206 | xfs_extlen_t len); /* length of extent */ | 237 | xfs_extlen_t len); /* length of extent */ |
207 | 238 | ||
239 | int /* error */ | ||
240 | xfs_alloc_lookup_le( | ||
241 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
242 | xfs_agblock_t bno, /* starting block of extent */ | ||
243 | xfs_extlen_t len, /* length of extent */ | ||
244 | int *stat); /* success/failure */ | ||
245 | |||
246 | int /* error */ | ||
247 | xfs_alloc_get_rec( | ||
248 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
249 | xfs_agblock_t *bno, /* output: starting block of extent */ | ||
250 | xfs_extlen_t *len, /* output: length of extent */ | ||
251 | int *stat); /* output: success/failure */ | ||
252 | |||
208 | #endif /* __XFS_ALLOC_H__ */ | 253 | #endif /* __XFS_ALLOC_H__ */ |
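
A worked example of the XFS_ALLOC_AG_MAX_USABLE() arithmetic, with assumed geometry (4096-byte blocks, 512-byte sectors). The four sector-sized headers round up to whole filesystem blocks, and the constant 7 covers the two free-space btree roots, the inode btree root, and the 4 AGFL blocks set aside by XFS_ALLOC_SET_ASIDE():

#include <stdio.h>

int main(void)
{
	unsigned agblocks = 1u << 20;		/* blocks per AG (assumed) */
	unsigned blocksize = 4096, sectsize = 512;
	unsigned hdr_bytes = 4 * sectsize;	/* SB + AGF + AGI + AGFL */
	unsigned hdr_blocks = (hdr_bytes + blocksize - 1) / blocksize;

	printf("max usable: %u of %u blocks\n",
	       agblocks - hdr_blocks - 7, agblocks);
	return 0;
}
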
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 97f7328967fd..2b3518826a69 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block( | |||
95 | return 0; | 95 | return 0; |
96 | } | 96 | } |
97 | 97 | ||
98 | xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); | ||
99 | |||
98 | xfs_trans_agbtree_delta(cur->bc_tp, 1); | 100 | xfs_trans_agbtree_delta(cur->bc_tp, 1); |
99 | new->s = cpu_to_be32(bno); | 101 | new->s = cpu_to_be32(bno); |
100 | 102 | ||
@@ -118,18 +120,8 @@ xfs_allocbt_free_block( | |||
118 | if (error) | 120 | if (error) |
119 | return error; | 121 | return error; |
120 | 122 | ||
121 | /* | 123 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, |
122 | * Since blocks move to the free list without the coordination used in | 124 | XFS_ALLOC_BUSY_SKIP_DISCARD); |
123 | * xfs_bmap_finish, we can't allow block to be available for | ||
124 | * reallocation and non-transaction writing (user data) until we know | ||
125 | * that the transaction that moved it to the free list is permanently | ||
126 | * on disk. We track the blocks by declaring these blocks as "busy"; | ||
127 | * the busy list is maintained on a per-ag basis and each transaction | ||
128 | * records which entries should be removed when the iclog commits to | ||
129 | * disk. If a busy block is allocated, the iclog is pushed up to the | ||
130 | * LSN that freed the block. | ||
131 | */ | ||
132 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | ||
133 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 125 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
134 | return 0; | 126 | return 0; |
135 | } | 127 | } |
@@ -280,38 +272,6 @@ xfs_allocbt_key_diff( | |||
280 | return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; | 272 | return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; |
281 | } | 273 | } |
282 | 274 | ||
283 | STATIC int | ||
284 | xfs_allocbt_kill_root( | ||
285 | struct xfs_btree_cur *cur, | ||
286 | struct xfs_buf *bp, | ||
287 | int level, | ||
288 | union xfs_btree_ptr *newroot) | ||
289 | { | ||
290 | int error; | ||
291 | |||
292 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
293 | XFS_BTREE_STATS_INC(cur, killroot); | ||
294 | |||
295 | /* | ||
296 | * Update the root pointer, decreasing the level by 1 and then | ||
297 | * free the old root. | ||
298 | */ | ||
299 | xfs_allocbt_set_root(cur, newroot, -1); | ||
300 | error = xfs_allocbt_free_block(cur, bp); | ||
301 | if (error) { | ||
302 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
303 | return error; | ||
304 | } | ||
305 | |||
306 | XFS_BTREE_STATS_INC(cur, free); | ||
307 | |||
308 | xfs_btree_setbuf(cur, level, NULL); | ||
309 | cur->bc_nlevels--; | ||
310 | |||
311 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
312 | return 0; | ||
313 | } | ||
314 | |||
315 | #ifdef DEBUG | 275 | #ifdef DEBUG |
316 | STATIC int | 276 | STATIC int |
317 | xfs_allocbt_keys_inorder( | 277 | xfs_allocbt_keys_inorder( |
@@ -423,7 +383,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { | |||
423 | 383 | ||
424 | .dup_cursor = xfs_allocbt_dup_cursor, | 384 | .dup_cursor = xfs_allocbt_dup_cursor, |
425 | .set_root = xfs_allocbt_set_root, | 385 | .set_root = xfs_allocbt_set_root, |
426 | .kill_root = xfs_allocbt_kill_root, | ||
427 | .alloc_block = xfs_allocbt_alloc_block, | 386 | .alloc_block = xfs_allocbt_alloc_block, |
428 | .free_block = xfs_allocbt_free_block, | 387 | .free_block = xfs_allocbt_free_block, |
429 | .update_lastrec = xfs_allocbt_update_lastrec, | 388 | .update_lastrec = xfs_allocbt_update_lastrec, |
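
The comment deleted above still describes the rule the new xfs_alloc_busy_insert() call enforces: a block freed to the free list may not be reused until the transaction that freed it is on disk, and allocating a busy block forces the log up to the freeing LSN first. The XFS_ALLOC_BUSY_SKIP_DISCARD flag likely reflects that freed btree blocks tend to be reallocated quickly, so discarding them would be wasted effort. A sketch of the LSN gate (all types and names are illustrative, not the kernel's):

#include <stdio.h>

struct busy { unsigned long freed_lsn; };

static unsigned long log_tail_lsn;	/* highest LSN known on disk */

static void reuse_block(struct busy *b)
{
	if (log_tail_lsn < b->freed_lsn) {
		/* force the log up to the freeing transaction */
		printf("forcing log to lsn %lu\n", b->freed_lsn);
		log_tail_lsn = b->freed_lsn;
	}
	printf("block safe to reuse\n");
}

int main(void)
{
	struct busy b = { .freed_lsn = 42 };

	log_tail_lsn = 10;
	reuse_block(&b);
	return 0;
}
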
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index c2568242a901..01d2072fb6d4 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -355,16 +355,15 @@ xfs_attr_set_int( | |||
355 | if (mp->m_flags & XFS_MOUNT_WSYNC) { | 355 | if (mp->m_flags & XFS_MOUNT_WSYNC) { |
356 | xfs_trans_set_sync(args.trans); | 356 | xfs_trans_set_sync(args.trans); |
357 | } | 357 | } |
358 | |||
359 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
360 | xfs_trans_ichgtime(args.trans, dp, | ||
361 | XFS_ICHGTIME_CHG); | ||
362 | } | ||
358 | err2 = xfs_trans_commit(args.trans, | 363 | err2 = xfs_trans_commit(args.trans, |
359 | XFS_TRANS_RELEASE_LOG_RES); | 364 | XFS_TRANS_RELEASE_LOG_RES); |
360 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 365 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
361 | 366 | ||
362 | /* | ||
363 | * Hit the inode change time. | ||
364 | */ | ||
365 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
366 | xfs_ichgtime(dp, XFS_ICHGTIME_CHG); | ||
367 | } | ||
368 | return(error == 0 ? err2 : error); | 367 | return(error == 0 ? err2 : error); |
369 | } | 368 | } |
370 | 369 | ||
@@ -420,6 +419,9 @@ xfs_attr_set_int( | |||
420 | xfs_trans_set_sync(args.trans); | 419 | xfs_trans_set_sync(args.trans); |
421 | } | 420 | } |
422 | 421 | ||
422 | if ((flags & ATTR_KERNOTIME) == 0) | ||
423 | xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); | ||
424 | |||
423 | /* | 425 | /* |
424 | * Commit the last in the sequence of transactions. | 426 | * Commit the last in the sequence of transactions. |
425 | */ | 427 | */ |
@@ -427,13 +429,6 @@ xfs_attr_set_int( | |||
427 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); | 429 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); |
428 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 430 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
429 | 431 | ||
430 | /* | ||
431 | * Hit the inode change time. | ||
432 | */ | ||
433 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
434 | xfs_ichgtime(dp, XFS_ICHGTIME_CHG); | ||
435 | } | ||
436 | |||
437 | return(error); | 432 | return(error); |
438 | 433 | ||
439 | out: | 434 | out: |
@@ -495,6 +490,13 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) | |||
495 | args.whichfork = XFS_ATTR_FORK; | 490 | args.whichfork = XFS_ATTR_FORK; |
496 | 491 | ||
497 | /* | 492 | /* |
493 | * we have no control over the attribute names that userspace passes us | ||
494 | * to remove, so we have to allow the name lookup prior to attribute | ||
495 | * removal to fail. | ||
496 | */ | ||
497 | args.op_flags = XFS_DA_OP_OKNOENT; | ||
498 | |||
499 | /* | ||
498 | * Attach the dquots to the inode. | 500 | * Attach the dquots to the inode. |
499 | */ | 501 | */ |
500 | error = xfs_qm_dqattach(dp, 0); | 502 | error = xfs_qm_dqattach(dp, 0); |
@@ -567,6 +569,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) | |||
567 | xfs_trans_set_sync(args.trans); | 569 | xfs_trans_set_sync(args.trans); |
568 | } | 570 | } |
569 | 571 | ||
572 | if ((flags & ATTR_KERNOTIME) == 0) | ||
573 | xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); | ||
574 | |||
570 | /* | 575 | /* |
571 | * Commit the last in the sequence of transactions. | 576 | * Commit the last in the sequence of transactions. |
572 | */ | 577 | */ |
@@ -574,13 +579,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) | |||
574 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); | 579 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); |
575 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 580 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
576 | 581 | ||
577 | /* | ||
578 | * Hit the inode change time. | ||
579 | */ | ||
580 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
581 | xfs_ichgtime(dp, XFS_ICHGTIME_CHG); | ||
582 | } | ||
583 | |||
584 | return(error); | 582 | return(error); |
585 | 583 | ||
586 | out: | 584 | out: |
@@ -1995,7 +1993,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) | |||
1995 | 1993 | ||
1996 | tmp = (valuelen < XFS_BUF_SIZE(bp)) | 1994 | tmp = (valuelen < XFS_BUF_SIZE(bp)) |
1997 | ? valuelen : XFS_BUF_SIZE(bp); | 1995 | ? valuelen : XFS_BUF_SIZE(bp); |
1998 | xfs_biomove(bp, 0, tmp, dst, XBF_READ); | 1996 | xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ); |
1999 | xfs_buf_relse(bp); | 1997 | xfs_buf_relse(bp); |
2000 | dst += tmp; | 1998 | dst += tmp; |
2001 | valuelen -= tmp; | 1999 | valuelen -= tmp; |
@@ -2125,9 +2123,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2125 | 2123 | ||
2126 | tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : | 2124 | tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : |
2127 | XFS_BUF_SIZE(bp); | 2125 | XFS_BUF_SIZE(bp); |
2128 | xfs_biomove(bp, 0, tmp, src, XBF_WRITE); | 2126 | xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE); |
2129 | if (tmp < XFS_BUF_SIZE(bp)) | 2127 | if (tmp < XFS_BUF_SIZE(bp)) |
2130 | xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); | 2128 | xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); |
2131 | if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ | 2129 | if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ |
2132 | return (error); | 2130 | return (error); |
2133 | } | 2131 | } |
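
The xfs_attr.c hunks above move the ctime bump from xfs_ichgtime() after xfs_trans_commit() to xfs_trans_ichgtime() while the transaction is still open, so the timestamp is logged atomically with the attribute change. A schematic of the two orderings (stub functions, illustrative only):

#include <stdio.h>

static void trans_ichgtime(void) { printf("  log ctime in transaction\n"); }
static void trans_commit(void)   { printf("  commit\n"); }

int main(void)
{
	printf("old ordering (ctime change is not logged):\n");
	trans_commit();
	printf("  update ctime after commit\n");

	printf("new ordering (ctime logged with the attr change):\n");
	trans_ichgtime();
	trans_commit();
	return 0;
}
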
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index a6cff8edcdb6..71e90dc2aeb1 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) | |||
637 | * It didn't all fit, so we have to sort everything on hashval. | 637 | * It didn't all fit, so we have to sort everything on hashval. |
638 | */ | 638 | */ |
639 | sbsize = sf->hdr.count * sizeof(*sbuf); | 639 | sbsize = sf->hdr.count * sizeof(*sbuf); |
640 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); | 640 | sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS); |
641 | 641 | ||
642 | /* | 642 | /* |
643 | * Scan the attribute list for the rest of the entries, storing | 643 | * Scan the attribute list for the rest of the entries, storing |
@@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) | |||
2386 | args.dp = context->dp; | 2386 | args.dp = context->dp; |
2387 | args.whichfork = XFS_ATTR_FORK; | 2387 | args.whichfork = XFS_ATTR_FORK; |
2388 | args.valuelen = valuelen; | 2388 | args.valuelen = valuelen; |
2389 | args.value = kmem_alloc(valuelen, KM_SLEEP); | 2389 | args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); |
2390 | args.rmtblkno = be32_to_cpu(name_rmt->valueblk); | 2390 | args.rmtblkno = be32_to_cpu(name_rmt->valueblk); |
2391 | args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); | 2391 | args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); |
2392 | retval = xfs_attr_rmtval_get(&args); | 2392 | retval = xfs_attr_rmtval_get(&args); |
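
Both hunks above add KM_NOFS, so that if these allocations have to reclaim memory, reclaim cannot recurse back into the filesystem while attr buffers and locks are held. A schematic of the flag masking (flag values are made up for the demo; only the pattern matters):

#include <stdio.h>

#define KM_SLEEP	0x0001u		/* may block */
#define KM_NOFS		0x0004u		/* no fs recursion from reclaim */

static void *demo_alloc(unsigned flags)
{
	if (flags & KM_NOFS)
		printf("reclaim restricted: will not re-enter the fs\n");
	return (void *)0;	/* actual allocation elided in this sketch */
}

int main(void)
{
	demo_alloc(KM_SLEEP | KM_NOFS);
	return 0;
}
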
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index f90dadd5a968..e546a33214c9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -89,36 +89,19 @@ xfs_bmap_add_attrfork_local( | |||
89 | int *flags); /* inode logging flags */ | 89 | int *flags); /* inode logging flags */ |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Called by xfs_bmapi to update file extent records and the btree | ||
93 | * after allocating space (or doing a delayed allocation). | ||
94 | */ | ||
95 | STATIC int /* error */ | ||
96 | xfs_bmap_add_extent( | ||
97 | xfs_inode_t *ip, /* incore inode pointer */ | ||
98 | xfs_extnum_t idx, /* extent number to update/insert */ | ||
99 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | ||
100 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
101 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | ||
102 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
103 | int *logflagsp, /* inode logging flags */ | ||
104 | int whichfork, /* data or attr fork */ | ||
105 | int rsvd); /* OK to allocate reserved blocks */ | ||
106 | |||
107 | /* | ||
108 | * Called by xfs_bmap_add_extent to handle cases converting a delayed | 92 | * Called by xfs_bmap_add_extent to handle cases converting a delayed |
109 | * allocation to a real allocation. | 93 | * allocation to a real allocation. |
110 | */ | 94 | */ |
111 | STATIC int /* error */ | 95 | STATIC int /* error */ |
112 | xfs_bmap_add_extent_delay_real( | 96 | xfs_bmap_add_extent_delay_real( |
113 | xfs_inode_t *ip, /* incore inode pointer */ | 97 | xfs_inode_t *ip, /* incore inode pointer */ |
114 | xfs_extnum_t idx, /* extent number to update/insert */ | 98 | xfs_extnum_t *idx, /* extent number to update/insert */ |
115 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 99 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
116 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 100 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
117 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ | 101 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ |
118 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | 102 | xfs_fsblock_t *first, /* pointer to firstblock variable */ |
119 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | 103 | xfs_bmap_free_t *flist, /* list of extents to be freed */ |
120 | int *logflagsp, /* inode logging flags */ | 104 | int *logflagsp); /* inode logging flags */ |
121 | int rsvd); /* OK to allocate reserved blocks */ | ||
122 | 105 | ||
123 | /* | 106 | /* |
124 | * Called by xfs_bmap_add_extent to handle cases converting a hole | 107 | * Called by xfs_bmap_add_extent to handle cases converting a hole |
@@ -127,10 +110,9 @@ xfs_bmap_add_extent_delay_real( | |||
127 | STATIC int /* error */ | 110 | STATIC int /* error */ |
128 | xfs_bmap_add_extent_hole_delay( | 111 | xfs_bmap_add_extent_hole_delay( |
129 | xfs_inode_t *ip, /* incore inode pointer */ | 112 | xfs_inode_t *ip, /* incore inode pointer */ |
130 | xfs_extnum_t idx, /* extent number to update/insert */ | 113 | xfs_extnum_t *idx, /* extent number to update/insert */ |
131 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 114 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
132 | int *logflagsp,/* inode logging flags */ | 115 | int *logflagsp); /* inode logging flags */ |
133 | int rsvd); /* OK to allocate reserved blocks */ | ||
134 | 116 | ||
135 | /* | 117 | /* |
136 | * Called by xfs_bmap_add_extent to handle cases converting a hole | 118 | * Called by xfs_bmap_add_extent to handle cases converting a hole |
@@ -139,7 +121,7 @@ xfs_bmap_add_extent_hole_delay( | |||
139 | STATIC int /* error */ | 121 | STATIC int /* error */ |
140 | xfs_bmap_add_extent_hole_real( | 122 | xfs_bmap_add_extent_hole_real( |
141 | xfs_inode_t *ip, /* incore inode pointer */ | 123 | xfs_inode_t *ip, /* incore inode pointer */ |
142 | xfs_extnum_t idx, /* extent number to update/insert */ | 124 | xfs_extnum_t *idx, /* extent number to update/insert */ |
143 | xfs_btree_cur_t *cur, /* if null, not a btree */ | 125 | xfs_btree_cur_t *cur, /* if null, not a btree */ |
144 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 126 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
145 | int *logflagsp, /* inode logging flags */ | 127 | int *logflagsp, /* inode logging flags */ |
@@ -152,7 +134,7 @@ xfs_bmap_add_extent_hole_real( | |||
152 | STATIC int /* error */ | 134 | STATIC int /* error */ |
153 | xfs_bmap_add_extent_unwritten_real( | 135 | xfs_bmap_add_extent_unwritten_real( |
154 | xfs_inode_t *ip, /* incore inode pointer */ | 136 | xfs_inode_t *ip, /* incore inode pointer */ |
155 | xfs_extnum_t idx, /* extent number to update/insert */ | 137 | xfs_extnum_t *idx, /* extent number to update/insert */ |
156 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 138 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
157 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 139 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
158 | int *logflagsp); /* inode logging flags */ | 140 | int *logflagsp); /* inode logging flags */ |
@@ -180,22 +162,6 @@ xfs_bmap_btree_to_extents( | |||
180 | int whichfork); /* data or attr fork */ | 162 | int whichfork); /* data or attr fork */ |
181 | 163 | ||
182 | /* | 164 | /* |
183 | * Called by xfs_bmapi to update file extent records and the btree | ||
184 | * after removing space (or undoing a delayed allocation). | ||
185 | */ | ||
186 | STATIC int /* error */ | ||
187 | xfs_bmap_del_extent( | ||
188 | xfs_inode_t *ip, /* incore inode pointer */ | ||
189 | xfs_trans_t *tp, /* current trans pointer */ | ||
190 | xfs_extnum_t idx, /* extent number to update/insert */ | ||
191 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
192 | xfs_btree_cur_t *cur, /* if null, not a btree */ | ||
193 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
194 | int *logflagsp,/* inode logging flags */ | ||
195 | int whichfork, /* data or attr fork */ | ||
196 | int rsvd); /* OK to allocate reserved blocks */ | ||
197 | |||
198 | /* | ||
199 | * Remove the entry "free" from the free item list. Prev points to the | 165 | * Remove the entry "free" from the free item list. Prev points to the |
200 | * previous entry, unless "free" is the head of the list. | 166 | * previous entry, unless "free" is the head of the list. |
201 | */ | 167 | */ |
@@ -474,14 +440,13 @@ xfs_bmap_add_attrfork_local( | |||
474 | STATIC int /* error */ | 440 | STATIC int /* error */ |
475 | xfs_bmap_add_extent( | 441 | xfs_bmap_add_extent( |
476 | xfs_inode_t *ip, /* incore inode pointer */ | 442 | xfs_inode_t *ip, /* incore inode pointer */ |
477 | xfs_extnum_t idx, /* extent number to update/insert */ | 443 | xfs_extnum_t *idx, /* extent number to update/insert */ |
478 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 444 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
479 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 445 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
480 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | 446 | xfs_fsblock_t *first, /* pointer to firstblock variable */ |
481 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | 447 | xfs_bmap_free_t *flist, /* list of extents to be freed */ |
482 | int *logflagsp, /* inode logging flags */ | 448 | int *logflagsp, /* inode logging flags */ |
483 | int whichfork, /* data or attr fork */ | 449 | int whichfork) /* data or attr fork */ |
484 | int rsvd) /* OK to use reserved data blocks */ | ||
485 | { | 450 | { |
486 | xfs_btree_cur_t *cur; /* btree cursor or null */ | 451 | xfs_btree_cur_t *cur; /* btree cursor or null */ |
487 | xfs_filblks_t da_new; /* new count del alloc blocks used */ | 452 | xfs_filblks_t da_new; /* new count del alloc blocks used */ |
@@ -492,23 +457,27 @@ xfs_bmap_add_extent( | |||
492 | xfs_extnum_t nextents; /* number of extents in file now */ | 457 | xfs_extnum_t nextents; /* number of extents in file now */ |
493 | 458 | ||
494 | XFS_STATS_INC(xs_add_exlist); | 459 | XFS_STATS_INC(xs_add_exlist); |
460 | |||
495 | cur = *curp; | 461 | cur = *curp; |
496 | ifp = XFS_IFORK_PTR(ip, whichfork); | 462 | ifp = XFS_IFORK_PTR(ip, whichfork); |
497 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 463 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
498 | ASSERT(idx <= nextents); | ||
499 | da_old = da_new = 0; | 464 | da_old = da_new = 0; |
500 | error = 0; | 465 | error = 0; |
466 | |||
467 | ASSERT(*idx >= 0); | ||
468 | ASSERT(*idx <= nextents); | ||
469 | |||
501 | /* | 470 | /* |
502 | * This is the first extent added to a new/empty file. | 471 | * This is the first extent added to a new/empty file. |
503 | * Special case this one, so other routines get to assume there are | 472 | * Special case this one, so other routines get to assume there are |
504 | * already extents in the list. | 473 | * already extents in the list. |
505 | */ | 474 | */ |
506 | if (nextents == 0) { | 475 | if (nextents == 0) { |
507 | xfs_iext_insert(ip, 0, 1, new, | 476 | xfs_iext_insert(ip, *idx, 1, new, |
508 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); | 477 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); |
509 | 478 | ||
510 | ASSERT(cur == NULL); | 479 | ASSERT(cur == NULL); |
511 | ifp->if_lastex = 0; | 480 | |
512 | if (!isnullstartblock(new->br_startblock)) { | 481 | if (!isnullstartblock(new->br_startblock)) { |
513 | XFS_IFORK_NEXT_SET(ip, whichfork, 1); | 482 | XFS_IFORK_NEXT_SET(ip, whichfork, 1); |
514 | logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); | 483 | logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); |
@@ -522,27 +491,25 @@ xfs_bmap_add_extent( | |||
522 | if (cur) | 491 | if (cur) |
523 | ASSERT((cur->bc_private.b.flags & | 492 | ASSERT((cur->bc_private.b.flags & |
524 | XFS_BTCUR_BPRV_WASDEL) == 0); | 493 | XFS_BTCUR_BPRV_WASDEL) == 0); |
525 | if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, | 494 | error = xfs_bmap_add_extent_hole_delay(ip, idx, new, |
526 | &logflags, rsvd))) | 495 | &logflags); |
527 | goto done; | ||
528 | } | 496 | } |
529 | /* | 497 | /* |
530 | * Real allocation off the end of the file. | 498 | * Real allocation off the end of the file. |
531 | */ | 499 | */ |
532 | else if (idx == nextents) { | 500 | else if (*idx == nextents) { |
533 | if (cur) | 501 | if (cur) |
534 | ASSERT((cur->bc_private.b.flags & | 502 | ASSERT((cur->bc_private.b.flags & |
535 | XFS_BTCUR_BPRV_WASDEL) == 0); | 503 | XFS_BTCUR_BPRV_WASDEL) == 0); |
536 | if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, | 504 | error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, |
537 | &logflags, whichfork))) | 505 | &logflags, whichfork); |
538 | goto done; | ||
539 | } else { | 506 | } else { |
540 | xfs_bmbt_irec_t prev; /* old extent at offset idx */ | 507 | xfs_bmbt_irec_t prev; /* old extent at offset idx */ |
541 | 508 | ||
542 | /* | 509 | /* |
543 | * Get the record referred to by idx. | 510 | * Get the record referred to by idx. |
544 | */ | 511 | */ |
545 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev); | 512 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &prev); |
546 | /* | 513 | /* |
547 | * If it's a real allocation record, and the new allocation ends | 514 | * If it's a real allocation record, and the new allocation ends |
548 | * after the start of the referred to record, then we're filling | 515 | * after the start of the referred to record, then we're filling |
@@ -557,22 +524,18 @@ xfs_bmap_add_extent( | |||
557 | if (cur) | 524 | if (cur) |
558 | ASSERT(cur->bc_private.b.flags & | 525 | ASSERT(cur->bc_private.b.flags & |
559 | XFS_BTCUR_BPRV_WASDEL); | 526 | XFS_BTCUR_BPRV_WASDEL); |
560 | if ((error = xfs_bmap_add_extent_delay_real(ip, | 527 | error = xfs_bmap_add_extent_delay_real(ip, |
561 | idx, &cur, new, &da_new, first, flist, | 528 | idx, &cur, new, &da_new, |
562 | &logflags, rsvd))) | 529 | first, flist, &logflags); |
563 | goto done; | ||
564 | } else if (new->br_state == XFS_EXT_NORM) { | ||
565 | ASSERT(new->br_state == XFS_EXT_NORM); | ||
566 | if ((error = xfs_bmap_add_extent_unwritten_real( | ||
567 | ip, idx, &cur, new, &logflags))) | ||
568 | goto done; | ||
569 | } else { | 530 | } else { |
570 | ASSERT(new->br_state == XFS_EXT_UNWRITTEN); | 531 | ASSERT(new->br_state == XFS_EXT_NORM || |
571 | if ((error = xfs_bmap_add_extent_unwritten_real( | 532 | new->br_state == XFS_EXT_UNWRITTEN); |
572 | ip, idx, &cur, new, &logflags))) | 533 | |
534 | error = xfs_bmap_add_extent_unwritten_real(ip, | ||
535 | idx, &cur, new, &logflags); | ||
536 | if (error) | ||
573 | goto done; | 537 | goto done; |
574 | } | 538 | } |
575 | ASSERT(*curp == cur || *curp == NULL); | ||
576 | } | 539 | } |
577 | /* | 540 | /* |
578 | * Otherwise we're filling in a hole with an allocation. | 541 | * Otherwise we're filling in a hole with an allocation. |
@@ -581,13 +544,15 @@ xfs_bmap_add_extent( | |||
581 | if (cur) | 544 | if (cur) |
582 | ASSERT((cur->bc_private.b.flags & | 545 | ASSERT((cur->bc_private.b.flags & |
583 | XFS_BTCUR_BPRV_WASDEL) == 0); | 546 | XFS_BTCUR_BPRV_WASDEL) == 0); |
584 | if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, | 547 | error = xfs_bmap_add_extent_hole_real(ip, idx, cur, |
585 | new, &logflags, whichfork))) | 548 | new, &logflags, whichfork); |
586 | goto done; | ||
587 | } | 549 | } |
588 | } | 550 | } |
589 | 551 | ||
552 | if (error) | ||
553 | goto done; | ||
590 | ASSERT(*curp == cur || *curp == NULL); | 554 | ASSERT(*curp == cur || *curp == NULL); |
555 | |||
591 | /* | 556 | /* |
592 | * Convert to a btree if necessary. | 557 | * Convert to a btree if necessary. |
593 | */ | 558 | */ |
@@ -614,8 +579,8 @@ xfs_bmap_add_extent( | |||
614 | nblks += cur->bc_private.b.allocated; | 579 | nblks += cur->bc_private.b.allocated; |
615 | ASSERT(nblks <= da_old); | 580 | ASSERT(nblks <= da_old); |
616 | if (nblks < da_old) | 581 | if (nblks < da_old) |
617 | xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, | 582 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
618 | (int64_t)(da_old - nblks), rsvd); | 583 | (int64_t)(da_old - nblks), 0); |
619 | } | 584 | } |
620 | /* | 585 | /* |
621 | * Clear out the allocated field, done with it now in any case. | 586 | * Clear out the allocated field, done with it now in any case. |
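
The hunk above swaps xfs_mod_incore_sb() for xfs_icsb_modify_counters() when handing back the part of a delayed-allocation reservation that the real allocation did not consume. An accounting sketch with a plain integer standing in for the per-cpu free-block counter:

#include <stdio.h>
#include <assert.h>

static long fdblocks = 1000;	/* free-block counter */

static void finish_delalloc(long da_old, long nblks)
{
	assert(nblks <= da_old);	/* never consume more than reserved */
	if (nblks < da_old)
		fdblocks += da_old - nblks;	/* return the excess */
}

int main(void)
{
	finish_delalloc(12, 9);			/* reserved 12, used 9 */
	printf("fdblocks = %ld\n", fdblocks);	/* 1003 */
	return 0;
}
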
@@ -640,14 +605,13 @@ done: | |||
640 | STATIC int /* error */ | 605 | STATIC int /* error */ |
641 | xfs_bmap_add_extent_delay_real( | 606 | xfs_bmap_add_extent_delay_real( |
642 | xfs_inode_t *ip, /* incore inode pointer */ | 607 | xfs_inode_t *ip, /* incore inode pointer */ |
643 | xfs_extnum_t idx, /* extent number to update/insert */ | 608 | xfs_extnum_t *idx, /* extent number to update/insert */ |
644 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 609 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
645 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 610 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
646 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ | 611 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ |
647 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | 612 | xfs_fsblock_t *first, /* pointer to firstblock variable */ |
648 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | 613 | xfs_bmap_free_t *flist, /* list of extents to be freed */ |
649 | int *logflagsp, /* inode logging flags */ | 614 | int *logflagsp) /* inode logging flags */ |
650 | int rsvd) /* OK to use reserved data block allocation */ | ||
651 | { | 615 | { |
652 | xfs_btree_cur_t *cur; /* btree cursor */ | 616 | xfs_btree_cur_t *cur; /* btree cursor */ |
653 | int diff; /* temp value */ | 617 | int diff; /* temp value */ |
@@ -673,7 +637,7 @@ xfs_bmap_add_extent_delay_real( | |||
673 | */ | 637 | */ |
674 | cur = *curp; | 638 | cur = *curp; |
675 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | 639 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
676 | ep = xfs_iext_get_ext(ifp, idx); | 640 | ep = xfs_iext_get_ext(ifp, *idx); |
677 | xfs_bmbt_get_all(ep, &PREV); | 641 | xfs_bmbt_get_all(ep, &PREV); |
678 | new_endoff = new->br_startoff + new->br_blockcount; | 642 | new_endoff = new->br_startoff + new->br_blockcount; |
679 | ASSERT(PREV.br_startoff <= new->br_startoff); | 643 | ASSERT(PREV.br_startoff <= new->br_startoff); |
@@ -692,9 +656,9 @@ xfs_bmap_add_extent_delay_real( | |||
692 | * Check and set flags if this segment has a left neighbor. | 656 | * Check and set flags if this segment has a left neighbor. |
693 | * Don't set contiguous if the combined extent would be too large. | 657 | * Don't set contiguous if the combined extent would be too large. |
694 | */ | 658 | */ |
695 | if (idx > 0) { | 659 | if (*idx > 0) { |
696 | state |= BMAP_LEFT_VALID; | 660 | state |= BMAP_LEFT_VALID; |
697 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); | 661 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); |
698 | 662 | ||
699 | if (isnullstartblock(LEFT.br_startblock)) | 663 | if (isnullstartblock(LEFT.br_startblock)) |
700 | state |= BMAP_LEFT_DELAY; | 664 | state |= BMAP_LEFT_DELAY; |
@@ -712,9 +676,9 @@ xfs_bmap_add_extent_delay_real( | |||
712 | * Don't set contiguous if the combined extent would be too large. | 676 | * Don't set contiguous if the combined extent would be too large. |
713 | * Also check for all-three-contiguous being too large. | 677 | * Also check for all-three-contiguous being too large. |
714 | */ | 678 | */ |
715 | if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { | 679 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { |
716 | state |= BMAP_RIGHT_VALID; | 680 | state |= BMAP_RIGHT_VALID; |
717 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); | 681 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); |
718 | 682 | ||
719 | if (isnullstartblock(RIGHT.br_startblock)) | 683 | if (isnullstartblock(RIGHT.br_startblock)) |
720 | state |= BMAP_RIGHT_DELAY; | 684 | state |= BMAP_RIGHT_DELAY; |
@@ -745,14 +709,14 @@ xfs_bmap_add_extent_delay_real( | |||
745 | * Filling in all of a previously delayed allocation extent. | 709 | * Filling in all of a previously delayed allocation extent. |
746 | * The left and right neighbors are both contiguous with new. | 710 | * The left and right neighbors are both contiguous with new. |
747 | */ | 711 | */ |
748 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 712 | --*idx; |
749 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 713 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
714 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
750 | LEFT.br_blockcount + PREV.br_blockcount + | 715 | LEFT.br_blockcount + PREV.br_blockcount + |
751 | RIGHT.br_blockcount); | 716 | RIGHT.br_blockcount); |
752 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 717 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
753 | 718 | ||
754 | xfs_iext_remove(ip, idx, 2, state); | 719 | xfs_iext_remove(ip, *idx + 1, 2, state); |
755 | ip->i_df.if_lastex = idx - 1; | ||
756 | ip->i_d.di_nextents--; | 720 | ip->i_d.di_nextents--; |
757 | if (cur == NULL) | 721 | if (cur == NULL) |
758 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 722 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -784,13 +748,14 @@ xfs_bmap_add_extent_delay_real( | |||
784 | * Filling in all of a previously delayed allocation extent. | 748 | * Filling in all of a previously delayed allocation extent. |
785 | * The left neighbor is contiguous, the right is not. | 749 | * The left neighbor is contiguous, the right is not. |
786 | */ | 750 | */ |
787 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 751 | --*idx; |
788 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 752 | |
753 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
754 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
789 | LEFT.br_blockcount + PREV.br_blockcount); | 755 | LEFT.br_blockcount + PREV.br_blockcount); |
790 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 756 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
791 | 757 | ||
792 | ip->i_df.if_lastex = idx - 1; | 758 | xfs_iext_remove(ip, *idx + 1, 1, state); |
793 | xfs_iext_remove(ip, idx, 1, state); | ||
794 | if (cur == NULL) | 759 | if (cur == NULL) |
795 | rval = XFS_ILOG_DEXT; | 760 | rval = XFS_ILOG_DEXT; |
796 | else { | 761 | else { |
@@ -814,14 +779,13 @@ xfs_bmap_add_extent_delay_real( | |||
814 | * Filling in all of a previously delayed allocation extent. | 779 | * Filling in all of a previously delayed allocation extent. |
815 | * The right neighbor is contiguous, the left is not. | 780 | * The right neighbor is contiguous, the left is not. |
816 | */ | 781 | */ |
817 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 782 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
818 | xfs_bmbt_set_startblock(ep, new->br_startblock); | 783 | xfs_bmbt_set_startblock(ep, new->br_startblock); |
819 | xfs_bmbt_set_blockcount(ep, | 784 | xfs_bmbt_set_blockcount(ep, |
820 | PREV.br_blockcount + RIGHT.br_blockcount); | 785 | PREV.br_blockcount + RIGHT.br_blockcount); |
821 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 786 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
822 | 787 | ||
823 | ip->i_df.if_lastex = idx; | 788 | xfs_iext_remove(ip, *idx + 1, 1, state); |
824 | xfs_iext_remove(ip, idx + 1, 1, state); | ||
825 | if (cur == NULL) | 789 | if (cur == NULL) |
826 | rval = XFS_ILOG_DEXT; | 790 | rval = XFS_ILOG_DEXT; |
827 | else { | 791 | else { |
@@ -837,6 +801,7 @@ xfs_bmap_add_extent_delay_real( | |||
837 | RIGHT.br_blockcount, PREV.br_state))) | 801 | RIGHT.br_blockcount, PREV.br_state))) |
838 | goto done; | 802 | goto done; |
839 | } | 803 | } |
804 | |||
840 | *dnew = 0; | 805 | *dnew = 0; |
841 | break; | 806 | break; |
842 | 807 | ||
@@ -846,11 +811,10 @@ xfs_bmap_add_extent_delay_real( | |||
846 | * Neither the left nor right neighbors are contiguous with | 811 | * Neither the left nor right neighbors are contiguous with |
847 | * the new one. | 812 | * the new one. |
848 | */ | 813 | */ |
849 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 814 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
850 | xfs_bmbt_set_startblock(ep, new->br_startblock); | 815 | xfs_bmbt_set_startblock(ep, new->br_startblock); |
851 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 816 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
852 | 817 | ||
853 | ip->i_df.if_lastex = idx; | ||
854 | ip->i_d.di_nextents++; | 818 | ip->i_d.di_nextents++; |
855 | if (cur == NULL) | 819 | if (cur == NULL) |
856 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 820 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -866,6 +830,7 @@ xfs_bmap_add_extent_delay_real( | |||
866 | goto done; | 830 | goto done; |
867 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 831 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
868 | } | 832 | } |
833 | |||
869 | *dnew = 0; | 834 | *dnew = 0; |
870 | break; | 835 | break; |
871 | 836 | ||
@@ -874,17 +839,16 @@ xfs_bmap_add_extent_delay_real( | |||
874 | * Filling in the first part of a previous delayed allocation. | 839 | * Filling in the first part of a previous delayed allocation. |
875 | * The left neighbor is contiguous. | 840 | * The left neighbor is contiguous. |
876 | */ | 841 | */ |
877 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 842 | trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); |
878 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 843 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), |
879 | LEFT.br_blockcount + new->br_blockcount); | 844 | LEFT.br_blockcount + new->br_blockcount); |
880 | xfs_bmbt_set_startoff(ep, | 845 | xfs_bmbt_set_startoff(ep, |
881 | PREV.br_startoff + new->br_blockcount); | 846 | PREV.br_startoff + new->br_blockcount); |
882 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 847 | trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); |
883 | 848 | ||
884 | temp = PREV.br_blockcount - new->br_blockcount; | 849 | temp = PREV.br_blockcount - new->br_blockcount; |
885 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 850 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
886 | xfs_bmbt_set_blockcount(ep, temp); | 851 | xfs_bmbt_set_blockcount(ep, temp); |
887 | ip->i_df.if_lastex = idx - 1; | ||
888 | if (cur == NULL) | 852 | if (cur == NULL) |
889 | rval = XFS_ILOG_DEXT; | 853 | rval = XFS_ILOG_DEXT; |
890 | else { | 854 | else { |
@@ -904,7 +868,9 @@ xfs_bmap_add_extent_delay_real( | |||
904 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 868 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
905 | startblockval(PREV.br_startblock)); | 869 | startblockval(PREV.br_startblock)); |
906 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 870 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
907 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 871 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
872 | |||
873 | --*idx; | ||
908 | *dnew = temp; | 874 | *dnew = temp; |
909 | break; | 875 | break; |
910 | 876 | ||
@@ -913,12 +879,11 @@ xfs_bmap_add_extent_delay_real( | |||
913 | * Filling in the first part of a previous delayed allocation. | 879 | * Filling in the first part of a previous delayed allocation. |
914 | * The left neighbor is not contiguous. | 880 | * The left neighbor is not contiguous. |
915 | */ | 881 | */ |
916 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 882 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
917 | xfs_bmbt_set_startoff(ep, new_endoff); | 883 | xfs_bmbt_set_startoff(ep, new_endoff); |
918 | temp = PREV.br_blockcount - new->br_blockcount; | 884 | temp = PREV.br_blockcount - new->br_blockcount; |
919 | xfs_bmbt_set_blockcount(ep, temp); | 885 | xfs_bmbt_set_blockcount(ep, temp); |
920 | xfs_iext_insert(ip, idx, 1, new, state); | 886 | xfs_iext_insert(ip, *idx, 1, new, state); |
921 | ip->i_df.if_lastex = idx; | ||
922 | ip->i_d.di_nextents++; | 887 | ip->i_d.di_nextents++; |
923 | if (cur == NULL) | 888 | if (cur == NULL) |
924 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 889 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -946,9 +911,10 @@ xfs_bmap_add_extent_delay_real( | |||
946 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 911 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
947 | startblockval(PREV.br_startblock) - | 912 | startblockval(PREV.br_startblock) - |
948 | (cur ? cur->bc_private.b.allocated : 0)); | 913 | (cur ? cur->bc_private.b.allocated : 0)); |
949 | ep = xfs_iext_get_ext(ifp, idx + 1); | 914 | ep = xfs_iext_get_ext(ifp, *idx + 1); |
950 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 915 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
951 | trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); | 916 | trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); |
917 | |||
952 | *dnew = temp; | 918 | *dnew = temp; |
953 | break; | 919 | break; |
954 | 920 | ||
@@ -958,15 +924,13 @@ xfs_bmap_add_extent_delay_real( | |||
958 | * The right neighbor is contiguous with the new allocation. | 924 | * The right neighbor is contiguous with the new allocation. |
959 | */ | 925 | */ |
960 | temp = PREV.br_blockcount - new->br_blockcount; | 926 | temp = PREV.br_blockcount - new->br_blockcount; |
961 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 927 | trace_xfs_bmap_pre_update(ip, *idx + 1, state, _THIS_IP_); |
962 | trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); | ||
963 | xfs_bmbt_set_blockcount(ep, temp); | 928 | xfs_bmbt_set_blockcount(ep, temp); |
964 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), | 929 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx + 1), |
965 | new->br_startoff, new->br_startblock, | 930 | new->br_startoff, new->br_startblock, |
966 | new->br_blockcount + RIGHT.br_blockcount, | 931 | new->br_blockcount + RIGHT.br_blockcount, |
967 | RIGHT.br_state); | 932 | RIGHT.br_state); |
968 | trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); | 933 | trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); |
969 | ip->i_df.if_lastex = idx + 1; | ||
970 | if (cur == NULL) | 934 | if (cur == NULL) |
971 | rval = XFS_ILOG_DEXT; | 935 | rval = XFS_ILOG_DEXT; |
972 | else { | 936 | else { |
@@ -983,10 +947,14 @@ xfs_bmap_add_extent_delay_real( | |||
983 | RIGHT.br_state))) | 947 | RIGHT.br_state))) |
984 | goto done; | 948 | goto done; |
985 | } | 949 | } |
950 | |||
986 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 951 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
987 | startblockval(PREV.br_startblock)); | 952 | startblockval(PREV.br_startblock)); |
953 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
988 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 954 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
989 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 955 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
956 | |||
957 | ++*idx; | ||
990 | *dnew = temp; | 958 | *dnew = temp; |
991 | break; | 959 | break; |
992 | 960 | ||
@@ -996,10 +964,9 @@ xfs_bmap_add_extent_delay_real( | |||
996 | * The right neighbor is not contiguous. | 964 | * The right neighbor is not contiguous. |
997 | */ | 965 | */ |
998 | temp = PREV.br_blockcount - new->br_blockcount; | 966 | temp = PREV.br_blockcount - new->br_blockcount; |
999 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 967 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1000 | xfs_bmbt_set_blockcount(ep, temp); | 968 | xfs_bmbt_set_blockcount(ep, temp); |
1001 | xfs_iext_insert(ip, idx + 1, 1, new, state); | 969 | xfs_iext_insert(ip, *idx + 1, 1, new, state); |
1002 | ip->i_df.if_lastex = idx + 1; | ||
1003 | ip->i_d.di_nextents++; | 970 | ip->i_d.di_nextents++; |
1004 | if (cur == NULL) | 971 | if (cur == NULL) |
1005 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 972 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1027,9 +994,11 @@ xfs_bmap_add_extent_delay_real( | |||
1027 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 994 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
1028 | startblockval(PREV.br_startblock) - | 995 | startblockval(PREV.br_startblock) - |
1029 | (cur ? cur->bc_private.b.allocated : 0)); | 996 | (cur ? cur->bc_private.b.allocated : 0)); |
1030 | ep = xfs_iext_get_ext(ifp, idx); | 997 | ep = xfs_iext_get_ext(ifp, *idx); |
1031 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 998 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
1032 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 999 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1000 | |||
1001 | ++*idx; | ||
1033 | *dnew = temp; | 1002 | *dnew = temp; |
1034 | break; | 1003 | break; |
1035 | 1004 | ||
@@ -1038,18 +1007,34 @@ xfs_bmap_add_extent_delay_real( | |||
1038 | * Filling in the middle part of a previous delayed allocation. | 1007 | * Filling in the middle part of a previous delayed allocation. |
1039 | * Contiguity is impossible here. | 1008 | * Contiguity is impossible here. |
1040 | * This case is avoided almost all the time. | 1009 | * This case is avoided almost all the time. |
1010 | * | ||
1011 | * We start with a delayed allocation: | ||
1012 | * | ||
1013 | * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ | ||
1014 | * PREV @ idx | ||
1015 | * | ||
1016 | * and we are allocating: | ||
1017 | * +rrrrrrrrrrrrrrrrr+ | ||
1018 | * new | ||
1019 | * | ||
1020 | * and we set it up for insertion as: | ||
1021 | * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ | ||
1022 | * new | ||
1023 | * PREV @ idx LEFT RIGHT | ||
1024 | * inserted at idx + 1 | ||
1041 | */ | 1025 | */ |
1042 | temp = new->br_startoff - PREV.br_startoff; | 1026 | temp = new->br_startoff - PREV.br_startoff; |
1043 | trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); | ||
1044 | xfs_bmbt_set_blockcount(ep, temp); | ||
1045 | r[0] = *new; | ||
1046 | r[1].br_state = PREV.br_state; | ||
1047 | r[1].br_startblock = 0; | ||
1048 | r[1].br_startoff = new_endoff; | ||
1049 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; | 1027 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; |
1050 | r[1].br_blockcount = temp2; | 1028 | trace_xfs_bmap_pre_update(ip, *idx, 0, _THIS_IP_); |
1051 | xfs_iext_insert(ip, idx + 1, 2, &r[0], state); | 1029 | xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ |
1052 | ip->i_df.if_lastex = idx + 1; | 1030 | LEFT = *new; |
1031 | RIGHT.br_state = PREV.br_state; | ||
1032 | RIGHT.br_startblock = nullstartblock( | ||
1033 | (int)xfs_bmap_worst_indlen(ip, temp2)); | ||
1034 | RIGHT.br_startoff = new_endoff; | ||
1035 | RIGHT.br_blockcount = temp2; | ||
1036 | /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ | ||
1037 | xfs_iext_insert(ip, *idx + 1, 2, &LEFT, state); | ||
1053 | ip->i_d.di_nextents++; | 1038 | ip->i_d.di_nextents++; |
1054 | if (cur == NULL) | 1039 | if (cur == NULL) |
1055 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1040 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
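
The diagram and the rewritten setup above split one delayed extent into three records. A self-contained model of the offset arithmetic, with a plain struct standing in for xfs_bmbt_irec_t (an assumption for illustration, not the kernel type):

    #include <assert.h>
    #include <stdio.h>

    /* simplified stand-in for xfs_bmbt_irec_t: a file-offset range */
    struct irec { unsigned long long startoff, blockcount; };

    int main(void)
    {
        struct irec PREV = { .startoff = 100, .blockcount = 50 };  /* delayed */
        struct irec new  = { .startoff = 120, .blockcount = 10 };  /* real    */
        unsigned long long new_endoff = new.startoff + new.blockcount;

        /* temp: how much of PREV survives to the left of new */
        unsigned long long temp  = new.startoff - PREV.startoff;
        /* temp2: how much of PREV survives to the right of new */
        unsigned long long temp2 = PREV.startoff + PREV.blockcount - new_endoff;

        assert(temp + new.blockcount + temp2 == PREV.blockcount);
        printf("left %llu + new %llu + right %llu = prev %llu\n",
               temp, new.blockcount, temp2, PREV.blockcount);
        return 0;
    }
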
@@ -1079,7 +1064,8 @@ xfs_bmap_add_extent_delay_real( | |||
1079 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - | 1064 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - |
1080 | (cur ? cur->bc_private.b.allocated : 0)); | 1065 | (cur ? cur->bc_private.b.allocated : 0)); |
1081 | if (diff > 0 && | 1066 | if (diff > 0 && |
1082 | xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) { | 1067 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
1068 | -((int64_t)diff), 0)) { | ||
1083 | /* | 1069 | /* |
1084 | * Ick gross gag me with a spoon. | 1070 | * Ick gross gag me with a spoon. |
1085 | */ | 1071 | */ |
@@ -1089,27 +1075,31 @@ xfs_bmap_add_extent_delay_real( | |||
1089 | temp--; | 1075 | temp--; |
1090 | diff--; | 1076 | diff--; |
1091 | if (!diff || | 1077 | if (!diff || |
1092 | !xfs_mod_incore_sb(ip->i_mount, | 1078 | !xfs_icsb_modify_counters(ip->i_mount, |
1093 | XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) | 1079 | XFS_SBS_FDBLOCKS, |
1080 | -((int64_t)diff), 0)) | ||
1094 | break; | 1081 | break; |
1095 | } | 1082 | } |
1096 | if (temp2) { | 1083 | if (temp2) { |
1097 | temp2--; | 1084 | temp2--; |
1098 | diff--; | 1085 | diff--; |
1099 | if (!diff || | 1086 | if (!diff || |
1100 | !xfs_mod_incore_sb(ip->i_mount, | 1087 | !xfs_icsb_modify_counters(ip->i_mount, |
1101 | XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) | 1088 | XFS_SBS_FDBLOCKS, |
1089 | -((int64_t)diff), 0)) | ||
1102 | break; | 1090 | break; |
1103 | } | 1091 | } |
1104 | } | 1092 | } |
1105 | } | 1093 | } |
1106 | ep = xfs_iext_get_ext(ifp, idx); | 1094 | ep = xfs_iext_get_ext(ifp, *idx); |
1107 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 1095 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
1108 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1096 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1109 | trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_); | 1097 | trace_xfs_bmap_pre_update(ip, *idx + 2, state, _THIS_IP_); |
1110 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), | 1098 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx + 2), |
1111 | nullstartblock((int)temp2)); | 1099 | nullstartblock((int)temp2)); |
1112 | trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); | 1100 | trace_xfs_bmap_post_update(ip, *idx + 2, state, _THIS_IP_); |
1101 | |||
1102 | ++*idx; | ||
1113 | *dnew = temp + temp2; | 1103 | *dnew = temp + temp2; |
1114 | break; | 1104 | break; |
1115 | 1105 | ||
@@ -1141,7 +1131,7 @@ done: | |||
1141 | STATIC int /* error */ | 1131 | STATIC int /* error */ |
1142 | xfs_bmap_add_extent_unwritten_real( | 1132 | xfs_bmap_add_extent_unwritten_real( |
1143 | xfs_inode_t *ip, /* incore inode pointer */ | 1133 | xfs_inode_t *ip, /* incore inode pointer */ |
1144 | xfs_extnum_t idx, /* extent number to update/insert */ | 1134 | xfs_extnum_t *idx, /* extent number to update/insert */ |
1145 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 1135 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
1146 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 1136 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
1147 | int *logflagsp) /* inode logging flags */ | 1137 | int *logflagsp) /* inode logging flags */ |
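
This signature change is the pattern the whole commit applies: the extent index becomes an in/out pointer, replacing the old convention of returning the position through ifp->if_lastex. A tiny sketch of the calling convention (names invented for the example):

    #include <stdio.h>

    /* the callee updates the caller's position directly instead of
     * stashing it in a per-fork field for the caller to re-read */
    static void merge_with_left(int *idx)
    {
        /* ...modify the record at *idx - 1... */
        --*idx;  /* caller now points at the merged record */
    }

    int main(void)
    {
        int lastx = 5;
        merge_with_left(&lastx);
        printf("caller's index is now %d\n", lastx);  /* prints 4 */
        return 0;
    }
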
@@ -1168,7 +1158,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
1168 | error = 0; | 1158 | error = 0; |
1169 | cur = *curp; | 1159 | cur = *curp; |
1170 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | 1160 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
1171 | ep = xfs_iext_get_ext(ifp, idx); | 1161 | ep = xfs_iext_get_ext(ifp, *idx); |
1172 | xfs_bmbt_get_all(ep, &PREV); | 1162 | xfs_bmbt_get_all(ep, &PREV); |
1173 | newext = new->br_state; | 1163 | newext = new->br_state; |
1174 | oldext = (newext == XFS_EXT_UNWRITTEN) ? | 1164 | oldext = (newext == XFS_EXT_UNWRITTEN) ? |
@@ -1191,9 +1181,9 @@ xfs_bmap_add_extent_unwritten_real( | |||
1191 | * Check and set flags if this segment has a left neighbor. | 1181 | * Check and set flags if this segment has a left neighbor. |
1192 | * Don't set contiguous if the combined extent would be too large. | 1182 | * Don't set contiguous if the combined extent would be too large. |
1193 | */ | 1183 | */ |
1194 | if (idx > 0) { | 1184 | if (*idx > 0) { |
1195 | state |= BMAP_LEFT_VALID; | 1185 | state |= BMAP_LEFT_VALID; |
1196 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); | 1186 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); |
1197 | 1187 | ||
1198 | if (isnullstartblock(LEFT.br_startblock)) | 1188 | if (isnullstartblock(LEFT.br_startblock)) |
1199 | state |= BMAP_LEFT_DELAY; | 1189 | state |= BMAP_LEFT_DELAY; |
@@ -1211,9 +1201,9 @@ xfs_bmap_add_extent_unwritten_real( | |||
1211 | * Don't set contiguous if the combined extent would be too large. | 1201 | * Don't set contiguous if the combined extent would be too large. |
1212 | * Also check for all-three-contiguous being too large. | 1202 | * Also check for all-three-contiguous being too large. |
1213 | */ | 1203 | */ |
1214 | if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { | 1204 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { |
1215 | state |= BMAP_RIGHT_VALID; | 1205 | state |= BMAP_RIGHT_VALID; |
1216 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); | 1206 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); |
1217 | if (isnullstartblock(RIGHT.br_startblock)) | 1207 | if (isnullstartblock(RIGHT.br_startblock)) |
1218 | state |= BMAP_RIGHT_DELAY; | 1208 | state |= BMAP_RIGHT_DELAY; |
1219 | } | 1209 | } |
@@ -1242,14 +1232,15 @@ xfs_bmap_add_extent_unwritten_real( | |||
1242 | * Setting all of a previous oldext extent to newext. | 1232 | * Setting all of a previous oldext extent to newext. |
1243 | * The left and right neighbors are both contiguous with new. | 1233 | * The left and right neighbors are both contiguous with new. |
1244 | */ | 1234 | */ |
1245 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1235 | --*idx; |
1246 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1236 | |
1237 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
1238 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
1247 | LEFT.br_blockcount + PREV.br_blockcount + | 1239 | LEFT.br_blockcount + PREV.br_blockcount + |
1248 | RIGHT.br_blockcount); | 1240 | RIGHT.br_blockcount); |
1249 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1241 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1250 | 1242 | ||
1251 | xfs_iext_remove(ip, idx, 2, state); | 1243 | xfs_iext_remove(ip, *idx + 1, 2, state); |
1252 | ip->i_df.if_lastex = idx - 1; | ||
1253 | ip->i_d.di_nextents -= 2; | 1244 | ip->i_d.di_nextents -= 2; |
1254 | if (cur == NULL) | 1245 | if (cur == NULL) |
1255 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1246 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
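
In the both-neighbors-contiguous case above, the code now backs the index up to LEFT, widens it to cover all three records, and removes the two records after it. A toy array model of those steps (plain structs, not the incore extent list):

    #include <stdio.h>
    #include <string.h>

    struct irec { unsigned long long startoff, blockcount; };

    static void iext_remove(struct irec *recs, int *nrecs, int at, int count)
    {
        memmove(&recs[at], &recs[at + count],
                (*nrecs - at - count) * sizeof(recs[0]));
        *nrecs -= count;
    }

    int main(void)
    {
        /* LEFT, PREV, RIGHT are contiguous */
        struct irec recs[] = { {0, 10}, {10, 20}, {30, 5} };
        int nrecs = 3, idx = 1;  /* idx points at PREV */

        --idx;                   /* step back to LEFT */
        recs[idx].blockcount = 10 + 20 + 5;
        iext_remove(recs, &nrecs, idx + 1, 2);  /* drop old PREV and RIGHT */

        printf("%d record(s): [%llu, +%llu)\n",
               nrecs, recs[0].startoff, recs[0].blockcount);
        return 0;
    }
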
@@ -1285,13 +1276,14 @@ xfs_bmap_add_extent_unwritten_real( | |||
1285 | * Setting all of a previous oldext extent to newext. | 1276 | * Setting all of a previous oldext extent to newext. |
1286 | * The left neighbor is contiguous, the right is not. | 1277 | * The left neighbor is contiguous, the right is not. |
1287 | */ | 1278 | */ |
1288 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1279 | --*idx; |
1289 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1280 | |
1281 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
1282 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
1290 | LEFT.br_blockcount + PREV.br_blockcount); | 1283 | LEFT.br_blockcount + PREV.br_blockcount); |
1291 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1284 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1292 | 1285 | ||
1293 | ip->i_df.if_lastex = idx - 1; | 1286 | xfs_iext_remove(ip, *idx + 1, 1, state); |
1294 | xfs_iext_remove(ip, idx, 1, state); | ||
1295 | ip->i_d.di_nextents--; | 1287 | ip->i_d.di_nextents--; |
1296 | if (cur == NULL) | 1288 | if (cur == NULL) |
1297 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1289 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1321,13 +1313,12 @@ xfs_bmap_add_extent_unwritten_real( | |||
1321 | * Setting all of a previous oldext extent to newext. | 1313 | * Setting all of a previous oldext extent to newext. |
1322 | * The right neighbor is contiguous, the left is not. | 1314 | * The right neighbor is contiguous, the left is not. |
1323 | */ | 1315 | */ |
1324 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1316 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1325 | xfs_bmbt_set_blockcount(ep, | 1317 | xfs_bmbt_set_blockcount(ep, |
1326 | PREV.br_blockcount + RIGHT.br_blockcount); | 1318 | PREV.br_blockcount + RIGHT.br_blockcount); |
1327 | xfs_bmbt_set_state(ep, newext); | 1319 | xfs_bmbt_set_state(ep, newext); |
1328 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1320 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1329 | ip->i_df.if_lastex = idx; | 1321 | xfs_iext_remove(ip, *idx + 1, 1, state); |
1330 | xfs_iext_remove(ip, idx + 1, 1, state); | ||
1331 | ip->i_d.di_nextents--; | 1322 | ip->i_d.di_nextents--; |
1332 | if (cur == NULL) | 1323 | if (cur == NULL) |
1333 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1324 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1358,11 +1349,10 @@ xfs_bmap_add_extent_unwritten_real( | |||
1358 | * Neither the left nor right neighbors are contiguous with | 1349 | * Neither the left nor right neighbors are contiguous with |
1359 | * the new one. | 1350 | * the new one. |
1360 | */ | 1351 | */ |
1361 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1352 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1362 | xfs_bmbt_set_state(ep, newext); | 1353 | xfs_bmbt_set_state(ep, newext); |
1363 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1354 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1364 | 1355 | ||
1365 | ip->i_df.if_lastex = idx; | ||
1366 | if (cur == NULL) | 1356 | if (cur == NULL) |
1367 | rval = XFS_ILOG_DEXT; | 1357 | rval = XFS_ILOG_DEXT; |
1368 | else { | 1358 | else { |
@@ -1384,21 +1374,22 @@ xfs_bmap_add_extent_unwritten_real( | |||
1384 | * Setting the first part of a previous oldext extent to newext. | 1374 | * Setting the first part of a previous oldext extent to newext. |
1385 | * The left neighbor is contiguous. | 1375 | * The left neighbor is contiguous. |
1386 | */ | 1376 | */ |
1387 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1377 | trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); |
1388 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1378 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), |
1389 | LEFT.br_blockcount + new->br_blockcount); | 1379 | LEFT.br_blockcount + new->br_blockcount); |
1390 | xfs_bmbt_set_startoff(ep, | 1380 | xfs_bmbt_set_startoff(ep, |
1391 | PREV.br_startoff + new->br_blockcount); | 1381 | PREV.br_startoff + new->br_blockcount); |
1392 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1382 | trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); |
1393 | 1383 | ||
1394 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1384 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1395 | xfs_bmbt_set_startblock(ep, | 1385 | xfs_bmbt_set_startblock(ep, |
1396 | new->br_startblock + new->br_blockcount); | 1386 | new->br_startblock + new->br_blockcount); |
1397 | xfs_bmbt_set_blockcount(ep, | 1387 | xfs_bmbt_set_blockcount(ep, |
1398 | PREV.br_blockcount - new->br_blockcount); | 1388 | PREV.br_blockcount - new->br_blockcount); |
1399 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1389 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1390 | |||
1391 | --*idx; | ||
1400 | 1392 | ||
1401 | ip->i_df.if_lastex = idx - 1; | ||
1402 | if (cur == NULL) | 1393 | if (cur == NULL) |
1403 | rval = XFS_ILOG_DEXT; | 1394 | rval = XFS_ILOG_DEXT; |
1404 | else { | 1395 | else { |
@@ -1429,17 +1420,16 @@ xfs_bmap_add_extent_unwritten_real( | |||
1429 | * Setting the first part of a previous oldext extent to newext. | 1420 | * Setting the first part of a previous oldext extent to newext. |
1430 | * The left neighbor is not contiguous. | 1421 | * The left neighbor is not contiguous. |
1431 | */ | 1422 | */ |
1432 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1423 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1433 | ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); | 1424 | ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); |
1434 | xfs_bmbt_set_startoff(ep, new_endoff); | 1425 | xfs_bmbt_set_startoff(ep, new_endoff); |
1435 | xfs_bmbt_set_blockcount(ep, | 1426 | xfs_bmbt_set_blockcount(ep, |
1436 | PREV.br_blockcount - new->br_blockcount); | 1427 | PREV.br_blockcount - new->br_blockcount); |
1437 | xfs_bmbt_set_startblock(ep, | 1428 | xfs_bmbt_set_startblock(ep, |
1438 | new->br_startblock + new->br_blockcount); | 1429 | new->br_startblock + new->br_blockcount); |
1439 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1430 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1440 | 1431 | ||
1441 | xfs_iext_insert(ip, idx, 1, new, state); | 1432 | xfs_iext_insert(ip, *idx, 1, new, state); |
1442 | ip->i_df.if_lastex = idx; | ||
1443 | ip->i_d.di_nextents++; | 1433 | ip->i_d.di_nextents++; |
1444 | if (cur == NULL) | 1434 | if (cur == NULL) |
1445 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1435 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1468,17 +1458,19 @@ xfs_bmap_add_extent_unwritten_real( | |||
1468 | * Setting the last part of a previous oldext extent to newext. | 1458 | * Setting the last part of a previous oldext extent to newext. |
1469 | * The right neighbor is contiguous with the new allocation. | 1459 | * The right neighbor is contiguous with the new allocation. |
1470 | */ | 1460 | */ |
1471 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1461 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1472 | trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); | ||
1473 | xfs_bmbt_set_blockcount(ep, | 1462 | xfs_bmbt_set_blockcount(ep, |
1474 | PREV.br_blockcount - new->br_blockcount); | 1463 | PREV.br_blockcount - new->br_blockcount); |
1475 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1464 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1476 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), | 1465 | |
1466 | ++*idx; | ||
1467 | |||
1468 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
1469 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), | ||
1477 | new->br_startoff, new->br_startblock, | 1470 | new->br_startoff, new->br_startblock, |
1478 | new->br_blockcount + RIGHT.br_blockcount, newext); | 1471 | new->br_blockcount + RIGHT.br_blockcount, newext); |
1479 | trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); | 1472 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1480 | 1473 | ||
1481 | ip->i_df.if_lastex = idx + 1; | ||
1482 | if (cur == NULL) | 1474 | if (cur == NULL) |
1483 | rval = XFS_ILOG_DEXT; | 1475 | rval = XFS_ILOG_DEXT; |
1484 | else { | 1476 | else { |
@@ -1508,13 +1500,14 @@ xfs_bmap_add_extent_unwritten_real( | |||
1508 | * Setting the last part of a previous oldext extent to newext. | 1500 | * Setting the last part of a previous oldext extent to newext. |
1509 | * The right neighbor is not contiguous. | 1501 | * The right neighbor is not contiguous. |
1510 | */ | 1502 | */ |
1511 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1503 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1512 | xfs_bmbt_set_blockcount(ep, | 1504 | xfs_bmbt_set_blockcount(ep, |
1513 | PREV.br_blockcount - new->br_blockcount); | 1505 | PREV.br_blockcount - new->br_blockcount); |
1514 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1506 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1507 | |||
1508 | ++*idx; | ||
1509 | xfs_iext_insert(ip, *idx, 1, new, state); | ||
1515 | 1510 | ||
1516 | xfs_iext_insert(ip, idx + 1, 1, new, state); | ||
1517 | ip->i_df.if_lastex = idx + 1; | ||
1518 | ip->i_d.di_nextents++; | 1511 | ip->i_d.di_nextents++; |
1519 | if (cur == NULL) | 1512 | if (cur == NULL) |
1520 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1513 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1548,10 +1541,10 @@ xfs_bmap_add_extent_unwritten_real( | |||
1548 | * newext. Contiguity is impossible here. | 1541 | * newext. Contiguity is impossible here. |
1549 | * One extent becomes three extents. | 1542 | * One extent becomes three extents. |
1550 | */ | 1543 | */ |
1551 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1544 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1552 | xfs_bmbt_set_blockcount(ep, | 1545 | xfs_bmbt_set_blockcount(ep, |
1553 | new->br_startoff - PREV.br_startoff); | 1546 | new->br_startoff - PREV.br_startoff); |
1554 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1547 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1555 | 1548 | ||
1556 | r[0] = *new; | 1549 | r[0] = *new; |
1557 | r[1].br_startoff = new_endoff; | 1550 | r[1].br_startoff = new_endoff; |
@@ -1559,8 +1552,10 @@ xfs_bmap_add_extent_unwritten_real( | |||
1559 | PREV.br_startoff + PREV.br_blockcount - new_endoff; | 1552 | PREV.br_startoff + PREV.br_blockcount - new_endoff; |
1560 | r[1].br_startblock = new->br_startblock + new->br_blockcount; | 1553 | r[1].br_startblock = new->br_startblock + new->br_blockcount; |
1561 | r[1].br_state = oldext; | 1554 | r[1].br_state = oldext; |
1562 | xfs_iext_insert(ip, idx + 1, 2, &r[0], state); | 1555 | |
1563 | ip->i_df.if_lastex = idx + 1; | 1556 | ++*idx; |
1557 | xfs_iext_insert(ip, *idx, 2, &r[0], state); | ||
1558 | |||
1564 | ip->i_d.di_nextents += 2; | 1559 | ip->i_d.di_nextents += 2; |
1565 | if (cur == NULL) | 1560 | if (cur == NULL) |
1566 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1561 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1630,12 +1625,10 @@ done: | |||
1630 | STATIC int /* error */ | 1625 | STATIC int /* error */ |
1631 | xfs_bmap_add_extent_hole_delay( | 1626 | xfs_bmap_add_extent_hole_delay( |
1632 | xfs_inode_t *ip, /* incore inode pointer */ | 1627 | xfs_inode_t *ip, /* incore inode pointer */ |
1633 | xfs_extnum_t idx, /* extent number to update/insert */ | 1628 | xfs_extnum_t *idx, /* extent number to update/insert */ |
1634 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 1629 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
1635 | int *logflagsp, /* inode logging flags */ | 1630 | int *logflagsp) /* inode logging flags */ |
1636 | int rsvd) /* OK to allocate reserved blocks */ | ||
1637 | { | 1631 | { |
1638 | xfs_bmbt_rec_host_t *ep; /* extent record for idx */ | ||
1639 | xfs_ifork_t *ifp; /* inode fork pointer */ | 1632 | xfs_ifork_t *ifp; /* inode fork pointer */ |
1640 | xfs_bmbt_irec_t left; /* left neighbor extent entry */ | 1633 | xfs_bmbt_irec_t left; /* left neighbor extent entry */ |
1641 | xfs_filblks_t newlen=0; /* new indirect size */ | 1634 | xfs_filblks_t newlen=0; /* new indirect size */ |
@@ -1645,16 +1638,15 @@ xfs_bmap_add_extent_hole_delay( | |||
1645 | xfs_filblks_t temp=0; /* temp for indirect calculations */ | 1638 | xfs_filblks_t temp=0; /* temp for indirect calculations */ |
1646 | 1639 | ||
1647 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | 1640 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
1648 | ep = xfs_iext_get_ext(ifp, idx); | ||
1649 | state = 0; | 1641 | state = 0; |
1650 | ASSERT(isnullstartblock(new->br_startblock)); | 1642 | ASSERT(isnullstartblock(new->br_startblock)); |
1651 | 1643 | ||
1652 | /* | 1644 | /* |
1653 | * Check and set flags if this segment has a left neighbor | 1645 | * Check and set flags if this segment has a left neighbor |
1654 | */ | 1646 | */ |
1655 | if (idx > 0) { | 1647 | if (*idx > 0) { |
1656 | state |= BMAP_LEFT_VALID; | 1648 | state |= BMAP_LEFT_VALID; |
1657 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); | 1649 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); |
1658 | 1650 | ||
1659 | if (isnullstartblock(left.br_startblock)) | 1651 | if (isnullstartblock(left.br_startblock)) |
1660 | state |= BMAP_LEFT_DELAY; | 1652 | state |= BMAP_LEFT_DELAY; |
@@ -1664,9 +1656,9 @@ xfs_bmap_add_extent_hole_delay( | |||
1664 | * Check and set flags if the current (right) segment exists. | 1656 | * Check and set flags if the current (right) segment exists. |
1665 | * If it doesn't exist, we're converting the hole at end-of-file. | 1657 | * If it doesn't exist, we're converting the hole at end-of-file. |
1666 | */ | 1658 | */ |
1667 | if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { | 1659 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { |
1668 | state |= BMAP_RIGHT_VALID; | 1660 | state |= BMAP_RIGHT_VALID; |
1669 | xfs_bmbt_get_all(ep, &right); | 1661 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); |
1670 | 1662 | ||
1671 | if (isnullstartblock(right.br_startblock)) | 1663 | if (isnullstartblock(right.br_startblock)) |
1672 | state |= BMAP_RIGHT_DELAY; | 1664 | state |= BMAP_RIGHT_DELAY; |
@@ -1699,21 +1691,21 @@ xfs_bmap_add_extent_hole_delay( | |||
1699 | * on the left and on the right. | 1691 | * on the left and on the right. |
1700 | * Merge all three into a single extent record. | 1692 | * Merge all three into a single extent record. |
1701 | */ | 1693 | */ |
1694 | --*idx; | ||
1702 | temp = left.br_blockcount + new->br_blockcount + | 1695 | temp = left.br_blockcount + new->br_blockcount + |
1703 | right.br_blockcount; | 1696 | right.br_blockcount; |
1704 | 1697 | ||
1705 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1698 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1706 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); | 1699 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); |
1707 | oldlen = startblockval(left.br_startblock) + | 1700 | oldlen = startblockval(left.br_startblock) + |
1708 | startblockval(new->br_startblock) + | 1701 | startblockval(new->br_startblock) + |
1709 | startblockval(right.br_startblock); | 1702 | startblockval(right.br_startblock); |
1710 | newlen = xfs_bmap_worst_indlen(ip, temp); | 1703 | newlen = xfs_bmap_worst_indlen(ip, temp); |
1711 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), | 1704 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), |
1712 | nullstartblock((int)newlen)); | 1705 | nullstartblock((int)newlen)); |
1713 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1706 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1714 | 1707 | ||
1715 | xfs_iext_remove(ip, idx, 1, state); | 1708 | xfs_iext_remove(ip, *idx + 1, 1, state); |
1716 | ip->i_df.if_lastex = idx - 1; | ||
1717 | break; | 1709 | break; |
1718 | 1710 | ||
1719 | case BMAP_LEFT_CONTIG: | 1711 | case BMAP_LEFT_CONTIG: |
@@ -1722,17 +1714,17 @@ xfs_bmap_add_extent_hole_delay( | |||
1722 | * on the left. | 1714 | * on the left. |
1723 | * Merge the new allocation with the left neighbor. | 1715 | * Merge the new allocation with the left neighbor. |
1724 | */ | 1716 | */ |
1717 | --*idx; | ||
1725 | temp = left.br_blockcount + new->br_blockcount; | 1718 | temp = left.br_blockcount + new->br_blockcount; |
1726 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1719 | |
1727 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); | 1720 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1721 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); | ||
1728 | oldlen = startblockval(left.br_startblock) + | 1722 | oldlen = startblockval(left.br_startblock) + |
1729 | startblockval(new->br_startblock); | 1723 | startblockval(new->br_startblock); |
1730 | newlen = xfs_bmap_worst_indlen(ip, temp); | 1724 | newlen = xfs_bmap_worst_indlen(ip, temp); |
1731 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), | 1725 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), |
1732 | nullstartblock((int)newlen)); | 1726 | nullstartblock((int)newlen)); |
1733 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1727 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1734 | |||
1735 | ip->i_df.if_lastex = idx - 1; | ||
1736 | break; | 1728 | break; |
1737 | 1729 | ||
1738 | case BMAP_RIGHT_CONTIG: | 1730 | case BMAP_RIGHT_CONTIG: |
@@ -1741,16 +1733,15 @@ xfs_bmap_add_extent_hole_delay( | |||
1741 | * on the right. | 1733 | * on the right. |
1742 | * Merge the new allocation with the right neighbor. | 1734 | * Merge the new allocation with the right neighbor. |
1743 | */ | 1735 | */ |
1744 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1736 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1745 | temp = new->br_blockcount + right.br_blockcount; | 1737 | temp = new->br_blockcount + right.br_blockcount; |
1746 | oldlen = startblockval(new->br_startblock) + | 1738 | oldlen = startblockval(new->br_startblock) + |
1747 | startblockval(right.br_startblock); | 1739 | startblockval(right.br_startblock); |
1748 | newlen = xfs_bmap_worst_indlen(ip, temp); | 1740 | newlen = xfs_bmap_worst_indlen(ip, temp); |
1749 | xfs_bmbt_set_allf(ep, new->br_startoff, | 1741 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), |
1742 | new->br_startoff, | ||
1750 | nullstartblock((int)newlen), temp, right.br_state); | 1743 | nullstartblock((int)newlen), temp, right.br_state); |
1751 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1744 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1752 | |||
1753 | ip->i_df.if_lastex = idx; | ||
1754 | break; | 1745 | break; |
1755 | 1746 | ||
1756 | case 0: | 1747 | case 0: |
@@ -1760,14 +1751,13 @@ xfs_bmap_add_extent_hole_delay( | |||
1760 | * Insert a new entry. | 1751 | * Insert a new entry. |
1761 | */ | 1752 | */ |
1762 | oldlen = newlen = 0; | 1753 | oldlen = newlen = 0; |
1763 | xfs_iext_insert(ip, idx, 1, new, state); | 1754 | xfs_iext_insert(ip, *idx, 1, new, state); |
1764 | ip->i_df.if_lastex = idx; | ||
1765 | break; | 1755 | break; |
1766 | } | 1756 | } |
1767 | if (oldlen != newlen) { | 1757 | if (oldlen != newlen) { |
1768 | ASSERT(oldlen > newlen); | 1758 | ASSERT(oldlen > newlen); |
1769 | xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, | 1759 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
1770 | (int64_t)(oldlen - newlen), rsvd); | 1760 | (int64_t)(oldlen - newlen), 0); |
1771 | /* | 1761 | /* |
1772 | * Nothing to do for disk quota accounting here. | 1762 | * Nothing to do for disk quota accounting here. |
1773 | */ | 1763 | */ |
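
Merging delayed extents can only shrink the combined worst-case indirect reservation, so the surplus is returned to the free-block counter. A minimal model of that accounting (fdblocks is a toy counter standing in for XFS_SBS_FDBLOCKS):

    #include <assert.h>
    #include <stdio.h>

    static long long fdblocks = 100;

    int main(void)
    {
        long long oldlen = 7;  /* summed reservations before the merge */
        long long newlen = 5;  /* worst-case need of the merged extent  */

        if (oldlen != newlen) {
            assert(oldlen > newlen);       /* merging never needs more */
            fdblocks += oldlen - newlen;   /* give the excess back     */
        }
        printf("fdblocks now %lld\n", fdblocks);
        return 0;
    }
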
@@ -1783,13 +1773,12 @@ xfs_bmap_add_extent_hole_delay( | |||
1783 | STATIC int /* error */ | 1773 | STATIC int /* error */ |
1784 | xfs_bmap_add_extent_hole_real( | 1774 | xfs_bmap_add_extent_hole_real( |
1785 | xfs_inode_t *ip, /* incore inode pointer */ | 1775 | xfs_inode_t *ip, /* incore inode pointer */ |
1786 | xfs_extnum_t idx, /* extent number to update/insert */ | 1776 | xfs_extnum_t *idx, /* extent number to update/insert */ |
1787 | xfs_btree_cur_t *cur, /* if null, not a btree */ | 1777 | xfs_btree_cur_t *cur, /* if null, not a btree */ |
1788 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 1778 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
1789 | int *logflagsp, /* inode logging flags */ | 1779 | int *logflagsp, /* inode logging flags */ |
1790 | int whichfork) /* data or attr fork */ | 1780 | int whichfork) /* data or attr fork */ |
1791 | { | 1781 | { |
1792 | xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ | ||
1793 | int error; /* error return value */ | 1782 | int error; /* error return value */ |
1794 | int i; /* temp state */ | 1783 | int i; /* temp state */ |
1795 | xfs_ifork_t *ifp; /* inode fork pointer */ | 1784 | xfs_ifork_t *ifp; /* inode fork pointer */ |
@@ -1799,8 +1788,7 @@ xfs_bmap_add_extent_hole_real( | |||
1799 | int state; /* state bits, accessed thru macros */ | 1788 | int state; /* state bits, accessed thru macros */ |
1800 | 1789 | ||
1801 | ifp = XFS_IFORK_PTR(ip, whichfork); | 1790 | ifp = XFS_IFORK_PTR(ip, whichfork); |
1802 | ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); | 1791 | ASSERT(*idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); |
1803 | ep = xfs_iext_get_ext(ifp, idx); | ||
1804 | state = 0; | 1792 | state = 0; |
1805 | 1793 | ||
1806 | if (whichfork == XFS_ATTR_FORK) | 1794 | if (whichfork == XFS_ATTR_FORK) |
@@ -1809,9 +1797,9 @@ xfs_bmap_add_extent_hole_real( | |||
1809 | /* | 1797 | /* |
1810 | * Check and set flags if this segment has a left neighbor. | 1798 | * Check and set flags if this segment has a left neighbor. |
1811 | */ | 1799 | */ |
1812 | if (idx > 0) { | 1800 | if (*idx > 0) { |
1813 | state |= BMAP_LEFT_VALID; | 1801 | state |= BMAP_LEFT_VALID; |
1814 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); | 1802 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); |
1815 | if (isnullstartblock(left.br_startblock)) | 1803 | if (isnullstartblock(left.br_startblock)) |
1816 | state |= BMAP_LEFT_DELAY; | 1804 | state |= BMAP_LEFT_DELAY; |
1817 | } | 1805 | } |
@@ -1820,9 +1808,9 @@ xfs_bmap_add_extent_hole_real( | |||
1820 | * Check and set flags if this segment has a current value. | 1808 | * Check and set flags if this segment has a current value. |
1821 | * Not true if we're inserting into the "hole" at eof. | 1809 | * Not true if we're inserting into the "hole" at eof. |
1822 | */ | 1810 | */ |
1823 | if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { | 1811 | if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { |
1824 | state |= BMAP_RIGHT_VALID; | 1812 | state |= BMAP_RIGHT_VALID; |
1825 | xfs_bmbt_get_all(ep, &right); | 1813 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); |
1826 | if (isnullstartblock(right.br_startblock)) | 1814 | if (isnullstartblock(right.br_startblock)) |
1827 | state |= BMAP_RIGHT_DELAY; | 1815 | state |= BMAP_RIGHT_DELAY; |
1828 | } | 1816 | } |
@@ -1859,14 +1847,15 @@ xfs_bmap_add_extent_hole_real( | |||
1859 | * left and on the right. | 1847 | * left and on the right. |
1860 | * Merge all three into a single extent record. | 1848 | * Merge all three into a single extent record. |
1861 | */ | 1849 | */ |
1862 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1850 | --*idx; |
1863 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1851 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1852 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
1864 | left.br_blockcount + new->br_blockcount + | 1853 | left.br_blockcount + new->br_blockcount + |
1865 | right.br_blockcount); | 1854 | right.br_blockcount); |
1866 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1855 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1856 | |||
1857 | xfs_iext_remove(ip, *idx + 1, 1, state); | ||
1867 | 1858 | ||
1868 | xfs_iext_remove(ip, idx, 1, state); | ||
1869 | ifp->if_lastex = idx - 1; | ||
1870 | XFS_IFORK_NEXT_SET(ip, whichfork, | 1859 | XFS_IFORK_NEXT_SET(ip, whichfork, |
1871 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | 1860 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
1872 | if (cur == NULL) { | 1861 | if (cur == NULL) { |
@@ -1901,12 +1890,12 @@ xfs_bmap_add_extent_hole_real( | |||
1901 | * on the left. | 1890 | * on the left. |
1902 | * Merge the new allocation with the left neighbor. | 1891 | * Merge the new allocation with the left neighbor. |
1903 | */ | 1892 | */ |
1904 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1893 | --*idx; |
1905 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1894 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1895 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
1906 | left.br_blockcount + new->br_blockcount); | 1896 | left.br_blockcount + new->br_blockcount); |
1907 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1897 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1908 | 1898 | ||
1909 | ifp->if_lastex = idx - 1; | ||
1910 | if (cur == NULL) { | 1899 | if (cur == NULL) { |
1911 | rval = xfs_ilog_fext(whichfork); | 1900 | rval = xfs_ilog_fext(whichfork); |
1912 | } else { | 1901 | } else { |
@@ -1932,13 +1921,13 @@ xfs_bmap_add_extent_hole_real( | |||
1932 | * on the right. | 1921 | * on the right. |
1933 | * Merge the new allocation with the right neighbor. | 1922 | * Merge the new allocation with the right neighbor. |
1934 | */ | 1923 | */ |
1935 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1924 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1936 | xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, | 1925 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), |
1926 | new->br_startoff, new->br_startblock, | ||
1937 | new->br_blockcount + right.br_blockcount, | 1927 | new->br_blockcount + right.br_blockcount, |
1938 | right.br_state); | 1928 | right.br_state); |
1939 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1929 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1940 | 1930 | ||
1941 | ifp->if_lastex = idx; | ||
1942 | if (cur == NULL) { | 1931 | if (cur == NULL) { |
1943 | rval = xfs_ilog_fext(whichfork); | 1932 | rval = xfs_ilog_fext(whichfork); |
1944 | } else { | 1933 | } else { |
@@ -1964,8 +1953,7 @@ xfs_bmap_add_extent_hole_real( | |||
1964 | * real allocation. | 1953 | * real allocation. |
1965 | * Insert a new entry. | 1954 | * Insert a new entry. |
1966 | */ | 1955 | */ |
1967 | xfs_iext_insert(ip, idx, 1, new, state); | 1956 | xfs_iext_insert(ip, *idx, 1, new, state); |
1968 | ifp->if_lastex = idx; | ||
1969 | XFS_IFORK_NEXT_SET(ip, whichfork, | 1957 | XFS_IFORK_NEXT_SET(ip, whichfork, |
1970 | XFS_IFORK_NEXTENTS(ip, whichfork) + 1); | 1958 | XFS_IFORK_NEXTENTS(ip, whichfork) + 1); |
1971 | if (cur == NULL) { | 1959 | if (cur == NULL) { |
@@ -2345,6 +2333,13 @@ xfs_bmap_rtalloc( | |||
2345 | */ | 2333 | */ |
2346 | if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) | 2334 | if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) |
2347 | ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; | 2335 | ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; |
2336 | |||
2337 | /* | ||
2338 | * Lock out other modifications to the RT bitmap inode. | ||
2339 | */ | ||
2340 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); | ||
2341 | xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); | ||
2342 | |||
2348 | /* | 2343 | /* |
2349 | * If it's an allocation to an empty file at offset 0, | 2344 | * If it's an allocation to an empty file at offset 0, |
2350 | * pick an extent that will space things out in the rt area. | 2345 | * pick an extent that will space things out in the rt area. |
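
Besides taking the realtime bitmap inode lock, this hunk sits next to the existing clamp that keeps a realtime allocation under MAXEXTLEN once scaled by the rt extent size. A runnable model of that clamp (toy values for MAXEXTLEN and sb_rextsize):

    #include <stdio.h>

    int main(void)
    {
        const unsigned long long MAXEXTLEN = 1ull << 21;  /* toy value */
        unsigned long long rextsize = 16;    /* blocks per rt extent */
        unsigned long long ralen = 200000;   /* requested rt extents */

        if (ralen * rextsize >= MAXEXTLEN)
            ralen = MAXEXTLEN / rextsize;    /* largest legal count */

        printf("ralen=%llu (%llu blocks)\n", ralen, ralen * rextsize);
        return 0;
    }
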
@@ -2427,7 +2422,7 @@ xfs_bmap_btalloc_nullfb( | |||
2427 | startag = ag = 0; | 2422 | startag = ag = 0; |
2428 | 2423 | ||
2429 | pag = xfs_perag_get(mp, ag); | 2424 | pag = xfs_perag_get(mp, ag); |
2430 | while (*blen < ap->alen) { | 2425 | while (*blen < args->maxlen) { |
2431 | if (!pag->pagf_init) { | 2426 | if (!pag->pagf_init) { |
2432 | error = xfs_alloc_pagf_init(mp, args->tp, ag, | 2427 | error = xfs_alloc_pagf_init(mp, args->tp, ag, |
2433 | XFS_ALLOC_FLAG_TRYLOCK); | 2428 | XFS_ALLOC_FLAG_TRYLOCK); |
@@ -2449,7 +2444,7 @@ xfs_bmap_btalloc_nullfb( | |||
2449 | notinit = 1; | 2444 | notinit = 1; |
2450 | 2445 | ||
2451 | if (xfs_inode_is_filestream(ap->ip)) { | 2446 | if (xfs_inode_is_filestream(ap->ip)) { |
2452 | if (*blen >= ap->alen) | 2447 | if (*blen >= args->maxlen) |
2453 | break; | 2448 | break; |
2454 | 2449 | ||
2455 | if (ap->userdata) { | 2450 | if (ap->userdata) { |
@@ -2495,14 +2490,14 @@ xfs_bmap_btalloc_nullfb( | |||
2495 | * If the best seen length is less than the request | 2490 | * If the best seen length is less than the request |
2496 | * length, use the best as the minimum. | 2491 | * length, use the best as the minimum. |
2497 | */ | 2492 | */ |
2498 | else if (*blen < ap->alen) | 2493 | else if (*blen < args->maxlen) |
2499 | args->minlen = *blen; | 2494 | args->minlen = *blen; |
2500 | /* | 2495 | /* |
2501 | * Otherwise we've seen an extent as big as alen, | 2496 | * Otherwise we've seen an extent as big as maxlen, |
2502 | * use that as the minimum. | 2497 | * use that as the minimum. |
2503 | */ | 2498 | */ |
2504 | else | 2499 | else |
2505 | args->minlen = ap->alen; | 2500 | args->minlen = args->maxlen; |
2506 | 2501 | ||
2507 | /* | 2502 | /* |
2508 | * set the failure fallback case to look in the selected | 2503 | * set the failure fallback case to look in the selected |
@@ -2570,7 +2565,9 @@ xfs_bmap_btalloc( | |||
2570 | args.tp = ap->tp; | 2565 | args.tp = ap->tp; |
2571 | args.mp = mp; | 2566 | args.mp = mp; |
2572 | args.fsbno = ap->rval; | 2567 | args.fsbno = ap->rval; |
2573 | args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); | 2568 | |
2569 | /* Trim the allocation back to the maximum an AG can fit. */ | ||
2570 | args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp)); | ||
2574 | args.firstblock = ap->firstblock; | 2571 | args.firstblock = ap->firstblock; |
2575 | blen = 0; | 2572 | blen = 0; |
2576 | if (nullfb) { | 2573 | if (nullfb) { |
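
Trimming args.maxlen to what a single AG can actually supply avoids requesting more contiguous space than any allocation group holds; the later hunks then compare blen against args->maxlen instead of ap->alen. A sketch of the trim, treating XFS_ALLOC_AG_MAX_USABLE as an assumed per-AG usable-block figure:

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        unsigned long long alen = 1000000;          /* caller's request      */
        unsigned long long ag_max_usable = 262000;  /* toy per-AG usable max */

        /* trim the allocation back to the maximum an AG can fit */
        unsigned long long maxlen = MIN(alen, ag_max_usable);
        printf("args.maxlen = %llu\n", maxlen);
        return 0;
    }
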
@@ -2618,7 +2615,7 @@ xfs_bmap_btalloc( | |||
2618 | /* | 2615 | /* |
2619 | * Adjust for alignment | 2616 | * Adjust for alignment |
2620 | */ | 2617 | */ |
2621 | if (blen > args.alignment && blen <= ap->alen) | 2618 | if (blen > args.alignment && blen <= args.maxlen) |
2622 | args.minlen = blen - args.alignment; | 2619 | args.minlen = blen - args.alignment; |
2623 | args.minalignslop = 0; | 2620 | args.minalignslop = 0; |
2624 | } else { | 2621 | } else { |
@@ -2637,7 +2634,7 @@ xfs_bmap_btalloc( | |||
2637 | * of minlen+alignment+slop doesn't go up | 2634 | * of minlen+alignment+slop doesn't go up |
2638 | * between the calls. | 2635 | * between the calls. |
2639 | */ | 2636 | */ |
2640 | if (blen > mp->m_dalign && blen <= ap->alen) | 2637 | if (blen > mp->m_dalign && blen <= args.maxlen) |
2641 | nextminlen = blen - mp->m_dalign; | 2638 | nextminlen = blen - mp->m_dalign; |
2642 | else | 2639 | else |
2643 | nextminlen = args.minlen; | 2640 | nextminlen = args.minlen; |
@@ -2804,13 +2801,12 @@ STATIC int /* error */ | |||
2804 | xfs_bmap_del_extent( | 2801 | xfs_bmap_del_extent( |
2805 | xfs_inode_t *ip, /* incore inode pointer */ | 2802 | xfs_inode_t *ip, /* incore inode pointer */ |
2806 | xfs_trans_t *tp, /* current transaction pointer */ | 2803 | xfs_trans_t *tp, /* current transaction pointer */ |
2807 | xfs_extnum_t idx, /* extent number to update/delete */ | 2804 | xfs_extnum_t *idx, /* extent number to update/delete */ |
2808 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | 2805 | xfs_bmap_free_t *flist, /* list of extents to be freed */ |
2809 | xfs_btree_cur_t *cur, /* if null, not a btree */ | 2806 | xfs_btree_cur_t *cur, /* if null, not a btree */ |
2810 | xfs_bmbt_irec_t *del, /* data to remove from extents */ | 2807 | xfs_bmbt_irec_t *del, /* data to remove from extents */ |
2811 | int *logflagsp, /* inode logging flags */ | 2808 | int *logflagsp, /* inode logging flags */ |
2812 | int whichfork, /* data or attr fork */ | 2809 | int whichfork) /* data or attr fork */ |
2813 | int rsvd) /* OK to allocate reserved blocks */ | ||
2814 | { | 2810 | { |
2815 | xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ | 2811 | xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ |
2816 | xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ | 2812 | xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ |
@@ -2841,10 +2837,10 @@ xfs_bmap_del_extent( | |||
2841 | 2837 | ||
2842 | mp = ip->i_mount; | 2838 | mp = ip->i_mount; |
2843 | ifp = XFS_IFORK_PTR(ip, whichfork); | 2839 | ifp = XFS_IFORK_PTR(ip, whichfork); |
2844 | ASSERT((idx >= 0) && (idx < ifp->if_bytes / | 2840 | ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / |
2845 | (uint)sizeof(xfs_bmbt_rec_t))); | 2841 | (uint)sizeof(xfs_bmbt_rec_t))); |
2846 | ASSERT(del->br_blockcount > 0); | 2842 | ASSERT(del->br_blockcount > 0); |
2847 | ep = xfs_iext_get_ext(ifp, idx); | 2843 | ep = xfs_iext_get_ext(ifp, *idx); |
2848 | xfs_bmbt_get_all(ep, &got); | 2844 | xfs_bmbt_get_all(ep, &got); |
2849 | ASSERT(got.br_startoff <= del->br_startoff); | 2845 | ASSERT(got.br_startoff <= del->br_startoff); |
2850 | del_endoff = del->br_startoff + del->br_blockcount; | 2846 | del_endoff = del->br_startoff + del->br_blockcount; |
@@ -2918,11 +2914,12 @@ xfs_bmap_del_extent( | |||
2918 | /* | 2914 | /* |
2919 | * Matches the whole extent. Delete the entry. | 2915 | * Matches the whole extent. Delete the entry. |
2920 | */ | 2916 | */ |
2921 | xfs_iext_remove(ip, idx, 1, | 2917 | xfs_iext_remove(ip, *idx, 1, |
2922 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); | 2918 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); |
2923 | ifp->if_lastex = idx; | 2919 | --*idx; |
2924 | if (delay) | 2920 | if (delay) |
2925 | break; | 2921 | break; |
2922 | |||
2926 | XFS_IFORK_NEXT_SET(ip, whichfork, | 2923 | XFS_IFORK_NEXT_SET(ip, whichfork, |
2927 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | 2924 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
2928 | flags |= XFS_ILOG_CORE; | 2925 | flags |= XFS_ILOG_CORE; |
@@ -2939,21 +2936,20 @@ xfs_bmap_del_extent( | |||
2939 | /* | 2936 | /* |
2940 | * Deleting the first part of the extent. | 2937 | * Deleting the first part of the extent. |
2941 | */ | 2938 | */ |
2942 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 2939 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
2943 | xfs_bmbt_set_startoff(ep, del_endoff); | 2940 | xfs_bmbt_set_startoff(ep, del_endoff); |
2944 | temp = got.br_blockcount - del->br_blockcount; | 2941 | temp = got.br_blockcount - del->br_blockcount; |
2945 | xfs_bmbt_set_blockcount(ep, temp); | 2942 | xfs_bmbt_set_blockcount(ep, temp); |
2946 | ifp->if_lastex = idx; | ||
2947 | if (delay) { | 2943 | if (delay) { |
2948 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 2944 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
2949 | da_old); | 2945 | da_old); |
2950 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 2946 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
2951 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 2947 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2952 | da_new = temp; | 2948 | da_new = temp; |
2953 | break; | 2949 | break; |
2954 | } | 2950 | } |
2955 | xfs_bmbt_set_startblock(ep, del_endblock); | 2951 | xfs_bmbt_set_startblock(ep, del_endblock); |
2956 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 2952 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2957 | if (!cur) { | 2953 | if (!cur) { |
2958 | flags |= xfs_ilog_fext(whichfork); | 2954 | flags |= xfs_ilog_fext(whichfork); |
2959 | break; | 2955 | break; |
@@ -2969,18 +2965,17 @@ xfs_bmap_del_extent( | |||
2969 | * Deleting the last part of the extent. | 2965 | * Deleting the last part of the extent. |
2970 | */ | 2966 | */ |
2971 | temp = got.br_blockcount - del->br_blockcount; | 2967 | temp = got.br_blockcount - del->br_blockcount; |
2972 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 2968 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
2973 | xfs_bmbt_set_blockcount(ep, temp); | 2969 | xfs_bmbt_set_blockcount(ep, temp); |
2974 | ifp->if_lastex = idx; | ||
2975 | if (delay) { | 2970 | if (delay) { |
2976 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 2971 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
2977 | da_old); | 2972 | da_old); |
2978 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 2973 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
2979 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 2974 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2980 | da_new = temp; | 2975 | da_new = temp; |
2981 | break; | 2976 | break; |
2982 | } | 2977 | } |
2983 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 2978 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2984 | if (!cur) { | 2979 | if (!cur) { |
2985 | flags |= xfs_ilog_fext(whichfork); | 2980 | flags |= xfs_ilog_fext(whichfork); |
2986 | break; | 2981 | break; |
@@ -2997,7 +2992,7 @@ xfs_bmap_del_extent( | |||
2997 | * Deleting the middle of the extent. | 2992 | * Deleting the middle of the extent. |
2998 | */ | 2993 | */ |
2999 | temp = del->br_startoff - got.br_startoff; | 2994 | temp = del->br_startoff - got.br_startoff; |
3000 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 2995 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
3001 | xfs_bmbt_set_blockcount(ep, temp); | 2996 | xfs_bmbt_set_blockcount(ep, temp); |
3002 | new.br_startoff = del_endoff; | 2997 | new.br_startoff = del_endoff; |
3003 | temp2 = got_endoff - del_endoff; | 2998 | temp2 = got_endoff - del_endoff; |
@@ -3084,9 +3079,9 @@ xfs_bmap_del_extent( | |||
3084 | } | 3079 | } |
3085 | } | 3080 | } |
3086 | } | 3081 | } |
3087 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 3082 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
3088 | xfs_iext_insert(ip, idx + 1, 1, &new, state); | 3083 | xfs_iext_insert(ip, *idx + 1, 1, &new, state); |
3089 | ifp->if_lastex = idx + 1; | 3084 | ++*idx; |
3090 | break; | 3085 | break; |
3091 | } | 3086 | } |
3092 | /* | 3087 | /* |
@@ -3111,9 +3106,10 @@ xfs_bmap_del_extent( | |||
3111 | * Nothing to do for disk quota accounting here. | 3106 | * Nothing to do for disk quota accounting here. |
3112 | */ | 3107 | */ |
3113 | ASSERT(da_old >= da_new); | 3108 | ASSERT(da_old >= da_new); |
3114 | if (da_old > da_new) | 3109 | if (da_old > da_new) { |
3115 | xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new), | 3110 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
3116 | rsvd); | 3111 | (int64_t)(da_old - da_new), 0); |
3112 | } | ||
3117 | done: | 3113 | done: |
3118 | *logflagsp = flags; | 3114 | *logflagsp = flags; |
3119 | return error; | 3115 | return error; |
@@ -3496,7 +3492,7 @@ xfs_bmap_search_extents( | |||
3496 | 3492 | ||
3497 | if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && | 3493 | if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && |
3498 | !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { | 3494 | !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { |
3499 | xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, | 3495 | xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, |
3500 | "Access to block zero in inode %llu " | 3496 | "Access to block zero in inode %llu " |
3501 | "start_block: %llx start_off: %llx " | 3497 | "start_block: %llx start_off: %llx " |
3502 | "blkcnt: %llx extent-state: %x lastx: %x\n", | 3498 | "blkcnt: %llx extent-state: %x lastx: %x\n", |
@@ -4170,12 +4166,11 @@ xfs_bmap_read_extents( | |||
4170 | num_recs = xfs_btree_get_numrecs(block); | 4166 | num_recs = xfs_btree_get_numrecs(block); |
4171 | if (unlikely(i + num_recs > room)) { | 4167 | if (unlikely(i + num_recs > room)) { |
4172 | ASSERT(i + num_recs <= room); | 4168 | ASSERT(i + num_recs <= room); |
4173 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 4169 | xfs_warn(ip->i_mount, |
4174 | "corrupt dinode %Lu, (btree extents).", | 4170 | "corrupt dinode %Lu, (btree extents).", |
4175 | (unsigned long long) ip->i_ino); | 4171 | (unsigned long long) ip->i_ino); |
4176 | XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", | 4172 | XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", |
4177 | XFS_ERRLEVEL_LOW, | 4173 | XFS_ERRLEVEL_LOW, ip->i_mount, block); |
4178 | ip->i_mount); | ||
4179 | goto error0; | 4174 | goto error0; |
4180 | } | 4175 | } |
4181 | XFS_WANT_CORRUPTED_GOTO( | 4176 | XFS_WANT_CORRUPTED_GOTO( |
@@ -4481,6 +4476,16 @@ xfs_bmapi( | |||
4481 | /* Figure out the extent size, adjust alen */ | 4476 | /* Figure out the extent size, adjust alen */ |
4482 | extsz = xfs_get_extsz_hint(ip); | 4477 | extsz = xfs_get_extsz_hint(ip); |
4483 | if (extsz) { | 4478 | if (extsz) { |
4479 | /* | ||
4480 | * make sure we don't exceed a single | ||
4481 | * extent length when we align the | ||
4482 | * extent by reducing the length we are | ||
4483 | * going to allocate by the maximum | ||
4484 | * amount extent size alignment may | ||
4485 | * require. | ||
4486 | */ | ||
4487 | alen = XFS_FILBLKS_MIN(len, | ||
4488 | MAXEXTLEN - (2 * extsz - 1)); | ||
4484 | error = xfs_bmap_extsize_align(mp, | 4489 | error = xfs_bmap_extsize_align(mp, |
4485 | &got, &prev, extsz, | 4490 | &got, &prev, extsz, |
4486 | rt, eof, | 4491 | rt, eof, |
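
Why 2 * extsz - 1: rounding the extent's start down to an extsz boundary and its end up can each add up to extsz - 1 blocks, so capping the pre-alignment length at MAXEXTLEN - (2 * extsz - 1) guarantees the aligned extent still fits. A runnable check of the bound (the MAXEXTLEN value here is assumed):

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        const unsigned long long MAXEXTLEN = (1ull << 21) - 1;  /* assumed */
        unsigned long long extsz = 64, len = MAXEXTLEN;  /* worst case */

        unsigned long long alen = len < MAXEXTLEN - (2 * extsz - 1)
                                ? len : MAXEXTLEN - (2 * extsz - 1);

        /* aligning can extend the range by at most extsz - 1 per side */
        unsigned long long aligned_max = alen + 2 * (extsz - 1);
        assert(aligned_max <= MAXEXTLEN);
        printf("alen=%llu, aligned worst case=%llu, limit=%llu\n",
               alen, aligned_max, MAXEXTLEN);
        return 0;
    }
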
@@ -4523,29 +4528,24 @@ xfs_bmapi( | |||
4523 | if (rt) { | 4528 | if (rt) { |
4524 | error = xfs_mod_incore_sb(mp, | 4529 | error = xfs_mod_incore_sb(mp, |
4525 | XFS_SBS_FREXTENTS, | 4530 | XFS_SBS_FREXTENTS, |
4526 | -((int64_t)extsz), (flags & | 4531 | -((int64_t)extsz), 0); |
4527 | XFS_BMAPI_RSVBLOCKS)); | ||
4528 | } else { | 4532 | } else { |
4529 | error = xfs_mod_incore_sb(mp, | 4533 | error = xfs_icsb_modify_counters(mp, |
4530 | XFS_SBS_FDBLOCKS, | 4534 | XFS_SBS_FDBLOCKS, |
4531 | -((int64_t)alen), (flags & | 4535 | -((int64_t)alen), 0); |
4532 | XFS_BMAPI_RSVBLOCKS)); | ||
4533 | } | 4536 | } |
4534 | if (!error) { | 4537 | if (!error) { |
4535 | error = xfs_mod_incore_sb(mp, | 4538 | error = xfs_icsb_modify_counters(mp, |
4536 | XFS_SBS_FDBLOCKS, | 4539 | XFS_SBS_FDBLOCKS, |
4537 | -((int64_t)indlen), (flags & | 4540 | -((int64_t)indlen), 0); |
4538 | XFS_BMAPI_RSVBLOCKS)); | ||
4539 | if (error && rt) | 4541 | if (error && rt) |
4540 | xfs_mod_incore_sb(mp, | 4542 | xfs_mod_incore_sb(mp, |
4541 | XFS_SBS_FREXTENTS, | 4543 | XFS_SBS_FREXTENTS, |
4542 | (int64_t)extsz, (flags & | 4544 | (int64_t)extsz, 0); |
4543 | XFS_BMAPI_RSVBLOCKS)); | ||
4544 | else if (error) | 4545 | else if (error) |
4545 | xfs_mod_incore_sb(mp, | 4546 | xfs_icsb_modify_counters(mp, |
4546 | XFS_SBS_FDBLOCKS, | 4547 | XFS_SBS_FDBLOCKS, |
4547 | (int64_t)alen, (flags & | 4548 | (int64_t)alen, 0); |
4548 | XFS_BMAPI_RSVBLOCKS)); | ||
4549 | } | 4549 | } |
4550 | 4550 | ||
4551 | if (error) { | 4551 | if (error) { |
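
The restructured accounting follows a reserve-then-unwind shape: take the extent (or block) reservation, then the indirect-block reservation, and return the first if the second fails. A toy model with fake counters and an invented mod() helper:

    #include <stdio.h>

    static long long frextents = 10, fdblocks = 2;

    static int mod(long long *ctr, long long delta)
    {
        if (*ctr + delta < 0)
            return -1;   /* ENOSPC, counter untouched */
        *ctr += delta;
        return 0;
    }

    int main(void)
    {
        long long extsz = 4, indlen = 5;
        int rt = 1, error;

        error = mod(rt ? &frextents : &fdblocks, -extsz);
        if (!error) {
            error = mod(&fdblocks, -indlen);
            if (error)   /* second step failed: undo the first */
                mod(rt ? &frextents : &fdblocks, extsz);
        }
        printf("error=%d frextents=%lld fdblocks=%lld\n",
               error, frextents, fdblocks);
        return 0;
    }
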
@@ -4662,13 +4662,12 @@ xfs_bmapi( | |||
4662 | if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) | 4662 | if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) |
4663 | got.br_state = XFS_EXT_UNWRITTEN; | 4663 | got.br_state = XFS_EXT_UNWRITTEN; |
4664 | } | 4664 | } |
4665 | error = xfs_bmap_add_extent(ip, lastx, &cur, &got, | 4665 | error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, |
4666 | firstblock, flist, &tmp_logflags, | 4666 | firstblock, flist, &tmp_logflags, |
4667 | whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); | 4667 | whichfork); |
4668 | logflags |= tmp_logflags; | 4668 | logflags |= tmp_logflags; |
4669 | if (error) | 4669 | if (error) |
4670 | goto error0; | 4670 | goto error0; |
4671 | lastx = ifp->if_lastex; | ||
4672 | ep = xfs_iext_get_ext(ifp, lastx); | 4671 | ep = xfs_iext_get_ext(ifp, lastx); |
4673 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4672 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4674 | xfs_bmbt_get_all(ep, &got); | 4673 | xfs_bmbt_get_all(ep, &got); |
@@ -4744,8 +4743,12 @@ xfs_bmapi( | |||
4744 | * Check if writing previously allocated but | 4743 | * Check if writing previously allocated but |
4745 | * unwritten extents. | 4744 | * unwritten extents. |
4746 | */ | 4745 | */ |
4747 | if (wr && mval->br_state == XFS_EXT_UNWRITTEN && | 4746 | if (wr && |
4748 | ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) { | 4747 | ((mval->br_state == XFS_EXT_UNWRITTEN && |
4748 | ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) || | ||
4749 | (mval->br_state == XFS_EXT_NORM && | ||
4750 | ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) == | ||
4751 | (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) { | ||
4749 | /* | 4752 | /* |
4750 | * Modify (by adding) the state flag, if writing. | 4753 | * Modify (by adding) the state flag, if writing. |
4751 | */ | 4754 | */ |
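
The widened test now also converts a written extent back to unwritten when both XFS_BMAPI_PREALLOC and XFS_BMAPI_CONVERT are set, alongside the old unwritten-to-written path. A small predicate mirroring the logic (flag values invented for the sketch):

    #include <stdio.h>

    enum { EXT_NORM, EXT_UNWRITTEN };
    #define BMAPI_PREALLOC 0x1  /* toy flag values */
    #define BMAPI_DELAY    0x2
    #define BMAPI_CONVERT  0x4

    static int should_convert(int wr, int state, int flags)
    {
        if (!wr)
            return 0;
        if (state == EXT_UNWRITTEN &&
            !(flags & (BMAPI_PREALLOC | BMAPI_DELAY)))
            return 1;  /* unwritten -> written on overwrite */
        if (state == EXT_NORM &&
            (flags & (BMAPI_PREALLOC | BMAPI_CONVERT)) ==
            (BMAPI_PREALLOC | BMAPI_CONVERT))
            return 1;  /* written -> unwritten on request */
        return 0;
    }

    int main(void)
    {
        printf("%d %d\n",
               should_convert(1, EXT_UNWRITTEN, 0),
               should_convert(1, EXT_NORM, BMAPI_PREALLOC | BMAPI_CONVERT));
        return 0;
    }
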
@@ -4757,14 +4760,15 @@ xfs_bmapi( | |||
4757 | *firstblock; | 4760 | *firstblock; |
4758 | cur->bc_private.b.flist = flist; | 4761 | cur->bc_private.b.flist = flist; |
4759 | } | 4762 | } |
4760 | mval->br_state = XFS_EXT_NORM; | 4763 | mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) |
4761 | error = xfs_bmap_add_extent(ip, lastx, &cur, mval, | 4764 | ? XFS_EXT_NORM |
4765 | : XFS_EXT_UNWRITTEN; | ||
4766 | error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, | ||
4762 | firstblock, flist, &tmp_logflags, | 4767 | firstblock, flist, &tmp_logflags, |
4763 | whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); | 4768 | whichfork); |
4764 | logflags |= tmp_logflags; | 4769 | logflags |= tmp_logflags; |
4765 | if (error) | 4770 | if (error) |
4766 | goto error0; | 4771 | goto error0; |
4767 | lastx = ifp->if_lastex; | ||
4768 | ep = xfs_iext_get_ext(ifp, lastx); | 4772 | ep = xfs_iext_get_ext(ifp, lastx); |
4769 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4773 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4770 | xfs_bmbt_get_all(ep, &got); | 4774 | xfs_bmbt_get_all(ep, &got); |
@@ -4823,14 +4827,14 @@ xfs_bmapi( | |||
4823 | /* | 4827 | /* |
4824 | * Else go on to the next record. | 4828 | * Else go on to the next record. |
4825 | */ | 4829 | */ |
4826 | ep = xfs_iext_get_ext(ifp, ++lastx); | ||
4827 | prev = got; | 4830 | prev = got; |
4828 | if (lastx >= nextents) | 4831 | if (++lastx < nextents) { |
4829 | eof = 1; | 4832 | ep = xfs_iext_get_ext(ifp, lastx); |
4830 | else | ||
4831 | xfs_bmbt_get_all(ep, &got); | 4833 | xfs_bmbt_get_all(ep, &got); |
4834 | } else { | ||
4835 | eof = 1; | ||
4836 | } | ||
4832 | } | 4837 | } |
4833 | ifp->if_lastex = lastx; | ||
4834 | *nmap = n; | 4838 | *nmap = n; |
4835 | /* | 4839 | /* |
4836 | * Transform from btree to extents, give it cur. | 4840 | * Transform from btree to extents, give it cur. |
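
The rewritten advance in the hunk above increments the index and bounds-checks it before reading the next record, rather than fetching first and testing afterwards. A minimal array-walk model:

    #include <stdio.h>

    int main(void)
    {
        int recs[] = { 10, 20, 30 };
        int nextents = 3, lastx = 2, eof = 0, got = recs[lastx];

        /* advance: only dereference when the new index is in range */
        if (++lastx < nextents)
            got = recs[lastx];
        else
            eof = 1;

        printf("lastx=%d eof=%d got=%d\n", lastx, eof, got);
        return 0;
    }
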
@@ -4939,7 +4943,6 @@ xfs_bmapi_single( | |||
4939 | ASSERT(!isnullstartblock(got.br_startblock)); | 4943 | ASSERT(!isnullstartblock(got.br_startblock)); |
4940 | ASSERT(bno < got.br_startoff + got.br_blockcount); | 4944 | ASSERT(bno < got.br_startoff + got.br_blockcount); |
4941 | *fsb = got.br_startblock + (bno - got.br_startoff); | 4945 | *fsb = got.br_startblock + (bno - got.br_startoff); |
4942 | ifp->if_lastex = lastx; | ||
4943 | return 0; | 4946 | return 0; |
4944 | } | 4947 | } |
4945 | 4948 | ||
@@ -4981,7 +4984,6 @@ xfs_bunmapi( | |||
4981 | int tmp_logflags; /* partial logging flags */ | 4984 | int tmp_logflags; /* partial logging flags */ |
4982 | int wasdel; /* was a delayed alloc extent */ | 4985 | int wasdel; /* was a delayed alloc extent */ |
4983 | int whichfork; /* data or attribute fork */ | 4986 | int whichfork; /* data or attribute fork */ |
4984 | int rsvd; /* OK to allocate reserved blocks */ | ||
4985 | xfs_fsblock_t sum; | 4987 | xfs_fsblock_t sum; |
4986 | 4988 | ||
4987 | trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); | 4989 | trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); |
@@ -4999,7 +5001,7 @@ xfs_bunmapi( | |||
4999 | mp = ip->i_mount; | 5001 | mp = ip->i_mount; |
5000 | if (XFS_FORCED_SHUTDOWN(mp)) | 5002 | if (XFS_FORCED_SHUTDOWN(mp)) |
5001 | return XFS_ERROR(EIO); | 5003 | return XFS_ERROR(EIO); |
5002 | rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; | 5004 | |
5003 | ASSERT(len > 0); | 5005 | ASSERT(len > 0); |
5004 | ASSERT(nexts >= 0); | 5006 | ASSERT(nexts >= 0); |
5005 | ASSERT(ifp->if_ext_max == | 5007 | ASSERT(ifp->if_ext_max == |
@@ -5115,9 +5117,9 @@ xfs_bunmapi( | |||
5115 | del.br_blockcount = mod; | 5117 | del.br_blockcount = mod; |
5116 | } | 5118 | } |
5117 | del.br_state = XFS_EXT_UNWRITTEN; | 5119 | del.br_state = XFS_EXT_UNWRITTEN; |
5118 | error = xfs_bmap_add_extent(ip, lastx, &cur, &del, | 5120 | error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, |
5119 | firstblock, flist, &logflags, | 5121 | firstblock, flist, &logflags, |
5120 | XFS_DATA_FORK, 0); | 5122 | XFS_DATA_FORK); |
5121 | if (error) | 5123 | if (error) |
5122 | goto error0; | 5124 | goto error0; |
5123 | goto nodelete; | 5125 | goto nodelete; |
@@ -5143,9 +5145,12 @@ xfs_bunmapi( | |||
5143 | */ | 5145 | */ |
5144 | ASSERT(bno >= del.br_blockcount); | 5146 | ASSERT(bno >= del.br_blockcount); |
5145 | bno -= del.br_blockcount; | 5147 | bno -= del.br_blockcount; |
5146 | if (bno < got.br_startoff) { | 5148 | if (got.br_startoff > bno) { |
5147 | if (--lastx >= 0) | 5149 | if (--lastx >= 0) { |
5148 | xfs_bmbt_get_all(--ep, &got); | 5150 | ep = xfs_iext_get_ext(ifp, |
5151 | lastx); | ||
5152 | xfs_bmbt_get_all(ep, &got); | ||
5153 | } | ||
5149 | } | 5154 | } |
5150 | continue; | 5155 | continue; |
5151 | } else if (del.br_state == XFS_EXT_UNWRITTEN) { | 5156 | } else if (del.br_state == XFS_EXT_UNWRITTEN) { |
@@ -5169,18 +5174,19 @@ xfs_bunmapi( | |||
5169 | prev.br_startoff = start; | 5174 | prev.br_startoff = start; |
5170 | } | 5175 | } |
5171 | prev.br_state = XFS_EXT_UNWRITTEN; | 5176 | prev.br_state = XFS_EXT_UNWRITTEN; |
5172 | error = xfs_bmap_add_extent(ip, lastx - 1, &cur, | 5177 | lastx--; |
5178 | error = xfs_bmap_add_extent(ip, &lastx, &cur, | ||
5173 | &prev, firstblock, flist, &logflags, | 5179 | &prev, firstblock, flist, &logflags, |
5174 | XFS_DATA_FORK, 0); | 5180 | XFS_DATA_FORK); |
5175 | if (error) | 5181 | if (error) |
5176 | goto error0; | 5182 | goto error0; |
5177 | goto nodelete; | 5183 | goto nodelete; |
5178 | } else { | 5184 | } else { |
5179 | ASSERT(del.br_state == XFS_EXT_NORM); | 5185 | ASSERT(del.br_state == XFS_EXT_NORM); |
5180 | del.br_state = XFS_EXT_UNWRITTEN; | 5186 | del.br_state = XFS_EXT_UNWRITTEN; |
5181 | error = xfs_bmap_add_extent(ip, lastx, &cur, | 5187 | error = xfs_bmap_add_extent(ip, &lastx, &cur, |
5182 | &del, firstblock, flist, &logflags, | 5188 | &del, firstblock, flist, &logflags, |
5183 | XFS_DATA_FORK, 0); | 5189 | XFS_DATA_FORK); |
5184 | if (error) | 5190 | if (error) |
5185 | goto error0; | 5191 | goto error0; |
5186 | goto nodelete; | 5192 | goto nodelete; |
@@ -5195,13 +5201,13 @@ xfs_bunmapi( | |||
5195 | rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); | 5201 | rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); |
5196 | do_div(rtexts, mp->m_sb.sb_rextsize); | 5202 | do_div(rtexts, mp->m_sb.sb_rextsize); |
5197 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, | 5203 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, |
5198 | (int64_t)rtexts, rsvd); | 5204 | (int64_t)rtexts, 0); |
5199 | (void)xfs_trans_reserve_quota_nblks(NULL, | 5205 | (void)xfs_trans_reserve_quota_nblks(NULL, |
5200 | ip, -((long)del.br_blockcount), 0, | 5206 | ip, -((long)del.br_blockcount), 0, |
5201 | XFS_QMOPT_RES_RTBLKS); | 5207 | XFS_QMOPT_RES_RTBLKS); |
5202 | } else { | 5208 | } else { |
5203 | xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, | 5209 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
5204 | (int64_t)del.br_blockcount, rsvd); | 5210 | (int64_t)del.br_blockcount, 0); |
5205 | (void)xfs_trans_reserve_quota_nblks(NULL, | 5211 | (void)xfs_trans_reserve_quota_nblks(NULL, |
5206 | ip, -((long)del.br_blockcount), 0, | 5212 | ip, -((long)del.br_blockcount), 0, |
5207 | XFS_QMOPT_RES_REGBLKS); | 5213 | XFS_QMOPT_RES_REGBLKS); |
@@ -5232,31 +5238,29 @@ xfs_bunmapi( | |||
5232 | error = XFS_ERROR(ENOSPC); | 5238 | error = XFS_ERROR(ENOSPC); |
5233 | goto error0; | 5239 | goto error0; |
5234 | } | 5240 | } |
5235 | error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, | 5241 | error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, |
5236 | &tmp_logflags, whichfork, rsvd); | 5242 | &tmp_logflags, whichfork); |
5237 | logflags |= tmp_logflags; | 5243 | logflags |= tmp_logflags; |
5238 | if (error) | 5244 | if (error) |
5239 | goto error0; | 5245 | goto error0; |
5240 | bno = del.br_startoff - 1; | 5246 | bno = del.br_startoff - 1; |
5241 | nodelete: | 5247 | nodelete: |
5242 | lastx = ifp->if_lastex; | ||
5243 | /* | 5248 | /* |
5244 | * If not done go on to the next (previous) record. | 5249 | * If not done go on to the next (previous) record. |
5245 | * Reset ep in case the extents array was re-alloced. | ||
5246 | */ | 5250 | */ |
5247 | ep = xfs_iext_get_ext(ifp, lastx); | ||
5248 | if (bno != (xfs_fileoff_t)-1 && bno >= start) { | 5251 | if (bno != (xfs_fileoff_t)-1 && bno >= start) { |
5249 | if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) || | 5252 | if (lastx >= 0) { |
5250 | xfs_bmbt_get_startoff(ep) > bno) { | 5253 | ep = xfs_iext_get_ext(ifp, lastx); |
5251 | if (--lastx >= 0) | 5254 | if (xfs_bmbt_get_startoff(ep) > bno) { |
5252 | ep = xfs_iext_get_ext(ifp, lastx); | 5255 | if (--lastx >= 0) |
5253 | } | 5256 | ep = xfs_iext_get_ext(ifp, |
5254 | if (lastx >= 0) | 5257 | lastx); |
5258 | } | ||
5255 | xfs_bmbt_get_all(ep, &got); | 5259 | xfs_bmbt_get_all(ep, &got); |
5260 | } | ||
5256 | extno++; | 5261 | extno++; |
5257 | } | 5262 | } |
5258 | } | 5263 | } |
5259 | ifp->if_lastex = lastx; | ||
5260 | *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; | 5264 | *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; |
5261 | ASSERT(ifp->if_ext_max == | 5265 | ASSERT(ifp->if_ext_max == |
5262 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | 5266 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); |
@@ -5461,8 +5465,13 @@ xfs_getbmap( | |||
5461 | if (error) | 5465 | if (error) |
5462 | goto out_unlock_iolock; | 5466 | goto out_unlock_iolock; |
5463 | } | 5467 | } |
5464 | 5468 | /* | |
5465 | ASSERT(ip->i_delayed_blks == 0); | 5469 | * even after flushing the inode, there can still be delalloc |
5470 | * blocks on the inode beyond EOF due to speculative | ||
5471 | * preallocation. These are not removed until the release | ||
5472 | * function is called or the inode is inactivated. Hence we | ||
5473 | * cannot assert here that ip->i_delayed_blks == 0. | ||
5474 | */ | ||
5466 | } | 5475 | } |
5467 | 5476 | ||
5468 | lock = xfs_ilock_map_shared(ip); | 5477 | lock = xfs_ilock_map_shared(ip); |
@@ -5728,7 +5737,7 @@ xfs_check_block( | |||
5728 | else | 5737 | else |
5729 | thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); | 5738 | thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); |
5730 | if (*thispa == *pp) { | 5739 | if (*thispa == *pp) { |
5731 | cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", | 5740 | xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", |
5732 | __func__, j, i, | 5741 | __func__, j, i, |
5733 | (unsigned long long)be64_to_cpu(*thispa)); | 5742 | (unsigned long long)be64_to_cpu(*thispa)); |
5734 | panic("%s: ptrs are equal in node\n", | 5743 | panic("%s: ptrs are equal in node\n", |
@@ -5893,11 +5902,11 @@ xfs_bmap_check_leaf_extents( | |||
5893 | return; | 5902 | return; |
5894 | 5903 | ||
5895 | error0: | 5904 | error0: |
5896 | cmn_err(CE_WARN, "%s: at error0", __func__); | 5905 | xfs_warn(mp, "%s: at error0", __func__); |
5897 | if (bp_release) | 5906 | if (bp_release) |
5898 | xfs_trans_brelse(NULL, bp); | 5907 | xfs_trans_brelse(NULL, bp); |
5899 | error_norelse: | 5908 | error_norelse: |
5900 | cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", | 5909 | xfs_warn(mp, "%s: BAD after btree leaves for %d extents", |
5901 | __func__, i); | 5910 | __func__, i); |
5902 | panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); | 5911 | panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); |
5903 | return; | 5912 | return; |
@@ -6060,3 +6069,79 @@ xfs_bmap_disk_count_leaves( | |||
6060 | *count += xfs_bmbt_disk_get_blockcount(frp); | 6069 | *count += xfs_bmbt_disk_get_blockcount(frp); |
6061 | } | 6070 | } |
6062 | } | 6071 | } |
6072 | |||
6073 | /* | ||
6074 | * dead simple method of punching delayed allocation blocks from a range in | ||
6075 | * the inode. Walks a block at a time so will be slow, but is only executed in | ||
6076 | * rare error cases so the overhead is not critical. This will always punch out | ||
6077 | * both the start and end blocks, even if the ranges only partially overlap | ||
6078 | * them, so it is up to the caller to ensure that partial blocks are not | ||
6079 | * passed in. | ||
6080 | */ | ||
6081 | int | ||
6082 | xfs_bmap_punch_delalloc_range( | ||
6083 | struct xfs_inode *ip, | ||
6084 | xfs_fileoff_t start_fsb, | ||
6085 | xfs_fileoff_t length) | ||
6086 | { | ||
6087 | xfs_fileoff_t remaining = length; | ||
6088 | int error = 0; | ||
6089 | |||
6090 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
6091 | |||
6092 | do { | ||
6093 | int done; | ||
6094 | xfs_bmbt_irec_t imap; | ||
6095 | int nimaps = 1; | ||
6096 | xfs_fsblock_t firstblock; | ||
6097 | xfs_bmap_free_t flist; | ||
6098 | |||
6099 | /* | ||
6100 | * Map the range first and check that it is a delalloc extent | ||
6101 | * before trying to unmap the range. Otherwise we will be | ||
6102 | * trying to remove a real extent (which requires a | ||
6103 | * transaction) or a hole, which is probably a bad idea... | ||
6104 | */ | ||
6105 | error = xfs_bmapi(NULL, ip, start_fsb, 1, | ||
6106 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
6107 | &nimaps, NULL); | ||
6108 | |||
6109 | if (error) { | ||
6110 | /* something screwed, just bail */ | ||
6111 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
6112 | xfs_alert(ip->i_mount, | ||
6113 | "Failed delalloc mapping lookup ino %lld fsb %lld.", | ||
6114 | ip->i_ino, start_fsb); | ||
6115 | } | ||
6116 | break; | ||
6117 | } | ||
6118 | if (!nimaps) { | ||
6119 | /* nothing there */ | ||
6120 | goto next_block; | ||
6121 | } | ||
6122 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
6123 | /* been converted, ignore */ | ||
6124 | goto next_block; | ||
6125 | } | ||
6126 | WARN_ON(imap.br_blockcount == 0); | ||
6127 | |||
6128 | /* | ||
6129 | * Note: while we initialise the firstblock/flist pair, they | ||
6130 | * should never be used because blocks should never be | ||
6131 | * allocated or freed for a delalloc extent and hence we don't | ||
6132 | * need to cancel or finish them after the xfs_bunmapi() call. | ||
6133 | */ | ||
6134 | xfs_bmap_init(&flist, &firstblock); | ||
6135 | error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, | ||
6136 | &flist, &done); | ||
6137 | if (error) | ||
6138 | break; | ||
6139 | |||
6140 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
6141 | next_block: | ||
6142 | start_fsb++; | ||
6143 | remaining--; | ||
6144 | } while (remaining > 0); | ||
6145 | |||
6146 | return error; | ||
6147 | } | ||
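
xfs_bmap_punch_delalloc_range() above deliberately walks one filesystem block per iteration: map the block first, skip holes and extents that have already been converted, and only then unmap. A minimal user-space sketch of that loop shape, with hypothetical map_block()/unmap_block() stand-ins for the xfs_bmapi()/xfs_bunmapi() calls:

#include <stdio.h>

enum blk_state { BLK_HOLE, BLK_DELALLOC, BLK_REAL };

/* hypothetical stand-in for xfs_bmapi(): what backs this one block? */
static enum blk_state map_block(long long fsb)
{
    return (fsb % 3 == 1) ? BLK_DELALLOC : BLK_REAL;    /* fake layout */
}

/* hypothetical stand-in for xfs_bunmapi(): punch out one block */
static int unmap_block(long long fsb)
{
    printf("punched delalloc block %lld\n", fsb);
    return 0;
}

/* same shape as xfs_bmap_punch_delalloc_range(): one block per pass */
static int punch_delalloc_range(long long start_fsb, long long length)
{
    long long remaining = length;
    int error = 0;

    do {
        /* map first so we never unmap a hole or a real extent */
        if (map_block(start_fsb) == BLK_DELALLOC) {
            error = unmap_block(start_fsb);
            if (error)
                break;
        }
        start_fsb++;
        remaining--;
    } while (remaining > 0);

    return error;
}

int main(void)
{
    return punch_delalloc_range(100, 6);
}

Mapping before unmapping is the safety property the in-function comment stresses: punching a real extent would need a transaction, and punching a hole is meaningless.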
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index b13569a6179b..c62234bde053 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -69,14 +69,16 @@ typedef struct xfs_bmap_free | |||
69 | #define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ | 69 | #define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ |
70 | #define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ | 70 | #define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ |
71 | #define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ | 71 | #define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ |
72 | #define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */ | ||
73 | #define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ | 72 | #define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ |
74 | #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ | 73 | #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ |
75 | /* combine contig. space */ | 74 | /* combine contig. space */ |
76 | #define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */ | 75 | #define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */ |
77 | #define XFS_BMAPI_CONVERT 0x200 /* unwritten extent conversion - */ | 76 | /* |
78 | /* need write cache flushing and no */ | 77 | * unwritten extent conversion - this needs write cache flushing and no additional |
79 | /* additional allocation alignments */ | 78 | * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts |
79 | * from written to unwritten, otherwise it converts from unwritten to written. | ||
80 | */ | ||
81 | #define XFS_BMAPI_CONVERT 0x200 | ||
80 | 82 | ||
81 | #define XFS_BMAPI_FLAGS \ | 83 | #define XFS_BMAPI_FLAGS \ |
82 | { XFS_BMAPI_WRITE, "WRITE" }, \ | 84 | { XFS_BMAPI_WRITE, "WRITE" }, \ |
@@ -84,7 +86,6 @@ typedef struct xfs_bmap_free | |||
84 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ | 86 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ |
85 | { XFS_BMAPI_METADATA, "METADATA" }, \ | 87 | { XFS_BMAPI_METADATA, "METADATA" }, \ |
86 | { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ | 88 | { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ |
87 | { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ | ||
88 | { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ | 89 | { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ |
89 | { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ | 90 | { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ |
90 | { XFS_BMAPI_CONTIG, "CONTIG" }, \ | 91 | { XFS_BMAPI_CONTIG, "CONTIG" }, \ |
@@ -391,6 +392,11 @@ xfs_bmap_count_blocks( | |||
391 | int whichfork, | 392 | int whichfork, |
392 | int *count); | 393 | int *count); |
393 | 394 | ||
395 | int | ||
396 | xfs_bmap_punch_delalloc_range( | ||
397 | struct xfs_inode *ip, | ||
398 | xfs_fileoff_t start_fsb, | ||
399 | xfs_fileoff_t length); | ||
394 | #endif /* __KERNEL__ */ | 400 | #endif /* __KERNEL__ */ |
395 | 401 | ||
396 | #endif /* __XFS_BMAP_H__ */ | 402 | #endif /* __XFS_BMAP_H__ */ |
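
The retired XFS_BMAPI_RSVBLOCKS aside, the subtle flag here is XFS_BMAPI_CONVERT: alone it requests unwritten-to-written conversion, and together with XFS_BMAPI_PREALLOC it requests the reverse, which is what the updated condition in xfs_bmapi() tests. An illustrative sketch of that direction check (flag values copied from this header; the helper is hypothetical and simplified, e.g. it omits the XFS_BMAPI_DELAY test the kernel also makes):

#include <stdio.h>

#define BMAPI_PREALLOC 0x040    /* values as in xfs_bmap.h */
#define BMAPI_CONVERT  0x200

enum ext_state { EXT_NORM, EXT_UNWRITTEN };

/* hypothetical helper: does this flags/state pair ask for conversion? */
static int wants_conversion(int flags, enum ext_state state)
{
    if (state == EXT_UNWRITTEN && !(flags & BMAPI_PREALLOC))
        return 1;    /* unwritten -> written */
    if (state == EXT_NORM &&
        (flags & (BMAPI_PREALLOC | BMAPI_CONVERT)) ==
                 (BMAPI_PREALLOC | BMAPI_CONVERT))
        return 1;    /* written -> unwritten */
    return 0;
}

int main(void)
{
    printf("%d\n", wants_conversion(BMAPI_CONVERT, EXT_UNWRITTEN));
    printf("%d\n",
           wants_conversion(BMAPI_PREALLOC | BMAPI_CONVERT, EXT_NORM));
    return 0;
}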
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 829af92f0fba..2f9e97c128a0 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -217,7 +217,7 @@ xfs_btree_del_cursor( | |||
217 | */ | 217 | */ |
218 | for (i = 0; i < cur->bc_nlevels; i++) { | 218 | for (i = 0; i < cur->bc_nlevels; i++) { |
219 | if (cur->bc_bufs[i]) | 219 | if (cur->bc_bufs[i]) |
220 | xfs_btree_setbuf(cur, i, NULL); | 220 | xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); |
221 | else if (!error) | 221 | else if (!error) |
222 | break; | 222 | break; |
223 | } | 223 | } |
@@ -634,9 +634,8 @@ xfs_btree_read_bufl( | |||
634 | return error; | 634 | return error; |
635 | } | 635 | } |
636 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); | 636 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); |
637 | if (bp != NULL) { | 637 | if (bp) |
638 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); | 638 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); |
639 | } | ||
640 | *bpp = bp; | 639 | *bpp = bp; |
641 | return 0; | 640 | return 0; |
642 | } | 641 | } |
@@ -656,7 +655,7 @@ xfs_btree_reada_bufl( | |||
656 | 655 | ||
657 | ASSERT(fsbno != NULLFSBLOCK); | 656 | ASSERT(fsbno != NULLFSBLOCK); |
658 | d = XFS_FSB_TO_DADDR(mp, fsbno); | 657 | d = XFS_FSB_TO_DADDR(mp, fsbno); |
659 | xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); | 658 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); |
660 | } | 659 | } |
661 | 660 | ||
662 | /* | 661 | /* |
@@ -676,7 +675,7 @@ xfs_btree_reada_bufs( | |||
676 | ASSERT(agno != NULLAGNUMBER); | 675 | ASSERT(agno != NULLAGNUMBER); |
677 | ASSERT(agbno != NULLAGBLOCK); | 676 | ASSERT(agbno != NULLAGBLOCK); |
678 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | 677 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); |
679 | xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); | 678 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); |
680 | } | 679 | } |
681 | 680 | ||
682 | STATIC int | 681 | STATIC int |
@@ -763,22 +762,19 @@ xfs_btree_readahead( | |||
763 | * Set the buffer for level "lev" in the cursor to bp, releasing | 762 | * Set the buffer for level "lev" in the cursor to bp, releasing |
764 | * any previous buffer. | 763 | * any previous buffer. |
765 | */ | 764 | */ |
766 | void | 765 | STATIC void |
767 | xfs_btree_setbuf( | 766 | xfs_btree_setbuf( |
768 | xfs_btree_cur_t *cur, /* btree cursor */ | 767 | xfs_btree_cur_t *cur, /* btree cursor */ |
769 | int lev, /* level in btree */ | 768 | int lev, /* level in btree */ |
770 | xfs_buf_t *bp) /* new buffer to set */ | 769 | xfs_buf_t *bp) /* new buffer to set */ |
771 | { | 770 | { |
772 | struct xfs_btree_block *b; /* btree block */ | 771 | struct xfs_btree_block *b; /* btree block */ |
773 | xfs_buf_t *obp; /* old buffer pointer */ | ||
774 | 772 | ||
775 | obp = cur->bc_bufs[lev]; | 773 | if (cur->bc_bufs[lev]) |
776 | if (obp) | 774 | xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); |
777 | xfs_trans_brelse(cur->bc_tp, obp); | ||
778 | cur->bc_bufs[lev] = bp; | 775 | cur->bc_bufs[lev] = bp; |
779 | cur->bc_ra[lev] = 0; | 776 | cur->bc_ra[lev] = 0; |
780 | if (!bp) | 777 | |
781 | return; | ||
782 | b = XFS_BUF_TO_BLOCK(bp); | 778 | b = XFS_BUF_TO_BLOCK(bp); |
783 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | 779 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { |
784 | if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) | 780 | if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) |
@@ -947,13 +943,13 @@ xfs_btree_set_refs( | |||
947 | switch (cur->bc_btnum) { | 943 | switch (cur->bc_btnum) { |
948 | case XFS_BTNUM_BNO: | 944 | case XFS_BTNUM_BNO: |
949 | case XFS_BTNUM_CNT: | 945 | case XFS_BTNUM_CNT: |
950 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); | 946 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF); |
951 | break; | 947 | break; |
952 | case XFS_BTNUM_INO: | 948 | case XFS_BTNUM_INO: |
953 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); | 949 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF); |
954 | break; | 950 | break; |
955 | case XFS_BTNUM_BMAP: | 951 | case XFS_BTNUM_BMAP: |
956 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); | 952 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF); |
957 | break; | 953 | break; |
958 | default: | 954 | default: |
959 | ASSERT(0); | 955 | ASSERT(0); |
@@ -3011,6 +3007,43 @@ out0: | |||
3011 | return 0; | 3007 | return 0; |
3012 | } | 3008 | } |
3013 | 3009 | ||
3010 | /* | ||
3011 | * Kill the current root node and replace it with its only child node. | ||
3012 | */ | ||
3013 | STATIC int | ||
3014 | xfs_btree_kill_root( | ||
3015 | struct xfs_btree_cur *cur, | ||
3016 | struct xfs_buf *bp, | ||
3017 | int level, | ||
3018 | union xfs_btree_ptr *newroot) | ||
3019 | { | ||
3020 | int error; | ||
3021 | |||
3022 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
3023 | XFS_BTREE_STATS_INC(cur, killroot); | ||
3024 | |||
3025 | /* | ||
3026 | * Update the root pointer, decreasing the level by 1 and then | ||
3027 | * free the old root. | ||
3028 | */ | ||
3029 | cur->bc_ops->set_root(cur, newroot, -1); | ||
3030 | |||
3031 | error = cur->bc_ops->free_block(cur, bp); | ||
3032 | if (error) { | ||
3033 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
3034 | return error; | ||
3035 | } | ||
3036 | |||
3037 | XFS_BTREE_STATS_INC(cur, free); | ||
3038 | |||
3039 | cur->bc_bufs[level] = NULL; | ||
3040 | cur->bc_ra[level] = 0; | ||
3041 | cur->bc_nlevels--; | ||
3042 | |||
3043 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3044 | return 0; | ||
3045 | } | ||
3046 | |||
3014 | STATIC int | 3047 | STATIC int |
3015 | xfs_btree_dec_cursor( | 3048 | xfs_btree_dec_cursor( |
3016 | struct xfs_btree_cur *cur, | 3049 | struct xfs_btree_cur *cur, |
@@ -3195,7 +3228,7 @@ xfs_btree_delrec( | |||
3195 | * Make it the new root of the btree. | 3228 | * Make it the new root of the btree. |
3196 | */ | 3229 | */ |
3197 | pp = xfs_btree_ptr_addr(cur, 1, block); | 3230 | pp = xfs_btree_ptr_addr(cur, 1, block); |
3198 | error = cur->bc_ops->kill_root(cur, bp, level, pp); | 3231 | error = xfs_btree_kill_root(cur, bp, level, pp); |
3199 | if (error) | 3232 | if (error) |
3200 | goto error0; | 3233 | goto error0; |
3201 | } else if (level > 0) { | 3234 | } else if (level > 0) { |
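
Dropping the per-btree kill_root method works because, as the new common xfs_btree_kill_root() shows, collapsing the root into its only child needs nothing btree-specific beyond the existing set_root and free_block operations. A hedged sketch of that ops-table reduction, with illustrative types:

#include <stdio.h>

struct cursor;

/* reduced ops table: no per-btree kill_root method needed any more */
struct btree_ops {
    void (*set_root)(struct cursor *cur, int new_root, int level_change);
    int  (*free_block)(struct cursor *cur, int block);
};

struct cursor {
    const struct btree_ops *ops;
    int root;
    int nlevels;
};

/* generic root collapse, built only from set_root + free_block */
static int kill_root(struct cursor *cur, int old_root_block, int new_root)
{
    int error;

    cur->ops->set_root(cur, new_root, -1);  /* point at child, level-- */
    error = cur->ops->free_block(cur, old_root_block);
    if (error)
        return error;
    cur->nlevels--;
    return 0;
}

static void demo_set_root(struct cursor *cur, int new_root, int level_change)
{
    cur->root = new_root;
    (void)level_change;
}

static int demo_free_block(struct cursor *cur, int block)
{
    (void)cur;
    printf("freed block %d\n", block);
    return 0;
}

static const struct btree_ops demo_ops = { demo_set_root, demo_free_block };

int main(void)
{
    struct cursor cur = { &demo_ops, 7, 2 };

    kill_root(&cur, 7, 3);
    printf("root=%d nlevels=%d\n", cur.root, cur.nlevels);
    return 0;
}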
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7fa07062bdda..82fafc66bd1f 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -152,9 +152,7 @@ struct xfs_btree_ops { | |||
152 | 152 | ||
153 | /* update btree root pointer */ | 153 | /* update btree root pointer */ |
154 | void (*set_root)(struct xfs_btree_cur *cur, | 154 | void (*set_root)(struct xfs_btree_cur *cur, |
155 | union xfs_btree_ptr *nptr, int level_change); | 155 | union xfs_btree_ptr *nptr, int level_change); |
156 | int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp, | ||
157 | int level, union xfs_btree_ptr *newroot); | ||
158 | 156 | ||
159 | /* block allocation / freeing */ | 157 | /* block allocation / freeing */ |
160 | int (*alloc_block)(struct xfs_btree_cur *cur, | 158 | int (*alloc_block)(struct xfs_btree_cur *cur, |
@@ -399,16 +397,6 @@ xfs_btree_reada_bufs( | |||
399 | xfs_agblock_t agbno, /* allocation group block number */ | 397 | xfs_agblock_t agbno, /* allocation group block number */ |
400 | xfs_extlen_t count); /* count of filesystem blocks */ | 398 | xfs_extlen_t count); /* count of filesystem blocks */ |
401 | 399 | ||
402 | /* | ||
403 | * Set the buffer for level "lev" in the cursor to bp, releasing | ||
404 | * any previous buffer. | ||
405 | */ | ||
406 | void | ||
407 | xfs_btree_setbuf( | ||
408 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
409 | int lev, /* level in btree */ | ||
410 | struct xfs_buf *bp); /* new buffer to set */ | ||
411 | |||
412 | 400 | ||
413 | /* | 401 | /* |
414 | * Common btree core entry points. | 402 | * Common btree core entry points. |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 1b09d7a280df..7b7e005e3dcc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -130,10 +130,12 @@ xfs_buf_item_log_check( | |||
130 | orig = bip->bli_orig; | 130 | orig = bip->bli_orig; |
131 | buffer = XFS_BUF_PTR(bp); | 131 | buffer = XFS_BUF_PTR(bp); |
132 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { | 132 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { |
133 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) | 133 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { |
134 | cmn_err(CE_PANIC, | 134 | xfs_emerg(bp->b_mount, |
135 | "xfs_buf_item_log_check bip %x buffer %x orig %x index %d", | 135 | "%s: bip %x buffer %x orig %x index %d", |
136 | bip, bp, orig, x); | 136 | __func__, bip, bp, orig, x); |
137 | ASSERT(0); | ||
138 | } | ||
137 | } | 139 | } |
138 | } | 140 | } |
139 | #else | 141 | #else |
@@ -141,8 +143,7 @@ xfs_buf_item_log_check( | |||
141 | #define xfs_buf_item_log_check(x) | 143 | #define xfs_buf_item_log_check(x) |
142 | #endif | 144 | #endif |
143 | 145 | ||
144 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); | 146 | STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); |
145 | STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); | ||
146 | 147 | ||
147 | /* | 148 | /* |
148 | * This returns the number of log iovecs needed to log the | 149 | * This returns the number of log iovecs needed to log the |
@@ -428,13 +429,15 @@ xfs_buf_item_unpin( | |||
428 | 429 | ||
429 | if (remove) { | 430 | if (remove) { |
430 | /* | 431 | /* |
431 | * We have to remove the log item from the transaction | 432 | * If we are in a transaction context, we have to |
432 | * as we are about to release our reference to the | 433 | * remove the log item from the transaction as we are |
433 | * buffer. If we don't, the unlock that occurs later | 434 | * about to release our reference to the buffer. If we |
434 | * in xfs_trans_uncommit() will ry to reference the | 435 | * don't, the unlock that occurs later in |
436 | * xfs_trans_uncommit() will try to reference the | ||
435 | * buffer which we no longer have a hold on. | 437 | * buffer which we no longer have a hold on. |
436 | */ | 438 | */ |
437 | xfs_trans_del_item(lip); | 439 | if (lip->li_desc) |
440 | xfs_trans_del_item(lip); | ||
438 | 441 | ||
439 | /* | 442 | /* |
440 | * Since the transaction no longer refers to the buffer, | 443 | * Since the transaction no longer refers to the buffer, |
@@ -450,7 +453,7 @@ xfs_buf_item_unpin( | |||
450 | * xfs_trans_ail_delete() drops the AIL lock. | 453 | * xfs_trans_ail_delete() drops the AIL lock. |
451 | */ | 454 | */ |
452 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { | 455 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { |
453 | xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); | 456 | xfs_buf_do_callbacks(bp); |
454 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 457 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
455 | XFS_BUF_CLR_IODONE_FUNC(bp); | 458 | XFS_BUF_CLR_IODONE_FUNC(bp); |
456 | } else { | 459 | } else { |
@@ -692,8 +695,7 @@ xfs_buf_item_init( | |||
692 | * the first. If we do already have one, there is | 695 | * the first. If we do already have one, there is |
693 | * nothing to do here so return. | 696 | * nothing to do here so return. |
694 | */ | 697 | */ |
695 | if (bp->b_mount != mp) | 698 | ASSERT(bp->b_target->bt_mount == mp); |
696 | bp->b_mount = mp; | ||
697 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { | 699 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { |
698 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 700 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
699 | if (lip->li_type == XFS_LI_BUF) { | 701 | if (lip->li_type == XFS_LI_BUF) { |
@@ -919,15 +921,26 @@ xfs_buf_attach_iodone( | |||
919 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); | 921 | XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); |
920 | } | 922 | } |
921 | 923 | ||
924 | /* | ||
925 | * We can have many callbacks on a buffer. Running the callbacks individually | ||
926 | * can cause a lot of contention on the AIL lock, so we allow for a single | ||
927 | * callback to scan the remaining lip->li_bio_list for other items that share | ||
928 | * the same type and callback, and to process them all in the first call. | ||
929 | * | ||
930 | * As a result, the loop walking the callback list below will also modify the | ||
931 | * list. It removes the first item from the list and then runs the callback. | ||
932 | * The loop then restarts from the new head of the list. This allows the | ||
933 | * callback to scan and modify the list attached to the buffer and we don't | ||
934 | * have to care about maintaining a next item pointer. | ||
935 | */ | ||
922 | STATIC void | 936 | STATIC void |
923 | xfs_buf_do_callbacks( | 937 | xfs_buf_do_callbacks( |
924 | xfs_buf_t *bp, | 938 | struct xfs_buf *bp) |
925 | xfs_log_item_t *lip) | ||
926 | { | 939 | { |
927 | xfs_log_item_t *nlip; | 940 | struct xfs_log_item *lip; |
928 | 941 | ||
929 | while (lip != NULL) { | 942 | while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { |
930 | nlip = lip->li_bio_list; | 943 | XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); |
931 | ASSERT(lip->li_cb != NULL); | 944 | ASSERT(lip->li_cb != NULL); |
932 | /* | 945 | /* |
933 | * Clear the next pointer so we don't have any | 946 | * Clear the next pointer so we don't have any |
@@ -937,7 +950,6 @@ xfs_buf_do_callbacks( | |||
937 | */ | 950 | */ |
938 | lip->li_bio_list = NULL; | 951 | lip->li_bio_list = NULL; |
939 | lip->li_cb(bp, lip); | 952 | lip->li_cb(bp, lip); |
940 | lip = nlip; | ||
941 | } | 953 | } |
942 | } | 954 | } |
943 | 955 | ||
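
The rewritten xfs_buf_do_callbacks() re-reads the list head on every iteration rather than caching a next pointer, because a callback is now allowed to unhook further items behind the loop's back. The same pop-head pattern, as a small self-contained sketch:

#include <stdio.h>
#include <stddef.h>

struct item {
    struct item *next;
    void (*cb)(struct item *ip);
};

static void greet(struct item *ip)
{
    printf("callback on %p\n", (void *)ip);
}

/* pop the head, clear its link, run it, then re-read the new head:
 * safe even if a callback removed more items while it ran */
static void do_callbacks(struct item **headp)
{
    struct item *ip;

    while ((ip = *headp) != NULL) {
        *headp = ip->next;
        ip->next = NULL;        /* item must not see stale links */
        ip->cb(ip);
    }
}

int main(void)
{
    struct item b = { NULL, greet };
    struct item a = { &b, greet };
    struct item *head = &a;

    do_callbacks(&head);
    return 0;
}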
@@ -950,128 +962,75 @@ xfs_buf_do_callbacks( | |||
950 | */ | 962 | */ |
951 | void | 963 | void |
952 | xfs_buf_iodone_callbacks( | 964 | xfs_buf_iodone_callbacks( |
953 | xfs_buf_t *bp) | 965 | struct xfs_buf *bp) |
954 | { | 966 | { |
955 | xfs_log_item_t *lip; | 967 | struct xfs_log_item *lip = bp->b_fspriv; |
956 | static ulong lasttime; | 968 | struct xfs_mount *mp = lip->li_mountp; |
957 | static xfs_buftarg_t *lasttarg; | 969 | static ulong lasttime; |
958 | xfs_mount_t *mp; | 970 | static xfs_buftarg_t *lasttarg; |
959 | 971 | ||
960 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 972 | if (likely(!XFS_BUF_GETERROR(bp))) |
961 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 973 | goto do_callbacks; |
962 | 974 | ||
963 | if (XFS_BUF_GETERROR(bp) != 0) { | 975 | /* |
964 | /* | 976 | * If we've already decided to shut down the filesystem because of |
965 | * If we've already decided to shutdown the filesystem | 977 | * I/O errors, there's no point in giving this a retry. |
966 | * because of IO errors, there's no point in giving this | 978 | */ |
967 | * a retry. | 979 | if (XFS_FORCED_SHUTDOWN(mp)) { |
968 | */ | 980 | XFS_BUF_SUPER_STALE(bp); |
969 | mp = lip->li_mountp; | 981 | trace_xfs_buf_item_iodone(bp, _RET_IP_); |
970 | if (XFS_FORCED_SHUTDOWN(mp)) { | 982 | goto do_callbacks; |
971 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | 983 | } |
972 | XFS_BUF_SUPER_STALE(bp); | ||
973 | trace_xfs_buf_item_iodone(bp, _RET_IP_); | ||
974 | xfs_buf_do_callbacks(bp, lip); | ||
975 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | ||
976 | XFS_BUF_CLR_IODONE_FUNC(bp); | ||
977 | xfs_biodone(bp); | ||
978 | return; | ||
979 | } | ||
980 | 984 | ||
981 | if ((XFS_BUF_TARGET(bp) != lasttarg) || | 985 | if (XFS_BUF_TARGET(bp) != lasttarg || |
982 | (time_after(jiffies, (lasttime + 5*HZ)))) { | 986 | time_after(jiffies, (lasttime + 5*HZ))) { |
983 | lasttime = jiffies; | 987 | lasttime = jiffies; |
984 | cmn_err(CE_ALERT, "Device %s, XFS metadata write error" | 988 | xfs_alert(mp, "Device %s: metadata write error block 0x%llx", |
985 | " block 0x%llx in %s", | 989 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), |
986 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 990 | (__uint64_t)XFS_BUF_ADDR(bp)); |
987 | (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); | 991 | } |
988 | } | 992 | lasttarg = XFS_BUF_TARGET(bp); |
989 | lasttarg = XFS_BUF_TARGET(bp); | ||
990 | 993 | ||
991 | if (XFS_BUF_ISASYNC(bp)) { | 994 | /* |
992 | /* | 995 | * If the write was asynchronous then no one will be looking for the |
993 | * If the write was asynchronous then noone will be | 996 | * error. Clear the error state and write the buffer out again. |
994 | * looking for the error. Clear the error state | 997 | * |
995 | * and write the buffer out again delayed write. | 998 | * During sync or umount we'll write all pending buffers again |
996 | * | 999 | * synchronous, which will catch these errors if they keep hanging |
997 | * XXXsup This is OK, so long as we catch these | 1000 | * around. |
998 | * before we start the umount; we don't want these | 1001 | */ |
999 | * DELWRI metadata bufs to be hanging around. | 1002 | if (XFS_BUF_ISASYNC(bp)) { |
1000 | */ | 1003 | XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */ |
1001 | XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */ | 1004 | |
1002 | 1005 | if (!XFS_BUF_ISSTALE(bp)) { | |
1003 | if (!(XFS_BUF_ISSTALE(bp))) { | 1006 | XFS_BUF_DELAYWRITE(bp); |
1004 | XFS_BUF_DELAYWRITE(bp); | ||
1005 | XFS_BUF_DONE(bp); | ||
1006 | XFS_BUF_SET_START(bp); | ||
1007 | } | ||
1008 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | ||
1009 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1010 | xfs_buf_relse(bp); | ||
1011 | } else { | ||
1012 | /* | ||
1013 | * If the write of the buffer was not asynchronous, | ||
1014 | * then we want to make sure to return the error | ||
1015 | * to the caller of bwrite(). Because of this we | ||
1016 | * cannot clear the B_ERROR state at this point. | ||
1017 | * Instead we install a callback function that | ||
1018 | * will be called when the buffer is released, and | ||
1019 | * that routine will clear the error state and | ||
1020 | * set the buffer to be written out again after | ||
1021 | * some delay. | ||
1022 | */ | ||
1023 | /* We actually overwrite the existing b-relse | ||
1024 | function at times, but we're gonna be shutting down | ||
1025 | anyway. */ | ||
1026 | XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); | ||
1027 | XFS_BUF_DONE(bp); | 1007 | XFS_BUF_DONE(bp); |
1028 | XFS_BUF_FINISH_IOWAIT(bp); | 1008 | XFS_BUF_SET_START(bp); |
1029 | } | 1009 | } |
1010 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | ||
1011 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1012 | xfs_buf_relse(bp); | ||
1030 | return; | 1013 | return; |
1031 | } | 1014 | } |
1032 | 1015 | ||
1033 | xfs_buf_do_callbacks(bp, lip); | 1016 | /* |
1034 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1017 | * If the write of the buffer was synchronous, we want to make |
1035 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1018 | * sure to return the error to the caller of xfs_bwrite(). |
1036 | xfs_biodone(bp); | 1019 | */ |
1037 | } | ||
1038 | |||
1039 | /* | ||
1040 | * This is a callback routine attached to a buffer which gets an error | ||
1041 | * when being written out synchronously. | ||
1042 | */ | ||
1043 | STATIC void | ||
1044 | xfs_buf_error_relse( | ||
1045 | xfs_buf_t *bp) | ||
1046 | { | ||
1047 | xfs_log_item_t *lip; | ||
1048 | xfs_mount_t *mp; | ||
1049 | |||
1050 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | ||
1051 | mp = (xfs_mount_t *)lip->li_mountp; | ||
1052 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | ||
1053 | |||
1054 | XFS_BUF_STALE(bp); | 1020 | XFS_BUF_STALE(bp); |
1055 | XFS_BUF_DONE(bp); | 1021 | XFS_BUF_DONE(bp); |
1056 | XFS_BUF_UNDELAYWRITE(bp); | 1022 | XFS_BUF_UNDELAYWRITE(bp); |
1057 | XFS_BUF_ERROR(bp,0); | ||
1058 | 1023 | ||
1059 | trace_xfs_buf_error_relse(bp, _RET_IP_); | 1024 | trace_xfs_buf_error_relse(bp, _RET_IP_); |
1025 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | ||
1060 | 1026 | ||
1061 | if (! XFS_FORCED_SHUTDOWN(mp)) | 1027 | do_callbacks: |
1062 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 1028 | xfs_buf_do_callbacks(bp); |
1063 | /* | ||
1064 | * We have to unpin the pinned buffers so do the | ||
1065 | * callbacks. | ||
1066 | */ | ||
1067 | xfs_buf_do_callbacks(bp, lip); | ||
1068 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1029 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1069 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1030 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1070 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); | 1031 | xfs_buf_ioend(bp, 0); |
1071 | xfs_buf_relse(bp); | ||
1072 | } | 1032 | } |
1073 | 1033 | ||
1074 | |||
1075 | /* | 1034 | /* |
1076 | * This is the iodone() function for buffers which have been | 1035 | * This is the iodone() function for buffers which have been |
1077 | * logged. It is called when they are eventually flushed out. | 1036 | * logged. It is called when they are eventually flushed out. |
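
The consolidated error path above behaves differently for the two write modes: an async write error is cleared and the buffer re-queued for delayed write-out, while a sync write error marks the buffer stale and forces a shutdown so the caller of xfs_bwrite() sees the failure. A compact sketch of that decision, with hypothetical requeue/shutdown helpers:

#include <stdio.h>

struct buf {
    int error;
    int async;
    int stale;
};

static void requeue_delwri(struct buf *bp)
{
    (void)bp;
    printf("requeued buffer for delayed write\n");
}

static void shutdown_fs(void)
{
    printf("forcing filesystem shutdown\n");
}

/* mirrors the shape of the new xfs_buf_iodone_callbacks() error path */
static int handle_write_error(struct buf *bp)
{
    if (!bp->error)
        return 0;                       /* fast path: no error */

    if (bp->async) {
        bp->error = 0;                  /* nobody is waiting on it */
        if (!bp->stale)
            requeue_delwri(bp);         /* try again later */
        return 0;
    }

    bp->stale = 1;                      /* sync: caller sees the error */
    shutdown_fs();
    return -1;
}

int main(void)
{
    struct buf async_bp = { 5, 1, 0 };
    struct buf sync_bp  = { 5, 0, 0 };

    handle_write_error(&async_bp);
    return handle_write_error(&sync_bp) ? 1 : 0;
}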
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 0e2ed43f16c7..b6ecd2061e7c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
@@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item { | |||
105 | xfs_buf_log_format_t bli_format; /* in-log header */ | 105 | xfs_buf_log_format_t bli_format; /* in-log header */ |
106 | } xfs_buf_log_item_t; | 106 | } xfs_buf_log_item_t; |
107 | 107 | ||
108 | /* | ||
109 | * This structure is used during recovery to record the buf log | ||
110 | * items which have been canceled and should not be replayed. | ||
111 | */ | ||
112 | typedef struct xfs_buf_cancel { | ||
113 | xfs_daddr_t bc_blkno; | ||
114 | uint bc_len; | ||
115 | int bc_refcount; | ||
116 | struct xfs_buf_cancel *bc_next; | ||
117 | } xfs_buf_cancel_t; | ||
118 | |||
119 | void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); | 108 | void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); |
120 | void xfs_buf_item_relse(struct xfs_buf *); | 109 | void xfs_buf_item_relse(struct xfs_buf *); |
121 | void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); | 110 | void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 30fa0e206fba..6102ac6d1dff 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -1995,13 +1995,12 @@ xfs_da_do_buf( | |||
1995 | error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); | 1995 | error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); |
1996 | if (unlikely(error == EFSCORRUPTED)) { | 1996 | if (unlikely(error == EFSCORRUPTED)) { |
1997 | if (xfs_error_level >= XFS_ERRLEVEL_LOW) { | 1997 | if (xfs_error_level >= XFS_ERRLEVEL_LOW) { |
1998 | cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n", | 1998 | xfs_alert(mp, "%s: bno %lld dir: inode %lld", |
1999 | (long long)bno); | 1999 | __func__, (long long)bno, |
2000 | cmn_err(CE_ALERT, "dir: inode %lld\n", | ||
2001 | (long long)dp->i_ino); | 2000 | (long long)dp->i_ino); |
2002 | for (i = 0; i < nmap; i++) { | 2001 | for (i = 0; i < nmap; i++) { |
2003 | cmn_err(CE_ALERT, | 2002 | xfs_alert(mp, |
2004 | "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n", | 2003 | "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", |
2005 | i, | 2004 | i, |
2006 | (long long)mapp[i].br_startoff, | 2005 | (long long)mapp[i].br_startoff, |
2007 | (long long)mapp[i].br_startblock, | 2006 | (long long)mapp[i].br_startblock, |
@@ -2042,7 +2041,7 @@ xfs_da_do_buf( | |||
2042 | mappedbno, nmapped, 0, &bp); | 2041 | mappedbno, nmapped, 0, &bp); |
2043 | break; | 2042 | break; |
2044 | case 3: | 2043 | case 3: |
2045 | xfs_baread(mp->m_ddev_targp, mappedbno, nmapped); | 2044 | xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped); |
2046 | error = 0; | 2045 | error = 0; |
2047 | bp = NULL; | 2046 | bp = NULL; |
2048 | break; | 2047 | break; |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 3b9582c60a22..9a84a85c03b1 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -202,7 +202,7 @@ xfs_swap_extents( | |||
202 | xfs_inode_t *tip, /* tmp inode */ | 202 | xfs_inode_t *tip, /* tmp inode */ |
203 | xfs_swapext_t *sxp) | 203 | xfs_swapext_t *sxp) |
204 | { | 204 | { |
205 | xfs_mount_t *mp; | 205 | xfs_mount_t *mp = ip->i_mount; |
206 | xfs_trans_t *tp; | 206 | xfs_trans_t *tp; |
207 | xfs_bstat_t *sbp = &sxp->sx_stat; | 207 | xfs_bstat_t *sbp = &sxp->sx_stat; |
208 | xfs_ifork_t *tempifp, *ifp, *tifp; | 208 | xfs_ifork_t *tempifp, *ifp, *tifp; |
@@ -212,16 +212,12 @@ xfs_swap_extents( | |||
212 | int taforkblks = 0; | 212 | int taforkblks = 0; |
213 | __uint64_t tmp; | 213 | __uint64_t tmp; |
214 | 214 | ||
215 | mp = ip->i_mount; | ||
216 | |||
217 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); | 215 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); |
218 | if (!tempifp) { | 216 | if (!tempifp) { |
219 | error = XFS_ERROR(ENOMEM); | 217 | error = XFS_ERROR(ENOMEM); |
220 | goto out; | 218 | goto out; |
221 | } | 219 | } |
222 | 220 | ||
223 | sbp = &sxp->sx_stat; | ||
224 | |||
225 | /* | 221 | /* |
226 | * we have to do two separate lock calls here to keep lockdep | 222 | * we have to do two separate lock calls here to keep lockdep |
227 | * happy. If we try to get all the locks in one call, lock will | 223 | * happy. If we try to get all the locks in one call, lock will |
@@ -270,9 +266,9 @@ xfs_swap_extents( | |||
270 | /* check inode formats now that data is flushed */ | 266 | /* check inode formats now that data is flushed */ |
271 | error = xfs_swap_extents_check_format(ip, tip); | 267 | error = xfs_swap_extents_check_format(ip, tip); |
272 | if (error) { | 268 | if (error) { |
273 | xfs_fs_cmn_err(CE_NOTE, mp, | 269 | xfs_notice(mp, |
274 | "%s: inode 0x%llx format is incompatible for exchanging.", | 270 | "%s: inode 0x%llx format is incompatible for exchanging.", |
275 | __FILE__, ip->i_ino); | 271 | __func__, ip->i_ino); |
276 | goto out_unlock; | 272 | goto out_unlock; |
277 | } | 273 | } |
278 | 274 | ||
@@ -377,6 +373,19 @@ xfs_swap_extents( | |||
377 | ip->i_d.di_format = tip->i_d.di_format; | 373 | ip->i_d.di_format = tip->i_d.di_format; |
378 | tip->i_d.di_format = tmp; | 374 | tip->i_d.di_format = tmp; |
379 | 375 | ||
376 | /* | ||
377 | * The extents in the source inode could still contain speculative | ||
378 | * preallocation beyond EOF (e.g. the file is open but not modified | ||
379 | * while defrag is in progress). In that case, we need to copy over the | ||
380 | * number of delalloc blocks the data fork in the source inode is | ||
381 | * tracking beyond EOF so that when the fork is truncated away when the | ||
382 | * temporary inode is unlinked we don't underrun the i_delayed_blks | ||
383 | * counter on that inode. | ||
384 | */ | ||
385 | ASSERT(tip->i_delayed_blks == 0); | ||
386 | tip->i_delayed_blks = ip->i_delayed_blks; | ||
387 | ip->i_delayed_blks = 0; | ||
388 | |||
380 | ilf_fields = XFS_ILOG_CORE; | 389 | ilf_fields = XFS_ILOG_CORE; |
381 | 390 | ||
382 | switch(ip->i_d.di_format) { | 391 | switch(ip->i_d.di_format) { |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index e5b153b2e6a3..dffba9ba0db6 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
@@ -49,8 +49,9 @@ typedef struct xfs_dinode { | |||
49 | __be32 di_uid; /* owner's user id */ | 49 | __be32 di_uid; /* owner's user id */ |
50 | __be32 di_gid; /* owner's group id */ | 50 | __be32 di_gid; /* owner's group id */ |
51 | __be32 di_nlink; /* number of links to file */ | 51 | __be32 di_nlink; /* number of links to file */ |
52 | __be16 di_projid; /* owner's project id */ | 52 | __be16 di_projid_lo; /* lower part of owner's project id */ |
53 | __u8 di_pad[8]; /* unused, zeroed space */ | 53 | __be16 di_projid_hi; /* higher part of owner's project id */ |
54 | __u8 di_pad[6]; /* unused, zeroed space */ | ||
54 | __be16 di_flushiter; /* incremented on flush */ | 55 | __be16 di_flushiter; /* incremented on flush */ |
55 | xfs_timestamp_t di_atime; /* time last accessed */ | 56 | xfs_timestamp_t di_atime; /* time last accessed */ |
56 | xfs_timestamp_t di_mtime; /* time last modified */ | 57 | xfs_timestamp_t di_mtime; /* time last modified */ |
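
Splitting di_projid into two __be16 halves grows project IDs to 32 bits without changing the on-disk layout, since the high half occupies bytes that were previously padding. A sketch of how the halves combine and split (helper names are illustrative, not the kernel's):

#include <stdio.h>
#include <stdint.h>

/* hypothetical helpers: combine/split the on-disk 16-bit halves */
static uint32_t projid_combine(uint16_t lo, uint16_t hi)
{
    return ((uint32_t)hi << 16) | lo;
}

static void projid_split(uint32_t projid, uint16_t *lo, uint16_t *hi)
{
    *lo = (uint16_t)(projid & 0xffff);
    *hi = (uint16_t)(projid >> 16);
}

int main(void)
{
    uint16_t lo, hi;
    uint32_t id = projid_combine(0x1234, 0x0002);   /* 0x00021234 */

    projid_split(id, &lo, &hi);
    printf("id=0x%08x lo=0x%04x hi=0x%04x\n", id, lo, hi);
    return 0;
}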
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index a1321bc7f192..dba7a71cedf3 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
@@ -159,7 +159,7 @@ xfs_dir_ino_validate( | |||
159 | XFS_AGINO_TO_INO(mp, agno, agino) == ino; | 159 | XFS_AGINO_TO_INO(mp, agno, agino) == ino; |
160 | if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, | 160 | if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, |
161 | XFS_RANDOM_DIR_INO_VALIDATE))) { | 161 | XFS_RANDOM_DIR_INO_VALIDATE))) { |
162 | xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", | 162 | xfs_warn(mp, "Invalid inode number 0x%Lx", |
163 | (unsigned long long) ino); | 163 | (unsigned long long) ino); |
164 | XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); | 164 | XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); |
165 | return XFS_ERROR(EFSCORRUPTED); | 165 | return XFS_ERROR(EFSCORRUPTED); |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 504be8640e91..ae891223be90 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -961,7 +961,7 @@ xfs_dir2_leaf_getdents( | |||
961 | if (i > ra_current && | 961 | if (i > ra_current && |
962 | map[ra_index].br_blockcount >= | 962 | map[ra_index].br_blockcount >= |
963 | mp->m_dirblkfsbs) { | 963 | mp->m_dirblkfsbs) { |
964 | xfs_baread(mp->m_ddev_targp, | 964 | xfs_buf_readahead(mp->m_ddev_targp, |
965 | XFS_FSB_TO_DADDR(mp, | 965 | XFS_FSB_TO_DADDR(mp, |
966 | map[ra_index].br_startblock + | 966 | map[ra_index].br_startblock + |
967 | ra_offset), | 967 | ra_offset), |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index f9a0864b696a..a0aab7d3294f 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -899,10 +899,9 @@ xfs_dir2_leafn_rebalance( | |||
899 | if(blk2->index < 0) { | 899 | if(blk2->index < 0) { |
900 | state->inleaf = 1; | 900 | state->inleaf = 1; |
901 | blk2->index = 0; | 901 | blk2->index = 0; |
902 | cmn_err(CE_ALERT, | 902 | xfs_alert(args->dp->i_mount, |
903 | "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: " | 903 | "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n", |
904 | "blk1->index %d\n", | 904 | __func__, blk1->index); |
905 | blk1->index); | ||
906 | } | 905 | } |
907 | } | 906 | } |
908 | 907 | ||
@@ -1641,26 +1640,22 @@ xfs_dir2_node_addname_int( | |||
1641 | } | 1640 | } |
1642 | 1641 | ||
1643 | if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { | 1642 | if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { |
1644 | cmn_err(CE_ALERT, | 1643 | xfs_alert(mp, |
1645 | "xfs_dir2_node_addname_int: dir ino " | 1644 | "%s: dir ino " "%llu needed freesp block %lld for\n" |
1646 | "%llu needed freesp block %lld for\n" | 1645 | " data block %lld, got %lld ifbno %llu lastfbno %d", |
1647 | " data block %lld, got %lld\n" | 1646 | __func__, (unsigned long long)dp->i_ino, |
1648 | " ifbno %llu lastfbno %d\n", | ||
1649 | (unsigned long long)dp->i_ino, | ||
1650 | (long long)xfs_dir2_db_to_fdb(mp, dbno), | 1647 | (long long)xfs_dir2_db_to_fdb(mp, dbno), |
1651 | (long long)dbno, (long long)fbno, | 1648 | (long long)dbno, (long long)fbno, |
1652 | (unsigned long long)ifbno, lastfbno); | 1649 | (unsigned long long)ifbno, lastfbno); |
1653 | if (fblk) { | 1650 | if (fblk) { |
1654 | cmn_err(CE_ALERT, | 1651 | xfs_alert(mp, |
1655 | " fblk 0x%p blkno %llu " | 1652 | " fblk 0x%p blkno %llu index %d magic 0x%x", |
1656 | "index %d magic 0x%x\n", | ||
1657 | fblk, | 1653 | fblk, |
1658 | (unsigned long long)fblk->blkno, | 1654 | (unsigned long long)fblk->blkno, |
1659 | fblk->index, | 1655 | fblk->index, |
1660 | fblk->magic); | 1656 | fblk->magic); |
1661 | } else { | 1657 | } else { |
1662 | cmn_err(CE_ALERT, | 1658 | xfs_alert(mp, " ... fblk is NULL"); |
1663 | " ... fblk is NULL\n"); | ||
1664 | } | 1659 | } |
1665 | XFS_ERROR_REPORT("xfs_dir2_node_addname_int", | 1660 | XFS_ERROR_REPORT("xfs_dir2_node_addname_int", |
1666 | XFS_ERRLEVEL_LOW, mp); | 1661 | XFS_ERRLEVEL_LOW, mp); |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ed9990267661..39f06336b99d 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -48,7 +48,7 @@ xfs_error_trap(int e) | |||
48 | break; | 48 | break; |
49 | if (e != xfs_etrap[i]) | 49 | if (e != xfs_etrap[i]) |
50 | continue; | 50 | continue; |
51 | cmn_err(CE_NOTE, "xfs_error_trap: error %d", e); | 51 | xfs_notice(NULL, "%s: error %d", __func__, e); |
52 | BUG(); | 52 | BUG(); |
53 | break; | 53 | break; |
54 | } | 54 | } |
@@ -58,6 +58,7 @@ xfs_error_trap(int e) | |||
58 | int xfs_etest[XFS_NUM_INJECT_ERROR]; | 58 | int xfs_etest[XFS_NUM_INJECT_ERROR]; |
59 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; | 59 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; |
60 | char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; | 60 | char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; |
61 | int xfs_error_test_active; | ||
61 | 62 | ||
62 | int | 63 | int |
63 | xfs_error_test(int error_tag, int *fsidp, char *expression, | 64 | xfs_error_test(int error_tag, int *fsidp, char *expression, |
@@ -73,7 +74,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression, | |||
73 | 74 | ||
74 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { | 75 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { |
75 | if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) { | 76 | if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) { |
76 | cmn_err(CE_WARN, | 77 | xfs_warn(NULL, |
77 | "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", | 78 | "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", |
78 | expression, file, line, xfs_etest_fsname[i]); | 79 | expression, file, line, xfs_etest_fsname[i]); |
79 | return 1; | 80 | return 1; |
@@ -94,25 +95,26 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp) | |||
94 | 95 | ||
95 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { | 96 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { |
96 | if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) { | 97 | if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) { |
97 | cmn_err(CE_WARN, "XFS error tag #%d on", error_tag); | 98 | xfs_warn(mp, "error tag #%d on", error_tag); |
98 | return 0; | 99 | return 0; |
99 | } | 100 | } |
100 | } | 101 | } |
101 | 102 | ||
102 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { | 103 | for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { |
103 | if (xfs_etest[i] == 0) { | 104 | if (xfs_etest[i] == 0) { |
104 | cmn_err(CE_WARN, "Turned on XFS error tag #%d", | 105 | xfs_warn(mp, "Turned on XFS error tag #%d", |
105 | error_tag); | 106 | error_tag); |
106 | xfs_etest[i] = error_tag; | 107 | xfs_etest[i] = error_tag; |
107 | xfs_etest_fsid[i] = fsid; | 108 | xfs_etest_fsid[i] = fsid; |
108 | len = strlen(mp->m_fsname); | 109 | len = strlen(mp->m_fsname); |
109 | xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); | 110 | xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); |
110 | strcpy(xfs_etest_fsname[i], mp->m_fsname); | 111 | strcpy(xfs_etest_fsname[i], mp->m_fsname); |
112 | xfs_error_test_active++; | ||
111 | return 0; | 113 | return 0; |
112 | } | 114 | } |
113 | } | 115 | } |
114 | 116 | ||
115 | cmn_err(CE_WARN, "error tag overflow, too many turned on"); | 117 | xfs_warn(mp, "error tag overflow, too many turned on"); |
116 | 118 | ||
117 | return 1; | 119 | return 1; |
118 | } | 120 | } |
@@ -131,55 +133,23 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) | |||
131 | if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) && | 133 | if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) && |
132 | xfs_etest[i] != 0) { | 134 | xfs_etest[i] != 0) { |
133 | cleared = 1; | 135 | cleared = 1; |
134 | cmn_err(CE_WARN, "Clearing XFS error tag #%d", | 136 | xfs_warn(mp, "Clearing XFS error tag #%d", |
135 | xfs_etest[i]); | 137 | xfs_etest[i]); |
136 | xfs_etest[i] = 0; | 138 | xfs_etest[i] = 0; |
137 | xfs_etest_fsid[i] = 0LL; | 139 | xfs_etest_fsid[i] = 0LL; |
138 | kmem_free(xfs_etest_fsname[i]); | 140 | kmem_free(xfs_etest_fsname[i]); |
139 | xfs_etest_fsname[i] = NULL; | 141 | xfs_etest_fsname[i] = NULL; |
142 | xfs_error_test_active--; | ||
140 | } | 143 | } |
141 | } | 144 | } |
142 | 145 | ||
143 | if (loud || cleared) | 146 | if (loud || cleared) |
144 | cmn_err(CE_WARN, | 147 | xfs_warn(mp, "Cleared all XFS error tags for filesystem"); |
145 | "Cleared all XFS error tags for filesystem \"%s\"", | ||
146 | mp->m_fsname); | ||
147 | 148 | ||
148 | return 0; | 149 | return 0; |
149 | } | 150 | } |
150 | #endif /* DEBUG */ | 151 | #endif /* DEBUG */ |
151 | 152 | ||
152 | |||
153 | void | ||
154 | xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...) | ||
155 | { | ||
156 | va_list ap; | ||
157 | |||
158 | va_start(ap, fmt); | ||
159 | xfs_fs_vcmn_err(level, mp, fmt, ap); | ||
160 | va_end(ap); | ||
161 | } | ||
162 | |||
163 | void | ||
164 | xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...) | ||
165 | { | ||
166 | va_list ap; | ||
167 | |||
168 | #ifdef DEBUG | ||
169 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); | ||
170 | #endif | ||
171 | |||
172 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) | ||
173 | && (level & CE_ALERT)) { | ||
174 | level &= ~CE_ALERT; | ||
175 | level |= CE_PANIC; | ||
176 | cmn_err(CE_ALERT, "XFS: Transforming an alert into a BUG."); | ||
177 | } | ||
178 | va_start(ap, fmt); | ||
179 | xfs_fs_vcmn_err(level, mp, fmt, ap); | ||
180 | va_end(ap); | ||
181 | } | ||
182 | |||
183 | void | 153 | void |
184 | xfs_error_report( | 154 | xfs_error_report( |
185 | const char *tag, | 155 | const char *tag, |
@@ -190,9 +160,8 @@ xfs_error_report( | |||
190 | inst_t *ra) | 160 | inst_t *ra) |
191 | { | 161 | { |
192 | if (level <= xfs_error_level) { | 162 | if (level <= xfs_error_level) { |
193 | xfs_cmn_err(XFS_PTAG_ERROR_REPORT, | 163 | xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, |
194 | CE_ALERT, mp, | 164 | "Internal error %s at line %d of file %s. Caller 0x%p\n", |
195 | "XFS internal error %s at line %d of file %s. Caller 0x%p\n", | ||
196 | tag, linenum, filename, ra); | 165 | tag, linenum, filename, ra); |
197 | 166 | ||
198 | xfs_stack_trace(); | 167 | xfs_stack_trace(); |
@@ -212,4 +181,5 @@ xfs_corruption_error( | |||
212 | if (level <= xfs_error_level) | 181 | if (level <= xfs_error_level) |
213 | xfs_hex_dump(p, 16); | 182 | xfs_hex_dump(p, 16); |
214 | xfs_error_report(tag, level, mp, filename, linenum, ra); | 183 | xfs_error_report(tag, level, mp, filename, linenum, ra); |
184 | xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); | ||
215 | } | 185 | } |
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index c2c1a072bb82..079a367f44ee 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -127,16 +127,17 @@ extern void xfs_corruption_error(const char *tag, int level, | |||
127 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT | 127 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT |
128 | 128 | ||
129 | #ifdef DEBUG | 129 | #ifdef DEBUG |
130 | extern int xfs_error_test_active; | ||
130 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); | 131 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); |
131 | 132 | ||
132 | #define XFS_NUM_INJECT_ERROR 10 | 133 | #define XFS_NUM_INJECT_ERROR 10 |
133 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ | 134 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ |
134 | ((expr) || \ | 135 | ((expr) || (xfs_error_test_active && \ |
135 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ | 136 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ |
136 | (rf))) | 137 | (rf)))) |
137 | 138 | ||
138 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); | 139 | extern int xfs_errortag_add(int error_tag, struct xfs_mount *mp); |
139 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | 140 | extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud); |
140 | #else | 141 | #else |
141 | #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) | 142 | #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) |
142 | #define xfs_errortag_add(tag, mp) (ENOSYS) | 143 | #define xfs_errortag_add(tag, mp) (ENOSYS) |
@@ -144,10 +145,8 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | |||
144 | #endif /* DEBUG */ | 145 | #endif /* DEBUG */ |
145 | 146 | ||
146 | /* | 147 | /* |
147 | * XFS panic tags -- allow a call to xfs_cmn_err() be turned into | 148 | * XFS panic tags -- allow a call to xfs_alert_tag() be turned into |
148 | * a panic by setting xfs_panic_mask in a | 149 | * a panic by setting xfs_panic_mask in a sysctl. |
149 | * sysctl. update xfs_max[XFS_PARAM] if | ||
150 | * more are added. | ||
151 | */ | 150 | */ |
152 | #define XFS_NO_PTAG 0 | 151 | #define XFS_NO_PTAG 0 |
153 | #define XFS_PTAG_IFLUSH 0x00000001 | 152 | #define XFS_PTAG_IFLUSH 0x00000001 |
@@ -159,23 +158,4 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | |||
159 | #define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 | 158 | #define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 |
160 | #define XFS_PTAG_FSBLOCK_ZERO 0x00000080 | 159 | #define XFS_PTAG_FSBLOCK_ZERO 0x00000080 |
161 | 160 | ||
162 | struct xfs_mount; | ||
163 | |||
164 | extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp, | ||
165 | char *fmt, va_list ap) | ||
166 | __attribute__ ((format (printf, 3, 0))); | ||
167 | extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, | ||
168 | char *fmt, ...) | ||
169 | __attribute__ ((format (printf, 4, 5))); | ||
170 | extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...) | ||
171 | __attribute__ ((format (printf, 3, 4))); | ||
172 | |||
173 | extern void xfs_hex_dump(void *p, int length); | ||
174 | |||
175 | #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ | ||
176 | xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) | ||
177 | |||
178 | #define xfs_fs_mount_cmn_err(f, fmt, args...) \ | ||
179 | ((f & XFS_MFSI_QUIET)? (void)0 : cmn_err(CE_WARN, "XFS: " fmt, ## args)) | ||
180 | |||
181 | #endif /* __XFS_ERROR_H__ */ | 161 | #endif /* __XFS_ERROR_H__ */ |
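
The reworked XFS_TEST_ERROR macro adds a cheap gate: xfs_error_test() is only called while at least one error tag is armed, which the xfs_error_test_active counter maintained by xfs_errortag_add()/xfs_errortag_clearall() tracks. A minimal sketch of that counter-gated macro pattern:

#include <stdio.h>

static int error_test_active;   /* bumped whenever a tag is armed */

/* the expensive path: only reached when a tag is actually armed */
static int error_test(int tag)
{
    printf("checking injection tag %d\n", tag);
    return tag == 42;
}

/* gate the call the way the new XFS_TEST_ERROR macro does */
#define TEST_ERROR(expr, tag) \
    ((expr) || (error_test_active && error_test(tag)))

int main(void)
{
    /* no tags armed: error_test() is never called */
    printf("%d\n", TEST_ERROR(0, 42));

    error_test_active++;    /* as xfs_errortag_add() would do */
    printf("%d\n", TEST_ERROR(0, 42));
    return 0;
}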
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index a55e687bf562..d22e62623437 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -48,6 +48,28 @@ xfs_efi_item_free( | |||
48 | } | 48 | } |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * Freeing the efi requires that we remove it from the AIL if it has already | ||
52 | * been placed there. However, the EFI may not yet have been placed in the AIL | ||
53 | * when called by xfs_efi_release() from EFD processing due to the ordering of | ||
54 | * committed vs unpin operations in bulk insert operations. Hence the | ||
55 | * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees | ||
56 | * the EFI. | ||
57 | */ | ||
58 | STATIC void | ||
59 | __xfs_efi_release( | ||
60 | struct xfs_efi_log_item *efip) | ||
61 | { | ||
62 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | ||
63 | |||
64 | if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { | ||
65 | spin_lock(&ailp->xa_lock); | ||
66 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
67 | xfs_trans_ail_delete(ailp, &efip->efi_item); | ||
68 | xfs_efi_item_free(efip); | ||
69 | } | ||
70 | } | ||
71 | |||
72 | /* | ||
51 | * This returns the number of iovecs needed to log the given efi item. | 73 | * This returns the number of iovecs needed to log the given efi item. |
52 | * We only need 1 iovec for an efi item. It just logs the efi_log_format | 74 | * We only need 1 iovec for an efi item. It just logs the efi_log_format |
53 | * structure. | 75 | * structure. |
@@ -74,7 +96,8 @@ xfs_efi_item_format( | |||
74 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 96 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
75 | uint size; | 97 | uint size; |
76 | 98 | ||
77 | ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); | 99 | ASSERT(atomic_read(&efip->efi_next_extent) == |
100 | efip->efi_format.efi_nextents); | ||
78 | 101 | ||
79 | efip->efi_format.efi_type = XFS_LI_EFI; | 102 | efip->efi_format.efi_type = XFS_LI_EFI; |
80 | 103 | ||
@@ -99,10 +122,12 @@ xfs_efi_item_pin( | |||
99 | } | 122 | } |
100 | 123 | ||
101 | /* | 124 | /* |
102 | * While EFIs cannot really be pinned, the unpin operation is the | 125 | * While EFIs cannot really be pinned, the unpin operation is the last place at |
103 | * last place at which the EFI is manipulated during a transaction. | 126 | * which the EFI is manipulated during a transaction. If we are being asked to |
104 | * Here we coordinate with xfs_efi_cancel() to determine who gets to | 127 | * remove the EFI, it's because the transaction has been cancelled and by |
105 | * free the EFI. | 128 | * definition that means the EFI cannot be in the AIL, so remove it from the |
129 | * transaction and free it. Otherwise coordinate with xfs_efi_release() (via | ||
130 | * XFS_EFI_COMMITTED) to determine who gets to free the EFI. | ||
106 | */ | 131 | */ |
107 | STATIC void | 132 | STATIC void |
108 | xfs_efi_item_unpin( | 133 | xfs_efi_item_unpin( |
@@ -110,20 +135,15 @@ xfs_efi_item_unpin( | |||
110 | int remove) | 135 | int remove) |
111 | { | 136 | { |
112 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 137 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
113 | struct xfs_ail *ailp = lip->li_ailp; | ||
114 | 138 | ||
115 | spin_lock(&ailp->xa_lock); | 139 | if (remove) { |
116 | if (efip->efi_flags & XFS_EFI_CANCELED) { | 140 | ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); |
117 | if (remove) | 141 | if (lip->li_desc) |
118 | xfs_trans_del_item(lip); | 142 | xfs_trans_del_item(lip); |
119 | |||
120 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
121 | xfs_trans_ail_delete(ailp, lip); | ||
122 | xfs_efi_item_free(efip); | 143 | xfs_efi_item_free(efip); |
123 | } else { | 144 | return; |
124 | efip->efi_flags |= XFS_EFI_COMMITTED; | ||
125 | spin_unlock(&ailp->xa_lock); | ||
126 | } | 145 | } |
146 | __xfs_efi_release(efip); | ||
127 | } | 147 | } |
128 | 148 | ||
129 | /* | 149 | /* |
@@ -152,16 +172,20 @@ xfs_efi_item_unlock( | |||
152 | } | 172 | } |
153 | 173 | ||
154 | /* | 174 | /* |
155 | * The EFI is logged only once and cannot be moved in the log, so | 175 | * The EFI is logged only once and cannot be moved in the log, so simply return |
156 | * simply return the lsn at which it's been logged. The canceled | 176 | * the lsn at which it's been logged. For bulk transaction committed |
157 | * flag is not paid any attention here. Checking for that is delayed | 177 | * processing, the EFI may be processed but not yet unpinned prior to the EFD |
158 | * until the EFI is unpinned. | 178 | * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected |
179 | * when processing the EFD. | ||
159 | */ | 180 | */ |
160 | STATIC xfs_lsn_t | 181 | STATIC xfs_lsn_t |
161 | xfs_efi_item_committed( | 182 | xfs_efi_item_committed( |
162 | struct xfs_log_item *lip, | 183 | struct xfs_log_item *lip, |
163 | xfs_lsn_t lsn) | 184 | xfs_lsn_t lsn) |
164 | { | 185 | { |
186 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | ||
187 | |||
188 | set_bit(XFS_EFI_COMMITTED, &efip->efi_flags); | ||
165 | return lsn; | 189 | return lsn; |
166 | } | 190 | } |
167 | 191 | ||
@@ -230,6 +254,7 @@ xfs_efi_init( | |||
230 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); | 254 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); |
231 | efip->efi_format.efi_nextents = nextents; | 255 | efip->efi_format.efi_nextents = nextents; |
232 | efip->efi_format.efi_id = (__psint_t)(void*)efip; | 256 | efip->efi_format.efi_id = (__psint_t)(void*)efip; |
257 | atomic_set(&efip->efi_next_extent, 0); | ||
233 | 258 | ||
234 | return efip; | 259 | return efip; |
235 | } | 260 | } |
@@ -289,37 +314,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) | |||
289 | } | 314 | } |
290 | 315 | ||
291 | /* | 316 | /* |
292 | * This is called by the efd item code below to release references to | 317 | * This is called by the efd item code below to release references to the given |
293 | * the given efi item. Each efd calls this with the number of | 318 | * efi item. Each efd calls this with the number of extents that it has |
294 | * extents that it has logged, and when the sum of these reaches | 319 | * logged, and when the sum of these reaches the total number of extents logged |
295 | * the total number of extents logged by this efi item we can free | 320 | * by this efi item we can free the efi item. |
296 | * the efi item. | ||
297 | * | ||
298 | * Freeing the efi item requires that we remove it from the AIL. | ||
299 | * We'll use the AIL lock to protect our counters as well as | ||
300 | * the removal from the AIL. | ||
301 | */ | 321 | */ |
302 | void | 322 | void |
303 | xfs_efi_release(xfs_efi_log_item_t *efip, | 323 | xfs_efi_release(xfs_efi_log_item_t *efip, |
304 | uint nextents) | 324 | uint nextents) |
305 | { | 325 | { |
306 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | 326 | ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); |
307 | int extents_left; | 327 | if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) |
308 | 328 | __xfs_efi_release(efip); | |
309 | ASSERT(efip->efi_next_extent > 0); | ||
310 | ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); | ||
311 | |||
312 | spin_lock(&ailp->xa_lock); | ||
313 | ASSERT(efip->efi_next_extent >= nextents); | ||
314 | efip->efi_next_extent -= nextents; | ||
315 | extents_left = efip->efi_next_extent; | ||
316 | if (extents_left == 0) { | ||
317 | /* xfs_trans_ail_delete() drops the AIL lock. */ | ||
318 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); | ||
319 | xfs_efi_item_free(efip); | ||
320 | } else { | ||
321 | spin_unlock(&ailp->xa_lock); | ||
322 | } | ||
323 | } | 329 | } |
324 | 330 | ||
325 | static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) | 331 | static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) |
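Taken together, these hunks drop the AIL lock from EFI release: the
extent count becomes an atomic_t, XFS_EFI_CANCELED goes away, and a
single COMMITTED bit arbitrates between the two possible last users
(unpin and EFD processing) -- whichever arrives second frees the item.
A hedged sketch of that handshake; the names and the free_item() helper
are illustrative, not the kernel's:

    #include <linux/atomic.h>
    #include <linux/bitops.h>

    #define ITEM_COMMITTED	0	/* bit number, not a mask */

    struct item {
    	atomic_t	remaining;	/* extents not yet released */
    	unsigned long	flags;
    };

    static void free_item(struct item *it)
    {
    	/* hypothetical teardown: remove from AIL if present, then free */
    }

    /*
     * COMMITTED is set once, at commit time.  Of the two release paths
     * that follow, the first to arrive clears the bit and backs off;
     * the second finds it already clear and knows it must free.
     */
    static void __item_release(struct item *it)
    {
    	if (!test_and_clear_bit(ITEM_COMMITTED, &it->flags))
    		free_item(it);
    }

    static void item_release(struct item *it, unsigned int nextents)
    {
    	/* only the call releasing the last logged extent proceeds */
    	if (atomic_sub_and_test(nextents, &it->remaining))
    		__item_release(it);
    }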
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 0d22c56fdf64..375f68e42531 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h | |||
@@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 { | |||
111 | #define XFS_EFI_MAX_FAST_EXTENTS 16 | 111 | #define XFS_EFI_MAX_FAST_EXTENTS 16 |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * Define EFI flags. | 114 | * Define EFI flag bits. Manipulated by set/clear/test_bit operators. |
115 | */ | 115 | */ |
116 | #define XFS_EFI_RECOVERED 0x1 | 116 | #define XFS_EFI_RECOVERED 1 |
117 | #define XFS_EFI_COMMITTED 0x2 | 117 | #define XFS_EFI_COMMITTED 2 |
118 | #define XFS_EFI_CANCELED 0x4 | ||
119 | 118 | ||
120 | /* | 119 | /* |
121 | * This is the "extent free intention" log item. It is used | 120 | * This is the "extent free intention" log item. It is used |
@@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 { | |||
125 | */ | 124 | */ |
126 | typedef struct xfs_efi_log_item { | 125 | typedef struct xfs_efi_log_item { |
127 | xfs_log_item_t efi_item; | 126 | xfs_log_item_t efi_item; |
128 | uint efi_flags; /* misc flags */ | 127 | atomic_t efi_next_extent; |
129 | uint efi_next_extent; | 128 | unsigned long efi_flags; /* misc flags */ |
130 | xfs_efi_log_format_t efi_format; | 129 | xfs_efi_log_format_t efi_format; |
131 | } xfs_efi_log_item_t; | 130 | } xfs_efi_log_item_t; |
132 | 131 | ||
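Note the quiet change of convention above: 0x1/0x2/0x4 were masks for
"flags |= ..." updates made under the AIL lock, while 1 and 2 are bit
numbers for set_bit()/test_bit(), which address bits by index within an
unsigned long. Mixing the two styles is an easy bug to write; a small
demonstration:

    #include <linux/bitops.h>
    #include <linux/bug.h>

    static void flag_style_demo(void)
    {
    	unsigned long flags = 0;

    	set_bit(2, &flags);	/* bit number 2, i.e. the value 0x4 */
    	/* flags |= 2;		   mask style would have set 0x2 */
    	BUG_ON(!test_bit(2, &flags));
    	BUG_ON(flags != 0x4);
    }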
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 9b715dce5699..9124425b7f2f 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -744,9 +744,15 @@ xfs_filestream_new_ag( | |||
744 | * If the file's parent directory is known, take its iolock in exclusive | 744 | * If the file's parent directory is known, take its iolock in exclusive |
745 | * mode to prevent two sibling files from racing each other to migrate | 745 | * mode to prevent two sibling files from racing each other to migrate |
746 | * themselves and their parent to different AGs. | 746 | * themselves and their parent to different AGs. |
747 | * | ||
748 | * Note that we lock the parent directory iolock inside the child | ||
749 | * iolock here. That's fine as we never hold both parent and child | ||
750 | * iolock in any other place. This is different from the ilock, | ||
751 | * which requires locking of the child after the parent for namespace | ||
752 | * operations. | ||
747 | */ | 753 | */ |
748 | if (pip) | 754 | if (pip) |
749 | xfs_ilock(pip, XFS_IOLOCK_EXCL); | 755 | xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); |
750 | 756 | ||
751 | /* | 757 | /* |
752 | * A new AG needs to be found for the file. If the file's parent | 758 | * A new AG needs to be found for the file. If the file's parent |
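The XFS_IOLOCK_PARENT flag added above is there for lockdep's benefit:
parent and child iolocks share one lock class, so taking the parent's
while already holding the child's would otherwise be reported as
recursive locking. The usual kernel idiom is a nesting subclass; a
sketch on a plain rwsem (the subclass value is illustrative):

    #include <linux/rwsem.h>

    static void lock_child_then_parent(struct rw_semaphore *child,
    				   struct rw_semaphore *parent)
    {
    	down_write(child);		/* default subclass 0 */
    	down_write_nested(parent, 1);	/* tell lockdep this nesting is intended */
    }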
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 87c2e9d02288..8f6fc1a96386 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -293,9 +293,11 @@ typedef struct xfs_bstat { | |||
293 | __s32 bs_extsize; /* extent size */ | 293 | __s32 bs_extsize; /* extent size */ |
294 | __s32 bs_extents; /* number of extents */ | 294 | __s32 bs_extents; /* number of extents */ |
295 | __u32 bs_gen; /* generation count */ | 295 | __u32 bs_gen; /* generation count */ |
296 | __u16 bs_projid; /* project id */ | 296 | __u16 bs_projid_lo; /* lower part of project id */ |
297 | #define bs_projid bs_projid_lo /* (previously just bs_projid) */ | ||
297 | __u16 bs_forkoff; /* inode fork offset in bytes */ | 298 | __u16 bs_forkoff; /* inode fork offset in bytes */ |
298 | unsigned char bs_pad[12]; /* pad space, unused */ | 299 | __u16 bs_projid_hi; /* higher part of project id */ |
300 | unsigned char bs_pad[10]; /* pad space, unused */ | ||
299 | __u32 bs_dmevmask; /* DMIG event mask */ | 301 | __u32 bs_dmevmask; /* DMIG event mask */ |
300 | __u16 bs_dmstate; /* DMIG state info */ | 302 | __u16 bs_dmstate; /* DMIG state info */ |
301 | __u16 bs_aextents; /* attribute number of extents */ | 303 | __u16 bs_aextents; /* attribute number of extents */ |
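The bs_projid split above reclaims two pad bytes to widen project IDs
to 32 bits, while the #define keeps old callers that read bs_projid
compiling (they simply see the low 16 bits). Reassembling the full ID
is a shift and an or; a sketch with illustrative helper names:

    #include <linux/types.h>

    static inline __u32 bstat_get_projid(const struct xfs_bstat *bs)
    {
    	return ((__u32)bs->bs_projid_hi << 16) | bs->bs_projid_lo;
    }

    static inline void bstat_set_projid(struct xfs_bstat *bs, __u32 prid)
    {
    	bs->bs_projid_lo = (__u16)(prid & 0xffff);
    	bs->bs_projid_hi = (__u16)(prid >> 16);
    }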
@@ -448,6 +450,7 @@ typedef struct xfs_handle { | |||
448 | /* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ | 450 | /* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ |
449 | /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ | 451 | /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ |
450 | #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) | 452 | #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) |
453 | #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) | ||
451 | 454 | ||
452 | /* | 455 | /* |
453 | * ioctl commands that replace IRIX syssgi()'s | 456 | * ioctl commands that replace IRIX syssgi()'s |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 43b1d5699335..9153d2c77caf 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -53,6 +53,9 @@ xfs_fs_geometry( | |||
53 | xfs_fsop_geom_t *geo, | 53 | xfs_fsop_geom_t *geo, |
54 | int new_version) | 54 | int new_version) |
55 | { | 55 | { |
56 | |||
57 | memset(geo, 0, sizeof(*geo)); | ||
58 | |||
56 | geo->blocksize = mp->m_sb.sb_blocksize; | 59 | geo->blocksize = mp->m_sb.sb_blocksize; |
57 | geo->rtextsize = mp->m_sb.sb_rextsize; | 60 | geo->rtextsize = mp->m_sb.sb_rextsize; |
58 | geo->agblocks = mp->m_sb.sb_agblocks; | 61 | geo->agblocks = mp->m_sb.sb_agblocks; |
@@ -144,12 +147,11 @@ xfs_growfs_data_private( | |||
144 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) | 147 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) |
145 | return error; | 148 | return error; |
146 | dpct = pct - mp->m_sb.sb_imax_pct; | 149 | dpct = pct - mp->m_sb.sb_imax_pct; |
147 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 150 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, |
148 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), | 151 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), |
149 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 152 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); |
150 | if (error) | 153 | if (!bp) |
151 | return error; | 154 | return EIO; |
152 | ASSERT(bp); | ||
153 | xfs_buf_relse(bp); | 155 | xfs_buf_relse(bp); |
154 | 156 | ||
155 | new = nb; /* use new as a temporary here */ | 157 | new = nb; /* use new as a temporary here */ |
@@ -375,6 +377,7 @@ xfs_growfs_data_private( | |||
375 | mp->m_maxicount = icount << mp->m_sb.sb_inopblog; | 377 | mp->m_maxicount = icount << mp->m_sb.sb_inopblog; |
376 | } else | 378 | } else |
377 | mp->m_maxicount = 0; | 379 | mp->m_maxicount = 0; |
380 | xfs_set_low_space_thresholds(mp); | ||
378 | 381 | ||
379 | /* update secondary superblocks. */ | 382 | /* update secondary superblocks. */ |
380 | for (agno = 1; agno < nagcount; agno++) { | 383 | for (agno = 1; agno < nagcount; agno++) { |
@@ -382,8 +385,8 @@ xfs_growfs_data_private( | |||
382 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), | 385 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), |
383 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 386 | XFS_FSS_TO_BB(mp, 1), 0, &bp); |
384 | if (error) { | 387 | if (error) { |
385 | xfs_fs_cmn_err(CE_WARN, mp, | 388 | xfs_warn(mp, |
386 | "error %d reading secondary superblock for ag %d", | 389 | "error %d reading secondary superblock for ag %d", |
387 | error, agno); | 390 | error, agno); |
388 | break; | 391 | break; |
389 | } | 392 | } |
@@ -396,7 +399,7 @@ xfs_growfs_data_private( | |||
396 | if (!(error = xfs_bwrite(mp, bp))) { | 399 | if (!(error = xfs_bwrite(mp, bp))) { |
397 | continue; | 400 | continue; |
398 | } else { | 401 | } else { |
399 | xfs_fs_cmn_err(CE_WARN, mp, | 402 | xfs_warn(mp, |
400 | "write error %d updating secondary superblock for ag %d", | 403 | "write error %d updating secondary superblock for ag %d", |
401 | error, agno); | 404 | error, agno); |
402 | break; /* no point in continuing */ | 405 | break; /* no point in continuing */ |
@@ -597,7 +600,8 @@ out: | |||
597 | * the extra reserve blocks from the reserve..... | 600 | * the extra reserve blocks from the reserve..... |
598 | */ | 601 | */ |
599 | int error; | 602 | int error; |
600 | error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0); | 603 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
604 | fdblks_delta, 0); | ||
601 | if (error == ENOSPC) | 605 | if (error == ENOSPC) |
602 | goto retry; | 606 | goto retry; |
603 | } | 607 | } |
@@ -611,12 +615,13 @@ out: | |||
611 | * | 615 | * |
612 | * We cannot use an inode here for this - that will push dirty state back up | 616 | * We cannot use an inode here for this - that will push dirty state back up |
613 | * into the VFS and then periodic inode flushing will prevent log covering from | 617 | * into the VFS and then periodic inode flushing will prevent log covering from |
614 | * making progress. Hence we log a field in the superblock instead. | 618 | * making progress. Hence we log a field in the superblock instead and use a |
619 | * synchronous transaction to ensure the superblock is immediately unpinned | ||
620 | * and can be written back. | ||
615 | */ | 621 | */ |
616 | int | 622 | int |
617 | xfs_fs_log_dummy( | 623 | xfs_fs_log_dummy( |
618 | xfs_mount_t *mp, | 624 | xfs_mount_t *mp) |
619 | int flags) | ||
620 | { | 625 | { |
621 | xfs_trans_t *tp; | 626 | xfs_trans_t *tp; |
622 | int error; | 627 | int error; |
@@ -631,8 +636,7 @@ xfs_fs_log_dummy( | |||
631 | 636 | ||
632 | /* log the UUID because it is an unchanging field */ | 637 | /* log the UUID because it is an unchanging field */ |
633 | xfs_mod_sb(tp, XFS_SB_UUID); | 638 | xfs_mod_sb(tp, XFS_SB_UUID); |
634 | if (flags & SYNC_WAIT) | 639 | xfs_trans_set_sync(tp); |
635 | xfs_trans_set_sync(tp); | ||
636 | return xfs_trans_commit(tp, 0); | 640 | return xfs_trans_commit(tp, 0); |
637 | } | 641 | } |
638 | 642 | ||
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index a786c5212c1e..1b6a98b66886 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h | |||
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); | |||
25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, | 25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, |
26 | xfs_fsop_resblks_t *outval); | 26 | xfs_fsop_resblks_t *outval); |
27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); | 27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); |
28 | extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); | 28 | extern int xfs_fs_log_dummy(struct xfs_mount *mp); |
29 | 29 | ||
30 | #endif /* __XFS_FSOPS_H__ */ | 30 | #endif /* __XFS_FSOPS_H__ */ |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 5371d2dc360e..84ebeec16642 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -212,7 +212,7 @@ xfs_ialloc_inode_init( | |||
212 | * to log a whole cluster of inodes instead of all the | 212 | * to log a whole cluster of inodes instead of all the |
213 | * individual transactions causing a lot of log traffic. | 213 | * individual transactions causing a lot of log traffic. |
214 | */ | 214 | */ |
215 | xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); | 215 | xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); |
216 | for (i = 0; i < ninodes; i++) { | 216 | for (i = 0; i < ninodes; i++) { |
217 | int ioffset = i << mp->m_sb.sb_inodelog; | 217 | int ioffset = i << mp->m_sb.sb_inodelog; |
218 | uint isize = sizeof(struct xfs_dinode); | 218 | uint isize = sizeof(struct xfs_dinode); |
@@ -1055,28 +1055,23 @@ xfs_difree( | |||
1055 | */ | 1055 | */ |
1056 | agno = XFS_INO_TO_AGNO(mp, inode); | 1056 | agno = XFS_INO_TO_AGNO(mp, inode); |
1057 | if (agno >= mp->m_sb.sb_agcount) { | 1057 | if (agno >= mp->m_sb.sb_agcount) { |
1058 | cmn_err(CE_WARN, | 1058 | xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", |
1059 | "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", | 1059 | __func__, agno, mp->m_sb.sb_agcount); |
1060 | agno, mp->m_sb.sb_agcount, mp->m_fsname); | ||
1061 | ASSERT(0); | 1060 | ASSERT(0); |
1062 | return XFS_ERROR(EINVAL); | 1061 | return XFS_ERROR(EINVAL); |
1063 | } | 1062 | } |
1064 | agino = XFS_INO_TO_AGINO(mp, inode); | 1063 | agino = XFS_INO_TO_AGINO(mp, inode); |
1065 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { | 1064 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { |
1066 | cmn_err(CE_WARN, | 1065 | xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", |
1067 | "xfs_difree: inode != XFS_AGINO_TO_INO() " | 1066 | __func__, (unsigned long long)inode, |
1068 | "(%llu != %llu) on %s. Returning EINVAL.", | 1067 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); |
1069 | (unsigned long long)inode, | ||
1070 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino), | ||
1071 | mp->m_fsname); | ||
1072 | ASSERT(0); | 1068 | ASSERT(0); |
1073 | return XFS_ERROR(EINVAL); | 1069 | return XFS_ERROR(EINVAL); |
1074 | } | 1070 | } |
1075 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); | 1071 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); |
1076 | if (agbno >= mp->m_sb.sb_agblocks) { | 1072 | if (agbno >= mp->m_sb.sb_agblocks) { |
1077 | cmn_err(CE_WARN, | 1073 | xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", |
1078 | "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", | 1074 | __func__, agbno, mp->m_sb.sb_agblocks); |
1079 | agbno, mp->m_sb.sb_agblocks, mp->m_fsname); | ||
1080 | ASSERT(0); | 1075 | ASSERT(0); |
1081 | return XFS_ERROR(EINVAL); | 1076 | return XFS_ERROR(EINVAL); |
1082 | } | 1077 | } |
@@ -1085,9 +1080,8 @@ xfs_difree( | |||
1085 | */ | 1080 | */ |
1086 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | 1081 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); |
1087 | if (error) { | 1082 | if (error) { |
1088 | cmn_err(CE_WARN, | 1083 | xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", |
1089 | "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", | 1084 | __func__, error); |
1090 | error, mp->m_fsname); | ||
1091 | return error; | 1085 | return error; |
1092 | } | 1086 | } |
1093 | agi = XFS_BUF_TO_AGI(agbp); | 1087 | agi = XFS_BUF_TO_AGI(agbp); |
@@ -1106,17 +1100,15 @@ xfs_difree( | |||
1106 | * Look for the entry describing this inode. | 1100 | * Look for the entry describing this inode. |
1107 | */ | 1101 | */ |
1108 | if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { | 1102 | if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { |
1109 | cmn_err(CE_WARN, | 1103 | xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.", |
1110 | "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.", | 1104 | __func__, error); |
1111 | error, mp->m_fsname); | ||
1112 | goto error0; | 1105 | goto error0; |
1113 | } | 1106 | } |
1114 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1107 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1115 | error = xfs_inobt_get_rec(cur, &rec, &i); | 1108 | error = xfs_inobt_get_rec(cur, &rec, &i); |
1116 | if (error) { | 1109 | if (error) { |
1117 | cmn_err(CE_WARN, | 1110 | xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", |
1118 | "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", | 1111 | __func__, error); |
1119 | error, mp->m_fsname); | ||
1120 | goto error0; | 1112 | goto error0; |
1121 | } | 1113 | } |
1122 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1114 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
@@ -1157,8 +1149,8 @@ xfs_difree( | |||
1157 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); | 1149 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); |
1158 | 1150 | ||
1159 | if ((error = xfs_btree_delete(cur, &i))) { | 1151 | if ((error = xfs_btree_delete(cur, &i))) { |
1160 | cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", | 1152 | xfs_warn(mp, "%s: xfs_btree_delete returned error %d.", |
1161 | error, mp->m_fsname); | 1153 | __func__, error); |
1162 | goto error0; | 1154 | goto error0; |
1163 | } | 1155 | } |
1164 | 1156 | ||
@@ -1170,9 +1162,8 @@ xfs_difree( | |||
1170 | 1162 | ||
1171 | error = xfs_inobt_update(cur, &rec); | 1163 | error = xfs_inobt_update(cur, &rec); |
1172 | if (error) { | 1164 | if (error) { |
1173 | cmn_err(CE_WARN, | 1165 | xfs_warn(mp, "%s: xfs_inobt_update returned error %d.", |
1174 | "xfs_difree: xfs_inobt_update returned an error %d on %s.", | 1166 | __func__, error); |
1175 | error, mp->m_fsname); | ||
1176 | goto error0; | 1167 | goto error0; |
1177 | } | 1168 | } |
1178 | 1169 | ||
@@ -1218,10 +1209,9 @@ xfs_imap_lookup( | |||
1218 | 1209 | ||
1219 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | 1210 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); |
1220 | if (error) { | 1211 | if (error) { |
1221 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1212 | xfs_alert(mp, |
1222 | "xfs_ialloc_read_agi() returned " | 1213 | "%s: xfs_ialloc_read_agi() returned error %d, agno %d", |
1223 | "error %d, agno %d", | 1214 | __func__, error, agno); |
1224 | error, agno); | ||
1225 | return error; | 1215 | return error; |
1226 | } | 1216 | } |
1227 | 1217 | ||
@@ -1299,24 +1289,21 @@ xfs_imap( | |||
1299 | if (flags & XFS_IGET_UNTRUSTED) | 1289 | if (flags & XFS_IGET_UNTRUSTED) |
1300 | return XFS_ERROR(EINVAL); | 1290 | return XFS_ERROR(EINVAL); |
1301 | if (agno >= mp->m_sb.sb_agcount) { | 1291 | if (agno >= mp->m_sb.sb_agcount) { |
1302 | xfs_fs_cmn_err(CE_ALERT, mp, | 1292 | xfs_alert(mp, |
1303 | "xfs_imap: agno (%d) >= " | 1293 | "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", |
1304 | "mp->m_sb.sb_agcount (%d)", | 1294 | __func__, agno, mp->m_sb.sb_agcount); |
1305 | agno, mp->m_sb.sb_agcount); | ||
1306 | } | 1295 | } |
1307 | if (agbno >= mp->m_sb.sb_agblocks) { | 1296 | if (agbno >= mp->m_sb.sb_agblocks) { |
1308 | xfs_fs_cmn_err(CE_ALERT, mp, | 1297 | xfs_alert(mp, |
1309 | "xfs_imap: agbno (0x%llx) >= " | 1298 | "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", |
1310 | "mp->m_sb.sb_agblocks (0x%lx)", | 1299 | __func__, (unsigned long long)agbno, |
1311 | (unsigned long long) agbno, | 1300 | (unsigned long)mp->m_sb.sb_agblocks); |
1312 | (unsigned long) mp->m_sb.sb_agblocks); | ||
1313 | } | 1301 | } |
1314 | if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { | 1302 | if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { |
1315 | xfs_fs_cmn_err(CE_ALERT, mp, | 1303 | xfs_alert(mp, |
1316 | "xfs_imap: ino (0x%llx) != " | 1304 | "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", |
1317 | "XFS_AGINO_TO_INO(mp, agno, agino) " | 1305 | __func__, ino, |
1318 | "(0x%llx)", | 1306 | XFS_AGINO_TO_INO(mp, agno, agino)); |
1319 | ino, XFS_AGINO_TO_INO(mp, agno, agino)); | ||
1320 | } | 1307 | } |
1321 | xfs_stack_trace(); | 1308 | xfs_stack_trace(); |
1322 | #endif /* DEBUG */ | 1309 | #endif /* DEBUG */ |
@@ -1388,10 +1375,9 @@ out_map: | |||
1388 | */ | 1375 | */ |
1389 | if ((imap->im_blkno + imap->im_len) > | 1376 | if ((imap->im_blkno + imap->im_len) > |
1390 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | 1377 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { |
1391 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1378 | xfs_alert(mp, |
1392 | "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " | 1379 | "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", |
1393 | " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", | 1380 | __func__, (unsigned long long) imap->im_blkno, |
1394 | (unsigned long long) imap->im_blkno, | ||
1395 | (unsigned long long) imap->im_len, | 1381 | (unsigned long long) imap->im_len, |
1396 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | 1382 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); |
1397 | return XFS_ERROR(EINVAL); | 1383 | return XFS_ERROR(EINVAL); |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index d352862cefa0..16921f55c542 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
@@ -183,38 +183,6 @@ xfs_inobt_key_diff( | |||
183 | cur->bc_rec.i.ir_startino; | 183 | cur->bc_rec.i.ir_startino; |
184 | } | 184 | } |
185 | 185 | ||
186 | STATIC int | ||
187 | xfs_inobt_kill_root( | ||
188 | struct xfs_btree_cur *cur, | ||
189 | struct xfs_buf *bp, | ||
190 | int level, | ||
191 | union xfs_btree_ptr *newroot) | ||
192 | { | ||
193 | int error; | ||
194 | |||
195 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
196 | XFS_BTREE_STATS_INC(cur, killroot); | ||
197 | |||
198 | /* | ||
199 | * Update the root pointer, decreasing the level by 1 and then | ||
200 | * free the old root. | ||
201 | */ | ||
202 | xfs_inobt_set_root(cur, newroot, -1); | ||
203 | error = xfs_inobt_free_block(cur, bp); | ||
204 | if (error) { | ||
205 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
206 | return error; | ||
207 | } | ||
208 | |||
209 | XFS_BTREE_STATS_INC(cur, free); | ||
210 | |||
211 | cur->bc_bufs[level] = NULL; | ||
212 | cur->bc_nlevels--; | ||
213 | |||
214 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
215 | return 0; | ||
216 | } | ||
217 | |||
218 | #ifdef DEBUG | 186 | #ifdef DEBUG |
219 | STATIC int | 187 | STATIC int |
220 | xfs_inobt_keys_inorder( | 188 | xfs_inobt_keys_inorder( |
@@ -309,7 +277,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = { | |||
309 | 277 | ||
310 | .dup_cursor = xfs_inobt_dup_cursor, | 278 | .dup_cursor = xfs_inobt_dup_cursor, |
311 | .set_root = xfs_inobt_set_root, | 279 | .set_root = xfs_inobt_set_root, |
312 | .kill_root = xfs_inobt_kill_root, | ||
313 | .alloc_block = xfs_inobt_alloc_block, | 280 | .alloc_block = xfs_inobt_alloc_block, |
314 | .free_block = xfs_inobt_free_block, | 281 | .free_block = xfs_inobt_free_block, |
315 | .get_minrecs = xfs_inobt_get_minrecs, | 282 | .get_minrecs = xfs_inobt_get_minrecs, |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b1ecc6f97ade..3631783b2b53 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -43,6 +43,17 @@ | |||
43 | 43 | ||
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Define xfs inode iolock lockdep classes. We need to ensure that all active | ||
47 | * inodes are considered the same for lockdep purposes, including inodes that | ||
48 | are recycled through the XFS_IRECLAIMABLE state. This is the only way to | ||
49 | * guarantee the locks are considered the same when there are multiple lock | ||
50 | initialisation sites. Also, define a reclaimable inode class so it is | ||
51 | * obvious in lockdep reports which class the report is against. | ||
52 | */ | ||
53 | static struct lock_class_key xfs_iolock_active; | ||
54 | struct lock_class_key xfs_iolock_reclaimable; | ||
55 | |||
56 | /* | ||
46 | * Allocate and initialise an xfs_inode. | 57 | * Allocate and initialise an xfs_inode. |
47 | */ | 58 | */ |
48 | STATIC struct xfs_inode * | 59 | STATIC struct xfs_inode * |
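The static lock_class_key above addresses a lockdep quirk: init_rwsem()
and friends key the lock class off the initialisation call site, so
re-initialising a recycled inode's iolock from a second site would
silently split one logical lock into two classes. Pinning every
initialisation to a shared key looks roughly like this (the helper is
illustrative; the key name mirrors the diff):

    #include <linux/lockdep.h>
    #include <linux/rwsem.h>

    static struct lock_class_key iolock_active_key;

    static void iolock_init(struct rw_semaphore *sem)
    {
    	init_rwsem(sem);	/* would otherwise key off this call site */
    	lockdep_set_class_and_name(sem, &iolock_active_key,
    				   "xfs_iolock_active");
    }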
@@ -69,8 +80,11 @@ xfs_inode_alloc( | |||
69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 80 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
70 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 81 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
71 | ASSERT(completion_done(&ip->i_flush)); | 82 | ASSERT(completion_done(&ip->i_flush)); |
83 | ASSERT(ip->i_ino == 0); | ||
72 | 84 | ||
73 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 85 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
86 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
87 | &xfs_iolock_active, "xfs_iolock_active"); | ||
74 | 88 | ||
75 | /* initialise the xfs inode */ | 89 | /* initialise the xfs inode */ |
76 | ip->i_ino = ino; | 90 | ip->i_ino = ino; |
@@ -85,12 +99,20 @@ xfs_inode_alloc( | |||
85 | ip->i_size = 0; | 99 | ip->i_size = 0; |
86 | ip->i_new_size = 0; | 100 | ip->i_new_size = 0; |
87 | 101 | ||
88 | /* prevent anyone from using this yet */ | ||
89 | VFS_I(ip)->i_state = I_NEW; | ||
90 | |||
91 | return ip; | 102 | return ip; |
92 | } | 103 | } |
93 | 104 | ||
105 | STATIC void | ||
106 | xfs_inode_free_callback( | ||
107 | struct rcu_head *head) | ||
108 | { | ||
109 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
110 | struct xfs_inode *ip = XFS_I(inode); | ||
111 | |||
112 | INIT_LIST_HEAD(&inode->i_dentry); | ||
113 | kmem_zone_free(xfs_inode_zone, ip); | ||
114 | } | ||
115 | |||
94 | void | 116 | void |
95 | xfs_inode_free( | 117 | xfs_inode_free( |
96 | struct xfs_inode *ip) | 118 | struct xfs_inode *ip) |
@@ -134,7 +156,18 @@ xfs_inode_free( | |||
134 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 156 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
135 | ASSERT(completion_done(&ip->i_flush)); | 157 | ASSERT(completion_done(&ip->i_flush)); |
136 | 158 | ||
137 | kmem_zone_free(xfs_inode_zone, ip); | 159 | /* |
160 | * Because we use RCU freeing we need to ensure the inode always | ||
161 | * appears to be reclaimed with an invalid inode number when in the | ||
162 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
163 | * races. | ||
164 | */ | ||
165 | spin_lock(&ip->i_flags_lock); | ||
166 | ip->i_flags = XFS_IRECLAIM; | ||
167 | ip->i_ino = 0; | ||
168 | spin_unlock(&ip->i_flags_lock); | ||
169 | |||
170 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | ||
138 | } | 171 | } |
139 | 172 | ||
140 | /* | 173 | /* |
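This freeing path pairs with the RCU-side lookups later in the diff:
the freer invalidates the inode (i_ino = 0, XFS_IRECLAIM) under
i_flags_lock before handing it to call_rcu(), and every lookup made
under rcu_read_lock() must revalidate identity under that same
spinlock, because the radix tree can still return a dying or recycled
object during the grace period. A self-contained sketch of both halves,
with illustrative types:

    #include <linux/atomic.h>
    #include <linux/radix-tree.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct obj {
    	spinlock_t	lock;
    	unsigned long	id;		/* 0 == dead */
    	atomic_t	refcount;
    	struct rcu_head	rcu;
    };

    static void obj_free_cb(struct rcu_head *head)
    {
    	kfree(container_of(head, struct obj, rcu));
    }

    static void obj_free(struct obj *o)
    {
    	spin_lock(&o->lock);
    	o->id = 0;			/* concurrent lookups now fail */
    	spin_unlock(&o->lock);
    	call_rcu(&o->rcu, obj_free_cb);	/* actual free is deferred */
    }

    static struct obj *obj_lookup(struct radix_tree_root *tree,
    			      unsigned long id)
    {
    	struct obj *found, *o = NULL;

    	rcu_read_lock();
    	found = radix_tree_lookup(tree, id);
    	if (found) {
    		spin_lock(&found->lock);
    		if (found->id == id) {	/* still the object we wanted */
    			atomic_inc(&found->refcount);
    			o = found;
    		}			/* else: freed or reused; caller retries */
    		spin_unlock(&found->lock);
    	}
    	rcu_read_unlock();
    	return o;
    }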
@@ -144,14 +177,29 @@ static int | |||
144 | xfs_iget_cache_hit( | 177 | xfs_iget_cache_hit( |
145 | struct xfs_perag *pag, | 178 | struct xfs_perag *pag, |
146 | struct xfs_inode *ip, | 179 | struct xfs_inode *ip, |
180 | xfs_ino_t ino, | ||
147 | int flags, | 181 | int flags, |
148 | int lock_flags) __releases(pag->pag_ici_lock) | 182 | int lock_flags) __releases(RCU) |
149 | { | 183 | { |
150 | struct inode *inode = VFS_I(ip); | 184 | struct inode *inode = VFS_I(ip); |
151 | struct xfs_mount *mp = ip->i_mount; | 185 | struct xfs_mount *mp = ip->i_mount; |
152 | int error; | 186 | int error; |
153 | 187 | ||
188 | /* | ||
189 | * check for re-use of an inode within an RCU grace period due to the | ||
190 | * radix tree nodes not being updated yet. We monitor for this by | ||
191 | * setting the inode number to zero before freeing the inode structure. | ||
192 | * If the inode has been reallocated and set up, then the inode number | ||
193 | * will not match, so check for that, too. | ||
194 | */ | ||
154 | spin_lock(&ip->i_flags_lock); | 195 | spin_lock(&ip->i_flags_lock); |
196 | if (ip->i_ino != ino) { | ||
197 | trace_xfs_iget_skip(ip); | ||
198 | XFS_STATS_INC(xs_ig_frecycle); | ||
199 | error = EAGAIN; | ||
200 | goto out_error; | ||
201 | } | ||
202 | |||
155 | 203 | ||
156 | /* | 204 | /* |
157 | * If we are racing with another cache hit that is currently | 205 | * If we are racing with another cache hit that is currently |
@@ -194,7 +242,7 @@ xfs_iget_cache_hit( | |||
194 | ip->i_flags |= XFS_IRECLAIM; | 242 | ip->i_flags |= XFS_IRECLAIM; |
195 | 243 | ||
196 | spin_unlock(&ip->i_flags_lock); | 244 | spin_unlock(&ip->i_flags_lock); |
197 | read_unlock(&pag->pag_ici_lock); | 245 | rcu_read_unlock(); |
198 | 246 | ||
199 | error = -inode_init_always(mp->m_super, inode); | 247 | error = -inode_init_always(mp->m_super, inode); |
200 | if (error) { | 248 | if (error) { |
@@ -202,24 +250,35 @@ xfs_iget_cache_hit( | |||
202 | * Re-initializing the inode failed, and we are in deep | 250 | * Re-initializing the inode failed, and we are in deep |
203 | * trouble. Try to re-add it to the reclaim list. | 251 | * trouble. Try to re-add it to the reclaim list. |
204 | */ | 252 | */ |
205 | read_lock(&pag->pag_ici_lock); | 253 | rcu_read_lock(); |
206 | spin_lock(&ip->i_flags_lock); | 254 | spin_lock(&ip->i_flags_lock); |
207 | 255 | ||
208 | ip->i_flags &= ~XFS_INEW; | 256 | ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); |
209 | ip->i_flags |= XFS_IRECLAIMABLE; | 257 | ASSERT(ip->i_flags & XFS_IRECLAIMABLE); |
210 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
211 | trace_xfs_iget_reclaim_fail(ip); | 258 | trace_xfs_iget_reclaim_fail(ip); |
212 | goto out_error; | 259 | goto out_error; |
213 | } | 260 | } |
214 | 261 | ||
215 | write_lock(&pag->pag_ici_lock); | 262 | spin_lock(&pag->pag_ici_lock); |
216 | spin_lock(&ip->i_flags_lock); | 263 | spin_lock(&ip->i_flags_lock); |
217 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); | 264 | |
265 | /* | ||
266 | * Clear the per-lifetime state in the inode as we are now | ||
267 | * effectively a new inode and need to return to the initial | ||
268 | * state before reuse occurs. | ||
269 | */ | ||
270 | ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; | ||
218 | ip->i_flags |= XFS_INEW; | 271 | ip->i_flags |= XFS_INEW; |
219 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 272 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
220 | inode->i_state = I_NEW; | 273 | inode->i_state = I_NEW; |
274 | |||
275 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
276 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
277 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
278 | &xfs_iolock_active, "xfs_iolock_active"); | ||
279 | |||
221 | spin_unlock(&ip->i_flags_lock); | 280 | spin_unlock(&ip->i_flags_lock); |
222 | write_unlock(&pag->pag_ici_lock); | 281 | spin_unlock(&pag->pag_ici_lock); |
223 | } else { | 282 | } else { |
224 | /* If the VFS inode is being torn down, pause and try again. */ | 283 | /* If the VFS inode is being torn down, pause and try again. */ |
225 | if (!igrab(inode)) { | 284 | if (!igrab(inode)) { |
@@ -230,7 +289,7 @@ xfs_iget_cache_hit( | |||
230 | 289 | ||
231 | /* We've got a live one. */ | 290 | /* We've got a live one. */ |
232 | spin_unlock(&ip->i_flags_lock); | 291 | spin_unlock(&ip->i_flags_lock); |
233 | read_unlock(&pag->pag_ici_lock); | 292 | rcu_read_unlock(); |
234 | trace_xfs_iget_hit(ip); | 293 | trace_xfs_iget_hit(ip); |
235 | } | 294 | } |
236 | 295 | ||
@@ -244,7 +303,7 @@ xfs_iget_cache_hit( | |||
244 | 303 | ||
245 | out_error: | 304 | out_error: |
246 | spin_unlock(&ip->i_flags_lock); | 305 | spin_unlock(&ip->i_flags_lock); |
247 | read_unlock(&pag->pag_ici_lock); | 306 | rcu_read_unlock(); |
248 | return error; | 307 | return error; |
249 | } | 308 | } |
250 | 309 | ||
@@ -297,7 +356,7 @@ xfs_iget_cache_miss( | |||
297 | BUG(); | 356 | BUG(); |
298 | } | 357 | } |
299 | 358 | ||
300 | write_lock(&pag->pag_ici_lock); | 359 | spin_lock(&pag->pag_ici_lock); |
301 | 360 | ||
302 | /* insert the new inode */ | 361 | /* insert the new inode */ |
303 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 362 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
@@ -312,14 +371,14 @@ xfs_iget_cache_miss( | |||
312 | ip->i_udquot = ip->i_gdquot = NULL; | 371 | ip->i_udquot = ip->i_gdquot = NULL; |
313 | xfs_iflags_set(ip, XFS_INEW); | 372 | xfs_iflags_set(ip, XFS_INEW); |
314 | 373 | ||
315 | write_unlock(&pag->pag_ici_lock); | 374 | spin_unlock(&pag->pag_ici_lock); |
316 | radix_tree_preload_end(); | 375 | radix_tree_preload_end(); |
317 | 376 | ||
318 | *ipp = ip; | 377 | *ipp = ip; |
319 | return 0; | 378 | return 0; |
320 | 379 | ||
321 | out_preload_end: | 380 | out_preload_end: |
322 | write_unlock(&pag->pag_ici_lock); | 381 | spin_unlock(&pag->pag_ici_lock); |
323 | radix_tree_preload_end(); | 382 | radix_tree_preload_end(); |
324 | if (lock_flags) | 383 | if (lock_flags) |
325 | xfs_iunlock(ip, lock_flags); | 384 | xfs_iunlock(ip, lock_flags); |
@@ -365,8 +424,8 @@ xfs_iget( | |||
365 | xfs_perag_t *pag; | 424 | xfs_perag_t *pag; |
366 | xfs_agino_t agino; | 425 | xfs_agino_t agino; |
367 | 426 | ||
368 | /* the radix tree exists only in inode capable AGs */ | 427 | /* reject inode numbers outside existing AGs */ |
369 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) | 428 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
370 | return EINVAL; | 429 | return EINVAL; |
371 | 430 | ||
372 | /* get the perag structure and ensure that it's inode capable */ | 431 | /* get the perag structure and ensure that it's inode capable */ |
@@ -375,15 +434,15 @@ xfs_iget( | |||
375 | 434 | ||
376 | again: | 435 | again: |
377 | error = 0; | 436 | error = 0; |
378 | read_lock(&pag->pag_ici_lock); | 437 | rcu_read_lock(); |
379 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | 438 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); |
380 | 439 | ||
381 | if (ip) { | 440 | if (ip) { |
382 | error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); | 441 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); |
383 | if (error) | 442 | if (error) |
384 | goto out_error_or_again; | 443 | goto out_error_or_again; |
385 | } else { | 444 | } else { |
386 | read_unlock(&pag->pag_ici_lock); | 445 | rcu_read_unlock(); |
387 | XFS_STATS_INC(xs_ig_missed); | 446 | XFS_STATS_INC(xs_ig_missed); |
388 | 447 | ||
389 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | 448 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34798f391c49..a098a20ca63e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -110,8 +110,8 @@ xfs_inobp_check( | |||
110 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 110 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, |
111 | i * mp->m_sb.sb_inodesize); | 111 | i * mp->m_sb.sb_inodesize); |
112 | if (!dip->di_next_unlinked) { | 112 | if (!dip->di_next_unlinked) { |
113 | xfs_fs_cmn_err(CE_ALERT, mp, | 113 | xfs_alert(mp, |
114 | "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", | 114 | "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", |
115 | bp); | 115 | bp); |
116 | ASSERT(dip->di_next_unlinked); | 116 | ASSERT(dip->di_next_unlinked); |
117 | } | 117 | } |
@@ -142,10 +142,9 @@ xfs_imap_to_bp( | |||
142 | (int)imap->im_len, buf_flags, &bp); | 142 | (int)imap->im_len, buf_flags, &bp); |
143 | if (error) { | 143 | if (error) { |
144 | if (error != EAGAIN) { | 144 | if (error != EAGAIN) { |
145 | cmn_err(CE_WARN, | 145 | xfs_warn(mp, |
146 | "xfs_imap_to_bp: xfs_trans_read_buf()returned " | 146 | "%s: xfs_trans_read_buf() returned error %d.", |
147 | "an error %d on %s. Returning error.", | 147 | __func__, error); |
148 | error, mp->m_fsname); | ||
149 | } else { | 148 | } else { |
150 | ASSERT(buf_flags & XBF_TRYLOCK); | 149 | ASSERT(buf_flags & XBF_TRYLOCK); |
151 | } | 150 | } |
@@ -180,12 +179,11 @@ xfs_imap_to_bp( | |||
180 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", | 179 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", |
181 | XFS_ERRLEVEL_HIGH, mp, dip); | 180 | XFS_ERRLEVEL_HIGH, mp, dip); |
182 | #ifdef DEBUG | 181 | #ifdef DEBUG |
183 | cmn_err(CE_PANIC, | 182 | xfs_emerg(mp, |
184 | "Device %s - bad inode magic/vsn " | 183 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", |
185 | "daddr %lld #%d (magic=%x)", | ||
186 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
187 | (unsigned long long)imap->im_blkno, i, | 184 | (unsigned long long)imap->im_blkno, i, |
188 | be16_to_cpu(dip->di_magic)); | 185 | be16_to_cpu(dip->di_magic)); |
186 | ASSERT(0); | ||
189 | #endif | 187 | #endif |
190 | xfs_trans_brelse(tp, bp); | 188 | xfs_trans_brelse(tp, bp); |
191 | return XFS_ERROR(EFSCORRUPTED); | 189 | return XFS_ERROR(EFSCORRUPTED); |
@@ -317,7 +315,7 @@ xfs_iformat( | |||
317 | if (unlikely(be32_to_cpu(dip->di_nextents) + | 315 | if (unlikely(be32_to_cpu(dip->di_nextents) + |
318 | be16_to_cpu(dip->di_anextents) > | 316 | be16_to_cpu(dip->di_anextents) > |
319 | be64_to_cpu(dip->di_nblocks))) { | 317 | be64_to_cpu(dip->di_nblocks))) { |
320 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 318 | xfs_warn(ip->i_mount, |
321 | "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", | 319 | "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", |
322 | (unsigned long long)ip->i_ino, | 320 | (unsigned long long)ip->i_ino, |
323 | (int)(be32_to_cpu(dip->di_nextents) + | 321 | (int)(be32_to_cpu(dip->di_nextents) + |
@@ -330,8 +328,7 @@ xfs_iformat( | |||
330 | } | 328 | } |
331 | 329 | ||
332 | if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { | 330 | if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { |
333 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 331 | xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", |
334 | "corrupt dinode %Lu, forkoff = 0x%x.", | ||
335 | (unsigned long long)ip->i_ino, | 332 | (unsigned long long)ip->i_ino, |
336 | dip->di_forkoff); | 333 | dip->di_forkoff); |
337 | XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, | 334 | XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, |
@@ -341,7 +338,7 @@ xfs_iformat( | |||
341 | 338 | ||
342 | if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && | 339 | if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && |
343 | !ip->i_mount->m_rtdev_targp)) { | 340 | !ip->i_mount->m_rtdev_targp)) { |
344 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 341 | xfs_warn(ip->i_mount, |
345 | "corrupt dinode %Lu, has realtime flag set.", | 342 | "corrupt dinode %Lu, has realtime flag set.", |
346 | ip->i_ino); | 343 | ip->i_ino); |
347 | XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", | 344 | XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", |
@@ -373,9 +370,8 @@ xfs_iformat( | |||
373 | * no local regular files yet | 370 | * no local regular files yet |
374 | */ | 371 | */ |
375 | if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { | 372 | if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { |
376 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 373 | xfs_warn(ip->i_mount, |
377 | "corrupt inode %Lu " | 374 | "corrupt inode %Lu (local format for regular file).", |
378 | "(local format for regular file).", | ||
379 | (unsigned long long) ip->i_ino); | 375 | (unsigned long long) ip->i_ino); |
380 | XFS_CORRUPTION_ERROR("xfs_iformat(4)", | 376 | XFS_CORRUPTION_ERROR("xfs_iformat(4)", |
381 | XFS_ERRLEVEL_LOW, | 377 | XFS_ERRLEVEL_LOW, |
@@ -385,9 +381,8 @@ xfs_iformat( | |||
385 | 381 | ||
386 | di_size = be64_to_cpu(dip->di_size); | 382 | di_size = be64_to_cpu(dip->di_size); |
387 | if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { | 383 | if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { |
388 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 384 | xfs_warn(ip->i_mount, |
389 | "corrupt inode %Lu " | 385 | "corrupt inode %Lu (bad size %Ld for local inode).", |
390 | "(bad size %Ld for local inode).", | ||
391 | (unsigned long long) ip->i_ino, | 386 | (unsigned long long) ip->i_ino, |
392 | (long long) di_size); | 387 | (long long) di_size); |
393 | XFS_CORRUPTION_ERROR("xfs_iformat(5)", | 388 | XFS_CORRUPTION_ERROR("xfs_iformat(5)", |
@@ -431,9 +426,8 @@ xfs_iformat( | |||
431 | size = be16_to_cpu(atp->hdr.totsize); | 426 | size = be16_to_cpu(atp->hdr.totsize); |
432 | 427 | ||
433 | if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { | 428 | if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { |
434 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 429 | xfs_warn(ip->i_mount, |
435 | "corrupt inode %Lu " | 430 | "corrupt inode %Lu (bad attr fork size %Ld).", |
436 | "(bad attr fork size %Ld).", | ||
437 | (unsigned long long) ip->i_ino, | 431 | (unsigned long long) ip->i_ino, |
438 | (long long) size); | 432 | (long long) size); |
439 | XFS_CORRUPTION_ERROR("xfs_iformat(8)", | 433 | XFS_CORRUPTION_ERROR("xfs_iformat(8)", |
@@ -488,9 +482,8 @@ xfs_iformat_local( | |||
488 | * kmem_alloc() or memcpy() below. | 482 | * kmem_alloc() or memcpy() below. |
489 | */ | 483 | */ |
490 | if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | 484 | if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { |
491 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 485 | xfs_warn(ip->i_mount, |
492 | "corrupt inode %Lu " | 486 | "corrupt inode %Lu (bad size %d for local fork, size = %d).", |
493 | "(bad size %d for local fork, size = %d).", | ||
494 | (unsigned long long) ip->i_ino, size, | 487 | (unsigned long long) ip->i_ino, size, |
495 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); | 488 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); |
496 | XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, | 489 | XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, |
@@ -547,8 +540,7 @@ xfs_iformat_extents( | |||
547 | * kmem_alloc() or memcpy() below. | 540 | * kmem_alloc() or memcpy() below. |
548 | */ | 541 | */ |
549 | if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | 542 | if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { |
550 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 543 | xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", |
551 | "corrupt inode %Lu ((a)extents = %d).", | ||
552 | (unsigned long long) ip->i_ino, nex); | 544 | (unsigned long long) ip->i_ino, nex); |
553 | XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, | 545 | XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, |
554 | ip->i_mount, dip); | 546 | ip->i_mount, dip); |
@@ -623,11 +615,10 @@ xfs_iformat_btree( | |||
623 | || XFS_BMDR_SPACE_CALC(nrecs) > | 615 | || XFS_BMDR_SPACE_CALC(nrecs) > |
624 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) | 616 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) |
625 | || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | 617 | || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { |
626 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 618 | xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", |
627 | "corrupt inode %Lu (btree).", | ||
628 | (unsigned long long) ip->i_ino); | 619 | (unsigned long long) ip->i_ino); |
629 | XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, | 620 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, |
630 | ip->i_mount); | 621 | ip->i_mount, dip); |
631 | return XFS_ERROR(EFSCORRUPTED); | 622 | return XFS_ERROR(EFSCORRUPTED); |
632 | } | 623 | } |
633 | 624 | ||
@@ -660,7 +651,8 @@ xfs_dinode_from_disk( | |||
660 | to->di_uid = be32_to_cpu(from->di_uid); | 651 | to->di_uid = be32_to_cpu(from->di_uid); |
661 | to->di_gid = be32_to_cpu(from->di_gid); | 652 | to->di_gid = be32_to_cpu(from->di_gid); |
662 | to->di_nlink = be32_to_cpu(from->di_nlink); | 653 | to->di_nlink = be32_to_cpu(from->di_nlink); |
663 | to->di_projid = be16_to_cpu(from->di_projid); | 654 | to->di_projid_lo = be16_to_cpu(from->di_projid_lo); |
655 | to->di_projid_hi = be16_to_cpu(from->di_projid_hi); | ||
664 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | 656 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
665 | to->di_flushiter = be16_to_cpu(from->di_flushiter); | 657 | to->di_flushiter = be16_to_cpu(from->di_flushiter); |
666 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); | 658 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); |
@@ -695,7 +687,8 @@ xfs_dinode_to_disk( | |||
695 | to->di_uid = cpu_to_be32(from->di_uid); | 687 | to->di_uid = cpu_to_be32(from->di_uid); |
696 | to->di_gid = cpu_to_be32(from->di_gid); | 688 | to->di_gid = cpu_to_be32(from->di_gid); |
697 | to->di_nlink = cpu_to_be32(from->di_nlink); | 689 | to->di_nlink = cpu_to_be32(from->di_nlink); |
698 | to->di_projid = cpu_to_be16(from->di_projid); | 690 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); |
691 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); | ||
699 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | 692 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
700 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | 693 | to->di_flushiter = cpu_to_be16(from->di_flushiter); |
701 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); | 694 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); |
@@ -811,11 +804,9 @@ xfs_iread( | |||
811 | */ | 804 | */ |
812 | if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { | 805 | if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { |
813 | #ifdef DEBUG | 806 | #ifdef DEBUG |
814 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " | 807 | xfs_alert(mp, |
815 | "dip->di_magic (0x%x) != " | 808 | "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", |
816 | "XFS_DINODE_MAGIC (0x%x)", | 809 | __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); |
817 | be16_to_cpu(dip->di_magic), | ||
818 | XFS_DINODE_MAGIC); | ||
819 | #endif /* DEBUG */ | 810 | #endif /* DEBUG */ |
820 | error = XFS_ERROR(EINVAL); | 811 | error = XFS_ERROR(EINVAL); |
821 | goto out_brelse; | 812 | goto out_brelse; |
@@ -833,9 +824,8 @@ xfs_iread( | |||
833 | error = xfs_iformat(ip, dip); | 824 | error = xfs_iformat(ip, dip); |
834 | if (error) { | 825 | if (error) { |
835 | #ifdef DEBUG | 826 | #ifdef DEBUG |
836 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " | 827 | xfs_alert(mp, "%s: xfs_iformat() returned error %d", |
837 | "xfs_iformat() returned error %d", | 828 | __func__, error); |
838 | error); | ||
839 | #endif /* DEBUG */ | 829 | #endif /* DEBUG */ |
840 | goto out_brelse; | 830 | goto out_brelse; |
841 | } | 831 | } |
@@ -874,7 +864,7 @@ xfs_iread( | |||
874 | if (ip->i_d.di_version == 1) { | 864 | if (ip->i_d.di_version == 1) { |
875 | ip->i_d.di_nlink = ip->i_d.di_onlink; | 865 | ip->i_d.di_nlink = ip->i_d.di_onlink; |
876 | ip->i_d.di_onlink = 0; | 866 | ip->i_d.di_onlink = 0; |
877 | ip->i_d.di_projid = 0; | 867 | xfs_set_projid(ip, 0); |
878 | } | 868 | } |
879 | 869 | ||
880 | ip->i_delayed_blks = 0; | 870 | ip->i_delayed_blks = 0; |
@@ -885,7 +875,7 @@ xfs_iread( | |||
885 | * around for a while. This helps to keep recently accessed | 875 | * around for a while. This helps to keep recently accessed |
886 | * meta-data in-core longer. | 876 | * meta-data in-core longer. |
887 | */ | 877 | */ |
888 | XFS_BUF_SET_REF(bp, XFS_INO_REF); | 878 | xfs_buf_set_ref(bp, XFS_INO_REF); |
889 | 879 | ||
890 | /* | 880 | /* |
891 | * Use xfs_trans_brelse() to release the buffer containing the | 881 | * Use xfs_trans_brelse() to release the buffer containing the |
@@ -930,7 +920,6 @@ xfs_iread_extents( | |||
930 | /* | 920 | /* |
931 | * We know that the size is valid (it's checked in iformat_btree) | 921 | * We know that the size is valid (it's checked in iformat_btree) |
932 | */ | 922 | */ |
933 | ifp->if_lastex = NULLEXTNUM; | ||
934 | ifp->if_bytes = ifp->if_real_bytes = 0; | 923 | ifp->if_bytes = ifp->if_real_bytes = 0; |
935 | ifp->if_flags |= XFS_IFEXTENTS; | 924 | ifp->if_flags |= XFS_IFEXTENTS; |
936 | xfs_iext_add(ifp, 0, nextents); | 925 | xfs_iext_add(ifp, 0, nextents); |
@@ -982,8 +971,7 @@ xfs_ialloc( | |||
982 | mode_t mode, | 971 | mode_t mode, |
983 | xfs_nlink_t nlink, | 972 | xfs_nlink_t nlink, |
984 | xfs_dev_t rdev, | 973 | xfs_dev_t rdev, |
985 | cred_t *cr, | 974 | prid_t prid, |
986 | xfs_prid_t prid, | ||
987 | int okalloc, | 975 | int okalloc, |
988 | xfs_buf_t **ialloc_context, | 976 | xfs_buf_t **ialloc_context, |
989 | boolean_t *call_again, | 977 | boolean_t *call_again, |
@@ -1015,8 +1003,8 @@ xfs_ialloc( | |||
1015 | * This is because we're setting fields here we need | 1003 | * This is because we're setting fields here we need |
1016 | * to prevent others from looking at until we're done. | 1004 | * to prevent others from looking at until we're done. |
1017 | */ | 1005 | */ |
1018 | error = xfs_trans_iget(tp->t_mountp, tp, ino, | 1006 | error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, |
1019 | XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); | 1007 | XFS_ILOCK_EXCL, &ip); |
1020 | if (error) | 1008 | if (error) |
1021 | return error; | 1009 | return error; |
1022 | ASSERT(ip != NULL); | 1010 | ASSERT(ip != NULL); |
@@ -1027,7 +1015,7 @@ xfs_ialloc( | |||
1027 | ASSERT(ip->i_d.di_nlink == nlink); | 1015 | ASSERT(ip->i_d.di_nlink == nlink); |
1028 | ip->i_d.di_uid = current_fsuid(); | 1016 | ip->i_d.di_uid = current_fsuid(); |
1029 | ip->i_d.di_gid = current_fsgid(); | 1017 | ip->i_d.di_gid = current_fsgid(); |
1030 | ip->i_d.di_projid = prid; | 1018 | xfs_set_projid(ip, prid); |
1031 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 1019 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
1032 | 1020 | ||
1033 | /* | 1021 | /* |
@@ -1165,6 +1153,7 @@ xfs_ialloc( | |||
1165 | /* | 1153 | /* |
1166 | * Log the new values stuffed into the inode. | 1154 | * Log the new values stuffed into the inode. |
1167 | */ | 1155 | */ |
1156 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); | ||
1168 | xfs_trans_log_inode(tp, ip, flags); | 1157 | xfs_trans_log_inode(tp, ip, flags); |
1169 | 1158 | ||
1170 | /* now that we have an i_mode we can setup inode ops and unlock */ | 1159 | /* now that we have an i_mode we can setup inode ops and unlock */ |
@@ -1364,7 +1353,7 @@ xfs_itruncate_start( | |||
1364 | return 0; | 1353 | return 0; |
1365 | } | 1354 | } |
1366 | last_byte = xfs_file_last_byte(ip); | 1355 | last_byte = xfs_file_last_byte(ip); |
1367 | trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); | 1356 | trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte); |
1368 | if (last_byte > toss_start) { | 1357 | if (last_byte > toss_start) { |
1369 | if (flags & XFS_ITRUNC_DEFINITE) { | 1358 | if (flags & XFS_ITRUNC_DEFINITE) { |
1370 | xfs_tosspages(ip, toss_start, | 1359 | xfs_tosspages(ip, toss_start, |
@@ -1480,7 +1469,7 @@ xfs_itruncate_finish( | |||
1480 | * file but the log buffers containing the free and reallocation | 1469 | * file but the log buffers containing the free and reallocation |
1481 | * don't, then we'd end up with garbage in the blocks being freed. | 1470 | * don't, then we'd end up with garbage in the blocks being freed. |
1482 | * As long as we make the new_size permanent before actually | 1471 | * As long as we make the new_size permanent before actually |
1483 | * freeing any blocks it doesn't matter if they get writtten to. | 1472 | * freeing any blocks it doesn't matter if they get written to. |
1484 | * | 1473 | * |
1485 | * The callers must signal into us whether or not the size | 1474 | * The callers must signal into us whether or not the size |
1486 | * setting here must be synchronous. There are a few cases | 1475 | * setting here must be synchronous. There are a few cases |
@@ -1819,9 +1808,8 @@ xfs_iunlink_remove( | |||
1819 | */ | 1808 | */ |
1820 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1809 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); |
1821 | if (error) { | 1810 | if (error) { |
1822 | cmn_err(CE_WARN, | 1811 | xfs_warn(mp, "%s: xfs_itobp() returned error %d.", |
1823 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 1812 | __func__, error); |
1824 | error, mp->m_fsname); | ||
1825 | return error; | 1813 | return error; |
1826 | } | 1814 | } |
1827 | next_agino = be32_to_cpu(dip->di_next_unlinked); | 1815 | next_agino = be32_to_cpu(dip->di_next_unlinked); |
@@ -1866,9 +1854,9 @@ xfs_iunlink_remove( | |||
1866 | error = xfs_inotobp(mp, tp, next_ino, &last_dip, | 1854 | error = xfs_inotobp(mp, tp, next_ino, &last_dip, |
1867 | &last_ibp, &last_offset, 0); | 1855 | &last_ibp, &last_offset, 0); |
1868 | if (error) { | 1856 | if (error) { |
1869 | cmn_err(CE_WARN, | 1857 | xfs_warn(mp, |
1870 | "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", | 1858 | "%s: xfs_inotobp() returned error %d.", |
1871 | error, mp->m_fsname); | 1859 | __func__, error); |
1872 | return error; | 1860 | return error; |
1873 | } | 1861 | } |
1874 | next_agino = be32_to_cpu(last_dip->di_next_unlinked); | 1862 | next_agino = be32_to_cpu(last_dip->di_next_unlinked); |
@@ -1881,9 +1869,8 @@ xfs_iunlink_remove( | |||
1881 | */ | 1869 | */ |
1882 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1870 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); |
1883 | if (error) { | 1871 | if (error) { |
1884 | cmn_err(CE_WARN, | 1872 | xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", |
1885 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 1873 | __func__, error); |
1886 | error, mp->m_fsname); | ||
1887 | return error; | 1874 | return error; |
1888 | } | 1875 | } |
1889 | next_agino = be32_to_cpu(dip->di_next_unlinked); | 1876 | next_agino = be32_to_cpu(dip->di_next_unlinked); |
@@ -1999,15 +1986,31 @@ xfs_ifree_cluster( | |||
1999 | */ | 1986 | */ |
2000 | for (i = 0; i < ninodes; i++) { | 1987 | for (i = 0; i < ninodes; i++) { |
2001 | retry: | 1988 | retry: |
2002 | read_lock(&pag->pag_ici_lock); | 1989 | rcu_read_lock(); |
2003 | ip = radix_tree_lookup(&pag->pag_ici_root, | 1990 | ip = radix_tree_lookup(&pag->pag_ici_root, |
2004 | XFS_INO_TO_AGINO(mp, (inum + i))); | 1991 | XFS_INO_TO_AGINO(mp, (inum + i))); |
2005 | 1992 | ||
2006 | /* Inode not in memory or stale, nothing to do */ | 1993 | /* Inode not in memory, nothing to do */ |
2007 | if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { | 1994 | if (!ip) { |
2008 | read_unlock(&pag->pag_ici_lock); | 1995 | rcu_read_unlock(); |
1996 | continue; | ||
1997 | } | ||
1998 | |||
1999 | /* | ||
2000 | * because this is an RCU protected lookup, we could | ||
2001 | * find a recently freed or even reallocated inode | ||
2002 | * during the lookup. We need to check under the | ||
2003 | * i_flags_lock for a valid inode here. Skip it if it | ||
2004 | * is not valid, the wrong inode or stale. | ||
2005 | */ | ||
2006 | spin_lock(&ip->i_flags_lock); | ||
2007 | if (ip->i_ino != inum + i || | ||
2008 | __xfs_iflags_test(ip, XFS_ISTALE)) { | ||
2009 | spin_unlock(&ip->i_flags_lock); | ||
2010 | rcu_read_unlock(); | ||
2009 | continue; | 2011 | continue; |
2010 | } | 2012 | } |
2013 | spin_unlock(&ip->i_flags_lock); | ||
2011 | 2014 | ||
2012 | /* | 2015 | /* |
2013 | * Don't try to lock/unlock the current inode, but we | 2016 | * Don't try to lock/unlock the current inode, but we |
@@ -2018,11 +2021,11 @@ retry: | |||
2018 | */ | 2021 | */ |
2019 | if (ip != free_ip && | 2022 | if (ip != free_ip && |
2020 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | 2023 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
2021 | read_unlock(&pag->pag_ici_lock); | 2024 | rcu_read_unlock(); |
2022 | delay(1); | 2025 | delay(1); |
2023 | goto retry; | 2026 | goto retry; |
2024 | } | 2027 | } |
2025 | read_unlock(&pag->pag_ici_lock); | 2028 | rcu_read_unlock(); |
2026 | 2029 | ||
2027 | xfs_iflock(ip); | 2030 | xfs_iflock(ip); |
2028 | xfs_iflags_set(ip, XFS_ISTALE); | 2031 | xfs_iflags_set(ip, XFS_ISTALE); |
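The hunk above trades the pag_ici_lock read lock for an RCU read lock around the radix tree lookup. RCU gives no guarantee that a looked-up slot still names the same object, so every hit has to be revalidated under i_flags_lock before use. A minimal sketch of that pattern, with a hypothetical helper name (real callers must also pin the inode, e.g. via the ILOCK or flush lock, before dropping the RCU read lock; that step is elided here):

    static struct xfs_inode *
    xfs_ici_lookup_valid(
    	struct xfs_mount	*mp,
    	struct xfs_perag	*pag,
    	xfs_ino_t		ino)
    {
    	struct xfs_inode	*ip;
    	struct xfs_inode	*valid = NULL;

    	rcu_read_lock();
    	ip = radix_tree_lookup(&pag->pag_ici_root,
    			       XFS_INO_TO_AGINO(mp, ino));
    	if (ip) {
    		/* slot may have been freed and reused: recheck identity */
    		spin_lock(&ip->i_flags_lock);
    		if (ip->i_ino == ino && !__xfs_iflags_test(ip, XFS_ISTALE))
    			valid = ip;	/* still the inode we asked for */
    		spin_unlock(&ip->i_flags_lock);
    	}
    	rcu_read_unlock();
    	return valid;
    }

The same validate-under-i_flags_lock step reappears in the xfs_iflush_cluster hunk below.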
@@ -2554,12 +2557,9 @@ xfs_iflush_fork( | |||
2554 | case XFS_DINODE_FMT_EXTENTS: | 2557 | case XFS_DINODE_FMT_EXTENTS: |
2555 | ASSERT((ifp->if_flags & XFS_IFEXTENTS) || | 2558 | ASSERT((ifp->if_flags & XFS_IFEXTENTS) || |
2556 | !(iip->ili_format.ilf_fields & extflag[whichfork])); | 2559 | !(iip->ili_format.ilf_fields & extflag[whichfork])); |
2557 | ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || | ||
2558 | (ifp->if_bytes == 0)); | ||
2559 | ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || | ||
2560 | (ifp->if_bytes > 0)); | ||
2561 | if ((iip->ili_format.ilf_fields & extflag[whichfork]) && | 2560 | if ((iip->ili_format.ilf_fields & extflag[whichfork]) && |
2562 | (ifp->if_bytes > 0)) { | 2561 | (ifp->if_bytes > 0)) { |
2562 | ASSERT(xfs_iext_get_ext(ifp, 0)); | ||
2563 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); | 2563 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); |
2564 | (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, | 2564 | (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, |
2565 | whichfork); | 2565 | whichfork); |
@@ -2628,7 +2628,7 @@ xfs_iflush_cluster( | |||
2628 | 2628 | ||
2629 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 2629 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
2630 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | 2630 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; |
2631 | read_lock(&pag->pag_ici_lock); | 2631 | rcu_read_lock(); |
2632 | /* really need a gang lookup range call here */ | 2632 | /* really need a gang lookup range call here */ |
2633 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | 2633 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, |
2634 | first_index, inodes_per_cluster); | 2634 | first_index, inodes_per_cluster); |
@@ -2639,9 +2639,21 @@ xfs_iflush_cluster( | |||
2639 | iq = ilist[i]; | 2639 | iq = ilist[i]; |
2640 | if (iq == ip) | 2640 | if (iq == ip) |
2641 | continue; | 2641 | continue; |
2642 | /* if the inode lies outside this cluster, we're done. */ | 2642 | |
2643 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | 2643 | /* |
2644 | break; | 2644 | * because this is an RCU protected lookup, we could find a |
2645 | * recently freed or even reallocated inode during the lookup. | ||
2646 | * We need to check under the i_flags_lock for a valid inode | ||
2647 | * here. Skip it if it is not valid or the wrong inode. | ||
2648 | */ | ||
2649 | spin_lock(&iq->i_flags_lock); | ||
2650 | if (!iq->i_ino || | ||
2651 | (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { | ||
2652 | spin_unlock(&iq->i_flags_lock); | ||
2653 | continue; | ||
2654 | } | ||
2655 | spin_unlock(&iq->i_flags_lock); | ||
2656 | |||
2645 | /* | 2657 | /* |
2646 | * Do an un-protected check to see if the inode is dirty and | 2658 | * Do an un-protected check to see if the inode is dirty and |
2647 | * is a candidate for flushing. These checks will be repeated | 2659 | * is a candidate for flushing. These checks will be repeated |
@@ -2691,7 +2703,7 @@ xfs_iflush_cluster( | |||
2691 | } | 2703 | } |
2692 | 2704 | ||
2693 | out_free: | 2705 | out_free: |
2694 | read_unlock(&pag->pag_ici_lock); | 2706 | rcu_read_unlock(); |
2695 | kmem_free(ilist); | 2707 | kmem_free(ilist); |
2696 | out_put: | 2708 | out_put: |
2697 | xfs_perag_put(pag); | 2709 | xfs_perag_put(pag); |
@@ -2703,7 +2715,7 @@ cluster_corrupt_out: | |||
2703 | * Corruption detected in the clustering loop. Invalidate the | 2715 | * Corruption detected in the clustering loop. Invalidate the |
2704 | * inode buffer and shut down the filesystem. | 2716 | * inode buffer and shut down the filesystem. |
2705 | */ | 2717 | */ |
2706 | read_unlock(&pag->pag_ici_lock); | 2718 | rcu_read_unlock(); |
2707 | /* | 2719 | /* |
2708 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 2720 | * Clean up the buffer. If it was B_DELWRI, just release it -- |
2709 | * brelse can handle it with no problems. If not, shut down the | 2721 | * brelse can handle it with no problems. If not, shut down the |
@@ -2725,7 +2737,7 @@ cluster_corrupt_out: | |||
2725 | XFS_BUF_UNDONE(bp); | 2737 | XFS_BUF_UNDONE(bp); |
2726 | XFS_BUF_STALE(bp); | 2738 | XFS_BUF_STALE(bp); |
2727 | XFS_BUF_ERROR(bp,EIO); | 2739 | XFS_BUF_ERROR(bp,EIO); |
2728 | xfs_biodone(bp); | 2740 | xfs_buf_ioend(bp, 0); |
2729 | } else { | 2741 | } else { |
2730 | XFS_BUF_STALE(bp); | 2742 | XFS_BUF_STALE(bp); |
2731 | xfs_buf_relse(bp); | 2743 | xfs_buf_relse(bp); |
@@ -2773,7 +2785,7 @@ xfs_iflush( | |||
2773 | 2785 | ||
2774 | /* | 2786 | /* |
2775 | * We can't flush the inode until it is unpinned, so wait for it if we | 2787 | * We can't flush the inode until it is unpinned, so wait for it if we |
2776 | * are allowed to block. We know noone new can pin it, because we are | 2788 | * are allowed to block. We know no one new can pin it, because we are |
2777 | * holding the inode lock shared and you need to hold it exclusively to | 2789 | * holding the inode lock shared and you need to hold it exclusively to |
2778 | * pin the inode. | 2790 | * pin the inode. |
2779 | * | 2791 | * |
@@ -2819,7 +2831,7 @@ xfs_iflush( | |||
2819 | * Get the buffer containing the on-disk inode. | 2831 | * Get the buffer containing the on-disk inode. |
2820 | */ | 2832 | */ |
2821 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, | 2833 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, |
2822 | (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); | 2834 | (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK); |
2823 | if (error || !bp) { | 2835 | if (error || !bp) { |
2824 | xfs_ifunlock(ip); | 2836 | xfs_ifunlock(ip); |
2825 | return error; | 2837 | return error; |
@@ -2910,16 +2922,16 @@ xfs_iflush_int( | |||
2910 | 2922 | ||
2911 | if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, | 2923 | if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, |
2912 | mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { | 2924 | mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { |
2913 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2925 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2914 | "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", | 2926 | "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", |
2915 | ip->i_ino, be16_to_cpu(dip->di_magic), dip); | 2927 | __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); |
2916 | goto corrupt_out; | 2928 | goto corrupt_out; |
2917 | } | 2929 | } |
2918 | if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, | 2930 | if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, |
2919 | mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { | 2931 | mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { |
2920 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2932 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2921 | "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", | 2933 | "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x", |
2922 | ip->i_ino, ip, ip->i_d.di_magic); | 2934 | __func__, ip->i_ino, ip, ip->i_d.di_magic); |
2923 | goto corrupt_out; | 2935 | goto corrupt_out; |
2924 | } | 2936 | } |
2925 | if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { | 2937 | if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { |
@@ -2927,9 +2939,9 @@ xfs_iflush_int( | |||
2927 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && | 2939 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && |
2928 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), | 2940 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), |
2929 | mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { | 2941 | mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { |
2930 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2942 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2931 | "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", | 2943 | "%s: Bad regular inode %Lu, ptr 0x%p", |
2932 | ip->i_ino, ip); | 2944 | __func__, ip->i_ino, ip); |
2933 | goto corrupt_out; | 2945 | goto corrupt_out; |
2934 | } | 2946 | } |
2935 | } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { | 2947 | } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { |
@@ -2938,28 +2950,28 @@ xfs_iflush_int( | |||
2938 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && | 2950 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && |
2939 | (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), | 2951 | (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), |
2940 | mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { | 2952 | mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { |
2941 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2953 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2942 | "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", | 2954 | "%s: Bad directory inode %Lu, ptr 0x%p", |
2943 | ip->i_ino, ip); | 2955 | __func__, ip->i_ino, ip); |
2944 | goto corrupt_out; | 2956 | goto corrupt_out; |
2945 | } | 2957 | } |
2946 | } | 2958 | } |
2947 | if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > | 2959 | if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > |
2948 | ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, | 2960 | ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, |
2949 | XFS_RANDOM_IFLUSH_5)) { | 2961 | XFS_RANDOM_IFLUSH_5)) { |
2950 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2962 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2951 | "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", | 2963 | "%s: detected corrupt incore inode %Lu, " |
2952 | ip->i_ino, | 2964 | "total extents = %d, nblocks = %Ld, ptr 0x%p", |
2965 | __func__, ip->i_ino, | ||
2953 | ip->i_d.di_nextents + ip->i_d.di_anextents, | 2966 | ip->i_d.di_nextents + ip->i_d.di_anextents, |
2954 | ip->i_d.di_nblocks, | 2967 | ip->i_d.di_nblocks, ip); |
2955 | ip); | ||
2956 | goto corrupt_out; | 2968 | goto corrupt_out; |
2957 | } | 2969 | } |
2958 | if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, | 2970 | if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, |
2959 | mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { | 2971 | mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { |
2960 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 2972 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
2961 | "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", | 2973 | "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", |
2962 | ip->i_ino, ip->i_d.di_forkoff, ip); | 2974 | __func__, ip->i_ino, ip->i_d.di_forkoff, ip); |
2963 | goto corrupt_out; | 2975 | goto corrupt_out; |
2964 | } | 2976 | } |
2965 | /* | 2977 | /* |
@@ -3008,7 +3020,7 @@ xfs_iflush_int( | |||
3008 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 3020 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
3009 | memset(&(dip->di_pad[0]), 0, | 3021 | memset(&(dip->di_pad[0]), 0, |
3010 | sizeof(dip->di_pad)); | 3022 | sizeof(dip->di_pad)); |
3011 | ASSERT(ip->i_d.di_projid == 0); | 3023 | ASSERT(xfs_get_projid(ip) == 0); |
3012 | } | 3024 | } |
3013 | } | 3025 | } |
3014 | 3026 | ||
@@ -3096,6 +3108,8 @@ xfs_iext_get_ext( | |||
3096 | xfs_extnum_t idx) /* index of target extent */ | 3108 | xfs_extnum_t idx) /* index of target extent */ |
3097 | { | 3109 | { |
3098 | ASSERT(idx >= 0); | 3110 | ASSERT(idx >= 0); |
3111 | ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); | ||
3112 | |||
3099 | if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { | 3113 | if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { |
3100 | return ifp->if_u1.if_ext_irec->er_extbuf; | 3114 | return ifp->if_u1.if_ext_irec->er_extbuf; |
3101 | } else if (ifp->if_flags & XFS_IFEXTIREC) { | 3115 | } else if (ifp->if_flags & XFS_IFEXTIREC) { |
@@ -3175,7 +3189,6 @@ xfs_iext_add( | |||
3175 | } | 3189 | } |
3176 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | 3190 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; |
3177 | ifp->if_real_bytes = 0; | 3191 | ifp->if_real_bytes = 0; |
3178 | ifp->if_lastex = nextents + ext_diff; | ||
3179 | } | 3192 | } |
3180 | /* | 3193 | /* |
3181 | * Otherwise use a linear (direct) extent list. | 3194 | * Otherwise use a linear (direct) extent list. |
@@ -3870,8 +3883,10 @@ xfs_iext_idx_to_irec( | |||
3870 | xfs_extnum_t page_idx = *idxp; /* extent index in target list */ | 3883 | xfs_extnum_t page_idx = *idxp; /* extent index in target list */ |
3871 | 3884 | ||
3872 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 3885 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
3873 | ASSERT(page_idx >= 0 && page_idx <= | 3886 | ASSERT(page_idx >= 0); |
3874 | ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); | 3887 | ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); |
3888 | ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); | ||
3889 | |||
3875 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 3890 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
3876 | erp_idx = 0; | 3891 | erp_idx = 0; |
3877 | low = 0; | 3892 | low = 0; |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0898c5417d12..964cfea77686 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -67,7 +67,6 @@ typedef struct xfs_ifork { | |||
67 | short if_broot_bytes; /* bytes allocated for root */ | 67 | short if_broot_bytes; /* bytes allocated for root */ |
68 | unsigned char if_flags; /* per-fork flags */ | 68 | unsigned char if_flags; /* per-fork flags */ |
69 | unsigned char if_ext_max; /* max # of extent records */ | 69 | unsigned char if_ext_max; /* max # of extent records */ |
70 | xfs_extnum_t if_lastex; /* last if_extents used */ | ||
71 | union { | 70 | union { |
72 | xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ | 71 | xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ |
73 | xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ | 72 | xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ |
@@ -111,7 +110,7 @@ struct xfs_imap { | |||
111 | * Generally, we do not want to hold the i_rlock while holding the | 110 | * Generally, we do not want to hold the i_rlock while holding the |
112 | * i_ilock. Hierarchy is i_iolock followed by i_rlock. | 111 | * i_ilock. Hierarchy is i_iolock followed by i_rlock. |
113 | * | 112 | * |
114 | * xfs_iptr_t contains all the inode fields upto and including the | 113 | * xfs_iptr_t contains all the inode fields up to and including the |
115 | * i_mnext and i_mprev fields, it is used as a marker in the inode | 114 | * i_mnext and i_mprev fields, it is used as a marker in the inode |
116 | * chain off the mount structure by xfs_sync calls. | 115 | * chain off the mount structure by xfs_sync calls. |
117 | */ | 116 | */ |
@@ -134,8 +133,9 @@ typedef struct xfs_icdinode { | |||
134 | __uint32_t di_uid; /* owner's user id */ | 133 | __uint32_t di_uid; /* owner's user id */ |
135 | __uint32_t di_gid; /* owner's group id */ | 134 | __uint32_t di_gid; /* owner's group id */ |
136 | __uint32_t di_nlink; /* number of links to file */ | 135 | __uint32_t di_nlink; /* number of links to file */ |
137 | __uint16_t di_projid; /* owner's project id */ | 136 | __uint16_t di_projid_lo; /* lower part of owner's project id */ |
138 | __uint8_t di_pad[8]; /* unused, zeroed space */ | 137 | __uint16_t di_projid_hi; /* higher part of owner's project id */ |
138 | __uint8_t di_pad[6]; /* unused, zeroed space */ | ||
139 | __uint16_t di_flushiter; /* incremented on flush */ | 139 | __uint16_t di_flushiter; /* incremented on flush */ |
140 | xfs_ictimestamp_t di_atime; /* time last accessed */ | 140 | xfs_ictimestamp_t di_atime; /* time last accessed */ |
141 | xfs_ictimestamp_t di_mtime; /* time last modified */ | 141 | xfs_ictimestamp_t di_mtime; /* time last modified */ |
@@ -212,7 +212,6 @@ typedef struct xfs_icdinode { | |||
212 | #ifdef __KERNEL__ | 212 | #ifdef __KERNEL__ |
213 | 213 | ||
214 | struct bhv_desc; | 214 | struct bhv_desc; |
215 | struct cred; | ||
216 | struct xfs_buf; | 215 | struct xfs_buf; |
217 | struct xfs_bmap_free; | 216 | struct xfs_bmap_free; |
218 | struct xfs_bmbt_irec; | 217 | struct xfs_bmbt_irec; |
@@ -335,6 +334,25 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
335 | } | 334 | } |
336 | 335 | ||
337 | /* | 336 | /* |
337 | * Project quota id helpers (previously projid was 16bit only; | ||
338 | * using two 16bit values to hold the new 32bit projid was chosen | ||
339 | * to retain compatibility with "old" filesystems). | ||
340 | */ | ||
341 | static inline prid_t | ||
342 | xfs_get_projid(struct xfs_inode *ip) | ||
343 | { | ||
344 | return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo; | ||
345 | } | ||
346 | |||
347 | static inline void | ||
348 | xfs_set_projid(struct xfs_inode *ip, | ||
349 | prid_t projid) | ||
350 | { | ||
351 | ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16); | ||
352 | ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); | ||
353 | } | ||
354 | |||
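As a quick illustration of the encoding (the values below are hypothetical), storing a 32-bit project id of 0x00012345 leaves 0x0001 in di_projid_hi and 0x2345 in di_projid_lo, and xfs_get_projid() reassembles it losslessly:

    /* illustrative only; not part of the patch */
    prid_t	prid = 0x00012345;

    xfs_set_projid(ip, prid);
    ASSERT(ip->i_d.di_projid_hi == 0x0001);	/* prid >> 16 */
    ASSERT(ip->i_d.di_projid_lo == 0x2345);	/* prid & 0xffff */
    ASSERT(xfs_get_projid(ip) == prid);		/* lossless round trip */

Filesystems that only ever stored 16-bit project ids have di_projid_hi == 0 (it sits in what used to be di_pad), which is how on-disk compatibility is retained.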
355 | /* | ||
338 | * Manage the i_flush queue embedded in the inode. This completion | 356 | * Manage the i_flush queue embedded in the inode. This completion |
339 | * queue synchronizes processes attempting to flush the in-core | 357 | * queue synchronizes processes attempting to flush the in-core |
340 | * inode back to disk. | 358 | * inode back to disk. |
@@ -357,12 +375,23 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
357 | /* | 375 | /* |
358 | * In-core inode flags. | 376 | * In-core inode flags. |
359 | */ | 377 | */ |
360 | #define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ | 378 | #define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ |
361 | #define XFS_ISTALE 0x0002 /* inode has been staled */ | 379 | #define XFS_ISTALE 0x0002 /* inode has been staled */ |
362 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ | 380 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ |
363 | #define XFS_INEW 0x0008 /* inode has just been allocated */ | 381 | #define XFS_INEW 0x0008 /* inode has just been allocated */ |
364 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ | 382 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ |
365 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ | 383 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ |
384 | #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ | ||
385 | |||
386 | /* | ||
387 | * Per-lifetime flags need to be reset when re-using a reclaimable inode during | ||
388 | * inode lookup. This prevents unintended behaviour on the new inode from | ||
389 | * occurring. | ||
390 | */ | ||
391 | #define XFS_IRECLAIM_RESET_FLAGS \ | ||
392 | (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ | ||
393 | XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ | ||
394 | XFS_IFILESTREAM) | ||
366 | 395 | ||
367 | /* | 396 | /* |
368 | * Flags for inode locking. | 397 | * Flags for inode locking. |
@@ -389,28 +418,35 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
389 | /* | 418 | /* |
390 | * Flags for lockdep annotations. | 419 | * Flags for lockdep annotations. |
391 | * | 420 | * |
392 | * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes | 421 | * XFS_LOCK_PARENT - for directory operations that require locking a |
393 | * (ie directory operations that require locking a directory inode and | 422 | * parent directory inode and a child entry inode. The parent gets locked |
394 | * an entry inode). The first inode gets locked with this flag so it | 423 | * with this flag so it gets a lockdep subclass of 1 and the child entry |
395 | * gets a lockdep subclass of 1 and the second lock will have a lockdep | 424 | * lock will have a lockdep subclass of 0. |
396 | * subclass of 0. | 425 | * |
426 | * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary | ||
427 | * inodes do not participate in the normal lock order, and thus have their | ||
428 | * own subclasses. | ||
397 | * | 429 | * |
398 | * XFS_LOCK_INUMORDER - for locking several inodes at the same time | 430 | * XFS_LOCK_INUMORDER - for locking several inodes at the same time |
399 | * with xfs_lock_inodes(). This flag is used as the starting subclass | 431 | * with xfs_lock_inodes(). This flag is used as the starting subclass |
400 | * and each subsequent lock acquired will increment the subclass by one. | 432 | * and each subsequent lock acquired will increment the subclass by one. |
401 | * So the first lock acquired will have a lockdep subclass of 2, the | 433 | * So the first lock acquired will have a lockdep subclass of 4, the |
402 | * second lock will have a lockdep subclass of 3, and so on. It is | 434 | * second lock will have a lockdep subclass of 5, and so on. It is |
403 | * the responsibility of the class builder to shift this to the correct | 435 | * the responsibility of the class builder to shift this to the correct |
404 | * portion of the lock_mode lockdep mask. | 436 | * portion of the lock_mode lockdep mask. |
405 | */ | 437 | */ |
406 | #define XFS_LOCK_PARENT 1 | 438 | #define XFS_LOCK_PARENT 1 |
407 | #define XFS_LOCK_INUMORDER 2 | 439 | #define XFS_LOCK_RTBITMAP 2 |
440 | #define XFS_LOCK_RTSUM 3 | ||
441 | #define XFS_LOCK_INUMORDER 4 | ||
408 | 442 | ||
409 | #define XFS_IOLOCK_SHIFT 16 | 443 | #define XFS_IOLOCK_SHIFT 16 |
410 | #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) | 444 | #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) |
411 | 445 | ||
412 | #define XFS_ILOCK_SHIFT 24 | 446 | #define XFS_ILOCK_SHIFT 24 |
413 | #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) | 447 | #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) |
448 | #define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) | ||
449 | #define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) | ||
414 | 450 | ||
415 | #define XFS_IOLOCK_DEP_MASK 0x00ff0000 | 451 | #define XFS_IOLOCK_DEP_MASK 0x00ff0000 |
416 | #define XFS_ILOCK_DEP_MASK 0xff000000 | 452 | #define XFS_ILOCK_DEP_MASK 0xff000000 |
@@ -419,6 +455,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
419 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) | 455 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) |
420 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) | 456 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) |
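Expanding the macros shows the layout concretely: the iolock subclass occupies bits 16-23 and the ilock subclass bits 24-31 of the lock_mode word, and the DEP macros recover the subclass again. Illustrative values only, not new code:

    XFS_IOLOCK_PARENT		/* 1 << 16 == 0x00010000 */
    XFS_ILOCK_PARENT		/* 1 << 24 == 0x01000000 */
    XFS_ILOCK_RTBITMAP		/* 2 << 24 == 0x02000000 */
    XFS_ILOCK_RTSUM		/* 3 << 24 == 0x03000000 */
    XFS_ILOCK_DEP(XFS_ILOCK_RTSUM)	/* == 3 == XFS_LOCK_RTSUM */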
421 | 457 | ||
458 | extern struct lock_class_key xfs_iolock_reclaimable; | ||
459 | |||
422 | /* | 460 | /* |
423 | * Flags for xfs_itruncate_start(). | 461 | * Flags for xfs_itruncate_start(). |
424 | */ | 462 | */ |
@@ -456,8 +494,8 @@ void xfs_inode_free(struct xfs_inode *ip); | |||
456 | * xfs_inode.c prototypes. | 494 | * xfs_inode.c prototypes. |
457 | */ | 495 | */ |
458 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, | 496 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, |
459 | xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, | 497 | xfs_nlink_t, xfs_dev_t, prid_t, int, |
460 | int, struct xfs_buf **, boolean_t *, xfs_inode_t **); | 498 | struct xfs_buf **, boolean_t *, xfs_inode_t **); |
461 | 499 | ||
462 | uint xfs_ip2xflags(struct xfs_inode *); | 500 | uint xfs_ip2xflags(struct xfs_inode *); |
463 | uint xfs_dic2xflags(struct xfs_dinode *); | 501 | uint xfs_dic2xflags(struct xfs_dinode *); |
@@ -471,7 +509,6 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | |||
471 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 509 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
472 | void xfs_iunpin_wait(xfs_inode_t *); | 510 | void xfs_iunpin_wait(xfs_inode_t *); |
473 | int xfs_iflush(xfs_inode_t *, uint); | 511 | int xfs_iflush(xfs_inode_t *, uint); |
474 | void xfs_ichgtime(xfs_inode_t *, int); | ||
475 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 512 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
476 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 513 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
477 | 514 | ||
@@ -482,7 +519,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *); | |||
482 | #define IHOLD(ip) \ | 519 | #define IHOLD(ip) \ |
483 | do { \ | 520 | do { \ |
484 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ | 521 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ |
485 | atomic_inc(&(VFS_I(ip)->i_count)); \ | 522 | ihold(VFS_I(ip)); \ |
486 | trace_xfs_ihold(ip, _THIS_IP_); \ | 523 | trace_xfs_ihold(ip, _THIS_IP_); \ |
487 | } while (0) | 524 | } while (0) |
488 | 525 | ||
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index fe00777e2796..b1e88d56069c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -198,6 +198,41 @@ xfs_inode_item_size( | |||
198 | } | 198 | } |
199 | 199 | ||
200 | /* | 200 | /* |
201 | * xfs_inode_item_format_extents - convert in-core extents to on-disk form | ||
202 | * | ||
203 | * For either the data or attr fork in extent format, we need to endian convert | ||
204 | * the in-core extents as we place them into the on-disk inode. In this case, we | ||
205 | * need to do this conversion before we write the extents into the log. Because | ||
206 | * we don't have the disk inode to write into here, we allocate a buffer and | ||
207 | * format the extents into it via xfs_iextents_copy(). We free the buffer in | ||
208 | * the unlock routine after the copy for the log has been made. | ||
209 | * | ||
210 | * In the case of the data fork, the in-core and on-disk fork sizes can be | ||
211 | * different due to delayed allocation extents. We only log on-disk extents | ||
212 | * here, so always use the physical fork size to determine the size of the | ||
213 | * buffer we need to allocate. | ||
214 | */ | ||
215 | STATIC void | ||
216 | xfs_inode_item_format_extents( | ||
217 | struct xfs_inode *ip, | ||
218 | struct xfs_log_iovec *vecp, | ||
219 | int whichfork, | ||
220 | int type) | ||
221 | { | ||
222 | xfs_bmbt_rec_t *ext_buffer; | ||
223 | |||
224 | ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); | ||
225 | if (whichfork == XFS_DATA_FORK) | ||
226 | ip->i_itemp->ili_extents_buf = ext_buffer; | ||
227 | else | ||
228 | ip->i_itemp->ili_aextents_buf = ext_buffer; | ||
229 | |||
230 | vecp->i_addr = ext_buffer; | ||
231 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); | ||
232 | vecp->i_type = type; | ||
233 | } | ||
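Note that the helper allocates the conversion buffer with KM_SLEEP, so this path may block, and it stashes the buffer in ili_extents_buf or ili_aextents_buf so the unlock routine can free it once the copy for the log has been made, exactly as the comment above describes. The two hunks further down delete the previously duplicated open-coded versions of this logic from the data- and attr-fork branches of xfs_inode_item_format().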
234 | |||
235 | /* | ||
201 | * This is called to fill in the vector of log iovecs for the | 236 | * This is called to fill in the vector of log iovecs for the |
202 | * given inode log item. It fills the first item with an inode | 237 | * given inode log item. It fills the first item with an inode |
203 | * log format structure, the second with the on-disk inode structure, | 238 | * log format structure, the second with the on-disk inode structure, |
@@ -213,7 +248,6 @@ xfs_inode_item_format( | |||
213 | struct xfs_inode *ip = iip->ili_inode; | 248 | struct xfs_inode *ip = iip->ili_inode; |
214 | uint nvecs; | 249 | uint nvecs; |
215 | size_t data_bytes; | 250 | size_t data_bytes; |
216 | xfs_bmbt_rec_t *ext_buffer; | ||
217 | xfs_mount_t *mp; | 251 | xfs_mount_t *mp; |
218 | 252 | ||
219 | vecp->i_addr = &iip->ili_format; | 253 | vecp->i_addr = &iip->ili_format; |
@@ -223,15 +257,6 @@ xfs_inode_item_format( | |||
223 | nvecs = 1; | 257 | nvecs = 1; |
224 | 258 | ||
225 | /* | 259 | /* |
226 | * Make sure the linux inode is dirty. We do this before | ||
227 | * clearing i_update_core as the VFS will call back into | ||
228 | * XFS here and set i_update_core, so we need to dirty the | ||
229 | * inode first so that the ordering of i_update_core and | ||
230 | * unlogged modifications still works as described below. | ||
231 | */ | ||
232 | xfs_mark_inode_dirty_sync(ip); | ||
233 | |||
234 | /* | ||
235 | * Clear i_update_core if the timestamps (or any other | 260 | * Clear i_update_core if the timestamps (or any other |
236 | * non-transactional modification) need flushing/logging | 261 | * non-transactional modification) need flushing/logging |
237 | * and we're about to log them with the rest of the core. | 262 | * and we're about to log them with the rest of the core. |
@@ -329,22 +354,8 @@ xfs_inode_item_format( | |||
329 | } else | 354 | } else |
330 | #endif | 355 | #endif |
331 | { | 356 | { |
332 | /* | 357 | xfs_inode_item_format_extents(ip, vecp, |
333 | * There are delayed allocation extents | 358 | XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); |
334 | * in the inode, or we need to convert | ||
335 | * the extents to on disk format. | ||
336 | * Use xfs_iextents_copy() | ||
337 | * to copy only the real extents into | ||
338 | * a separate buffer. We'll free the | ||
339 | * buffer in the unlock routine. | ||
340 | */ | ||
341 | ext_buffer = kmem_alloc(ip->i_df.if_bytes, | ||
342 | KM_SLEEP); | ||
343 | iip->ili_extents_buf = ext_buffer; | ||
344 | vecp->i_addr = ext_buffer; | ||
345 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | ||
346 | XFS_DATA_FORK); | ||
347 | vecp->i_type = XLOG_REG_TYPE_IEXT; | ||
348 | } | 359 | } |
349 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); | 360 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); |
350 | iip->ili_format.ilf_dsize = vecp->i_len; | 361 | iip->ili_format.ilf_dsize = vecp->i_len; |
@@ -454,19 +465,12 @@ xfs_inode_item_format( | |||
454 | */ | 465 | */ |
455 | vecp->i_addr = ip->i_afp->if_u1.if_extents; | 466 | vecp->i_addr = ip->i_afp->if_u1.if_extents; |
456 | vecp->i_len = ip->i_afp->if_bytes; | 467 | vecp->i_len = ip->i_afp->if_bytes; |
468 | vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; | ||
457 | #else | 469 | #else |
458 | ASSERT(iip->ili_aextents_buf == NULL); | 470 | ASSERT(iip->ili_aextents_buf == NULL); |
459 | /* | 471 | xfs_inode_item_format_extents(ip, vecp, |
460 | * Need to endian flip before logging | 472 | XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); |
461 | */ | ||
462 | ext_buffer = kmem_alloc(ip->i_afp->if_bytes, | ||
463 | KM_SLEEP); | ||
464 | iip->ili_aextents_buf = ext_buffer; | ||
465 | vecp->i_addr = ext_buffer; | ||
466 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | ||
467 | XFS_ATTR_FORK); | ||
468 | #endif | 473 | #endif |
469 | vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; | ||
470 | iip->ili_format.ilf_asize = vecp->i_len; | 474 | iip->ili_format.ilf_asize = vecp->i_len; |
471 | vecp++; | 475 | vecp++; |
472 | nvecs++; | 476 | nvecs++; |
@@ -666,18 +670,39 @@ xfs_inode_item_unlock( | |||
666 | } | 670 | } |
667 | 671 | ||
668 | /* | 672 | /* |
669 | * This is called to find out where the oldest active copy of the | 673 | * This is called to find out where the oldest active copy of the inode log |
670 | * inode log item in the on disk log resides now that the last log | 674 | * item in the on disk log resides now that the last log write of it completed |
671 | * write of it completed at the given lsn. Since we always re-log | 675 | * at the given lsn. Since we always re-log all dirty data in an inode, the |
672 | * all dirty data in an inode, the latest copy in the on disk log | 676 | * latest copy in the on disk log is the only one that matters. Therefore, |
673 | * is the only one that matters. Therefore, simply return the | 677 | * simply return the given lsn. |
674 | * given lsn. | 678 | * |
679 | * If the inode has been marked stale because the cluster is being freed, we | ||
680 | * don't want to (re-)insert this inode into the AIL. There is a race condition | ||
681 | * where the cluster buffer may be unpinned before the inode is inserted into | ||
682 | * the AIL during transaction committed processing. If the buffer is unpinned | ||
683 | * before the inode item has been committed and inserted, then it is possible | ||
684 | * for the buffer to be written and IO completes before the inode is inserted | ||
685 | * into the AIL. In that case, we'd be inserting a clean, stale inode into the | ||
686 | * AIL which will never get removed. It will, however, get reclaimed which | ||
687 | * triggers an assert in xfs_inode_free() complaining about freeing an inode | ||
688 | * still in the AIL. | ||
689 | * | ||
690 | * To avoid this, just unpin the inode directly and return a LSN of -1 so the | ||
691 | * transaction committed code knows that it does not need to do any further | ||
692 | * processing on the item. | ||
675 | */ | 693 | */ |
676 | STATIC xfs_lsn_t | 694 | STATIC xfs_lsn_t |
677 | xfs_inode_item_committed( | 695 | xfs_inode_item_committed( |
678 | struct xfs_log_item *lip, | 696 | struct xfs_log_item *lip, |
679 | xfs_lsn_t lsn) | 697 | xfs_lsn_t lsn) |
680 | { | 698 | { |
699 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
700 | struct xfs_inode *ip = iip->ili_inode; | ||
701 | |||
702 | if (xfs_iflags_test(ip, XFS_ISTALE)) { | ||
703 | xfs_inode_item_unpin(lip, 0); | ||
704 | return -1; | ||
705 | } | ||
681 | return lsn; | 706 | return lsn; |
682 | } | 707 | } |
683 | 708 | ||
@@ -750,11 +775,11 @@ xfs_inode_item_push( | |||
750 | * Push the inode to it's backing buffer. This will not remove the | 775 | * Push the inode to it's backing buffer. This will not remove the |
751 | * inode from the AIL - a further push will be required to trigger a | 776 | * inode from the AIL - a further push will be required to trigger a |
752 | * buffer push. However, this allows all the dirty inodes to be pushed | 777 | * buffer push. However, this allows all the dirty inodes to be pushed |
753 | * to the buffer before it is pushed to disk. THe buffer IO completion | 778 | * to the buffer before it is pushed to disk. The buffer IO completion |
754 | * will pull th einode from the AIL, mark it clean and unlock the flush | 779 | * will pull the inode from the AIL, mark it clean and unlock the flush |
755 | * lock. | 780 | * lock. |
756 | */ | 781 | */ |
757 | (void) xfs_iflush(ip, 0); | 782 | (void) xfs_iflush(ip, SYNC_TRYLOCK); |
758 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 783 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
759 | } | 784 | } |
760 | 785 | ||
@@ -832,15 +857,64 @@ xfs_inode_item_destroy( | |||
832 | * flushed to disk. It is responsible for removing the inode item | 857 | * flushed to disk. It is responsible for removing the inode item |
833 | * from the AIL if it has not been re-logged, and unlocking the inode's | 858 | * from the AIL if it has not been re-logged, and unlocking the inode's |
834 | * flush lock. | 859 | * flush lock. |
860 | * | ||
861 | * To reduce AIL lock traffic as much as possible, we scan the buffer log item | ||
862 | * list for other inodes that will run this function. We remove them from the | ||
863 | * buffer list so we can process all the inode IO completions in one AIL lock | ||
864 | * traversal. | ||
835 | */ | 865 | */ |
836 | void | 866 | void |
837 | xfs_iflush_done( | 867 | xfs_iflush_done( |
838 | struct xfs_buf *bp, | 868 | struct xfs_buf *bp, |
839 | struct xfs_log_item *lip) | 869 | struct xfs_log_item *lip) |
840 | { | 870 | { |
841 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 871 | struct xfs_inode_log_item *iip; |
842 | xfs_inode_t *ip = iip->ili_inode; | 872 | struct xfs_log_item *blip; |
873 | struct xfs_log_item *next; | ||
874 | struct xfs_log_item *prev; | ||
843 | struct xfs_ail *ailp = lip->li_ailp; | 875 | struct xfs_ail *ailp = lip->li_ailp; |
876 | int need_ail = 0; | ||
877 | |||
878 | /* | ||
879 | * Scan the buffer IO completions for other inodes being completed and | ||
880 | * attach them to the current inode log item. | ||
881 | */ | ||
882 | blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | ||
883 | prev = NULL; | ||
884 | while (blip != NULL) { | ||
885 | if (blip->li_cb != xfs_iflush_done) { | ||
886 | prev = blip; | ||
887 | blip = blip->li_bio_list; | ||
888 | continue; | ||
889 | } | ||
890 | |||
891 | /* remove from list */ | ||
892 | next = blip->li_bio_list; | ||
893 | if (!prev) { | ||
894 | XFS_BUF_SET_FSPRIVATE(bp, next); | ||
895 | } else { | ||
896 | prev->li_bio_list = next; | ||
897 | } | ||
898 | |||
899 | /* add to current list */ | ||
900 | blip->li_bio_list = lip->li_bio_list; | ||
901 | lip->li_bio_list = blip; | ||
902 | |||
903 | /* | ||
904 | * while we have the item, do the unlocked check for needing | ||
905 | * the AIL lock. | ||
906 | */ | ||
907 | iip = INODE_ITEM(blip); | ||
908 | if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) | ||
909 | need_ail++; | ||
910 | |||
911 | blip = next; | ||
912 | } | ||
913 | |||
914 | /* make sure we capture the state of the initial inode. */ | ||
915 | iip = INODE_ITEM(lip); | ||
916 | if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) | ||
917 | need_ail++; | ||
844 | 918 | ||
845 | /* | 919 | /* |
846 | * We only want to pull the item from the AIL if it is | 920 | * We only want to pull the item from the AIL if it is |
@@ -851,28 +925,37 @@ xfs_iflush_done( | |||
851 | * the lock since it's cheaper, and then we recheck while | 925 | * the lock since it's cheaper, and then we recheck while |
852 | * holding the lock before removing the inode from the AIL. | 926 | * holding the lock before removing the inode from the AIL. |
853 | */ | 927 | */ |
854 | if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) { | 928 | if (need_ail) { |
929 | struct xfs_log_item *log_items[need_ail]; | ||
930 | int i = 0; | ||
855 | spin_lock(&ailp->xa_lock); | 931 | spin_lock(&ailp->xa_lock); |
856 | if (lip->li_lsn == iip->ili_flush_lsn) { | 932 | for (blip = lip; blip; blip = blip->li_bio_list) { |
857 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 933 | iip = INODE_ITEM(blip); |
858 | xfs_trans_ail_delete(ailp, lip); | 934 | if (iip->ili_logged && |
859 | } else { | 935 | blip->li_lsn == iip->ili_flush_lsn) { |
860 | spin_unlock(&ailp->xa_lock); | 936 | log_items[i++] = blip; |
937 | } | ||
938 | ASSERT(i <= need_ail); | ||
861 | } | 939 | } |
940 | /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ | ||
941 | xfs_trans_ail_delete_bulk(ailp, log_items, i); | ||
862 | } | 942 | } |
863 | 943 | ||
864 | iip->ili_logged = 0; | ||
865 | 944 | ||
866 | /* | 945 | /* |
867 | * Clear the ili_last_fields bits now that we know that the | 946 | * clean up and unlock the flush lock now that we are done. We can clear the |
868 | * data corresponding to them is safely on disk. | 947 | * ili_last_fields bits now that we know that the data corresponding to |
948 | * them is safely on disk. | ||
869 | */ | 949 | */ |
870 | iip->ili_last_fields = 0; | 950 | for (blip = lip; blip; blip = next) { |
951 | next = blip->li_bio_list; | ||
952 | blip->li_bio_list = NULL; | ||
871 | 953 | ||
872 | /* | 954 | iip = INODE_ITEM(blip); |
873 | * Release the inode's flush lock since we're done with it. | 955 | iip->ili_logged = 0; |
874 | */ | 956 | iip->ili_last_fields = 0; |
875 | xfs_ifunlock(ip); | 957 | xfs_ifunlock(iip->ili_inode); |
958 | } | ||
876 | } | 959 | } |
877 | 960 | ||
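Read as a whole, the rewritten xfs_iflush_done() works in three passes: an unlocked scan that splices every item whose callback is xfs_iflush_done off the buffer's li_bio_list and counts the probable AIL removals; a single xa_lock section that rechecks each item's lsn and removes the whole batch with one xfs_trans_ail_delete_bulk() call (the candidates are gathered in a C99 variable-length array sized by that count); and a final unlocked pass that clears ili_logged and ili_last_fields and drops each inode's flush lock. One AIL lock round trip thus retires every inode carried by the buffer.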
878 | /* | 961 | /* |
@@ -889,7 +972,6 @@ xfs_iflush_abort( | |||
889 | { | 972 | { |
890 | xfs_inode_log_item_t *iip = ip->i_itemp; | 973 | xfs_inode_log_item_t *iip = ip->i_itemp; |
891 | 974 | ||
892 | iip = ip->i_itemp; | ||
893 | if (iip) { | 975 | if (iip) { |
894 | struct xfs_ail *ailp = iip->ili_item.li_ailp; | 976 | struct xfs_ail *ailp = iip->ili_item.li_ailp; |
895 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { | 977 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 20576146369f..091d82b94c4d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -47,127 +47,8 @@ | |||
47 | 47 | ||
48 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ | 48 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ |
49 | << mp->m_writeio_log) | 49 | << mp->m_writeio_log) |
50 | #define XFS_STRAT_WRITE_IMAPS 2 | ||
51 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP | 50 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP |
52 | 51 | ||
53 | STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, | ||
54 | int, struct xfs_bmbt_irec *, int *); | ||
55 | STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int, | ||
56 | struct xfs_bmbt_irec *, int *); | ||
57 | STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
58 | struct xfs_bmbt_irec *, int *); | ||
59 | |||
60 | int | ||
61 | xfs_iomap( | ||
62 | struct xfs_inode *ip, | ||
63 | xfs_off_t offset, | ||
64 | ssize_t count, | ||
65 | int flags, | ||
66 | struct xfs_bmbt_irec *imap, | ||
67 | int *nimaps, | ||
68 | int *new) | ||
69 | { | ||
70 | struct xfs_mount *mp = ip->i_mount; | ||
71 | xfs_fileoff_t offset_fsb, end_fsb; | ||
72 | int error = 0; | ||
73 | int lockmode = 0; | ||
74 | int bmapi_flags = 0; | ||
75 | |||
76 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | ||
77 | |||
78 | *new = 0; | ||
79 | |||
80 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
81 | return XFS_ERROR(EIO); | ||
82 | |||
83 | trace_xfs_iomap_enter(ip, offset, count, flags, NULL); | ||
84 | |||
85 | switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { | ||
86 | case BMAPI_READ: | ||
87 | lockmode = xfs_ilock_map_shared(ip); | ||
88 | bmapi_flags = XFS_BMAPI_ENTIRE; | ||
89 | break; | ||
90 | case BMAPI_WRITE: | ||
91 | lockmode = XFS_ILOCK_EXCL; | ||
92 | if (flags & BMAPI_IGNSTATE) | ||
93 | bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; | ||
94 | xfs_ilock(ip, lockmode); | ||
95 | break; | ||
96 | case BMAPI_ALLOCATE: | ||
97 | lockmode = XFS_ILOCK_SHARED; | ||
98 | bmapi_flags = XFS_BMAPI_ENTIRE; | ||
99 | |||
100 | /* Attempt non-blocking lock */ | ||
101 | if (flags & BMAPI_TRYLOCK) { | ||
102 | if (!xfs_ilock_nowait(ip, lockmode)) | ||
103 | return XFS_ERROR(EAGAIN); | ||
104 | } else { | ||
105 | xfs_ilock(ip, lockmode); | ||
106 | } | ||
107 | break; | ||
108 | default: | ||
109 | BUG(); | ||
110 | } | ||
111 | |||
112 | ASSERT(offset <= mp->m_maxioffset); | ||
113 | if ((xfs_fsize_t)offset + count > mp->m_maxioffset) | ||
114 | count = mp->m_maxioffset - offset; | ||
115 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); | ||
116 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
117 | |||
118 | error = xfs_bmapi(NULL, ip, offset_fsb, | ||
119 | (xfs_filblks_t)(end_fsb - offset_fsb), | ||
120 | bmapi_flags, NULL, 0, imap, | ||
121 | nimaps, NULL); | ||
122 | |||
123 | if (error) | ||
124 | goto out; | ||
125 | |||
126 | switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { | ||
127 | case BMAPI_WRITE: | ||
128 | /* If we found an extent, return it */ | ||
129 | if (*nimaps && | ||
130 | (imap->br_startblock != HOLESTARTBLOCK) && | ||
131 | (imap->br_startblock != DELAYSTARTBLOCK)) { | ||
132 | trace_xfs_iomap_found(ip, offset, count, flags, imap); | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | if (flags & BMAPI_DIRECT) { | ||
137 | error = xfs_iomap_write_direct(ip, offset, count, flags, | ||
138 | imap, nimaps); | ||
139 | } else { | ||
140 | error = xfs_iomap_write_delay(ip, offset, count, flags, | ||
141 | imap, nimaps); | ||
142 | } | ||
143 | if (!error) { | ||
144 | trace_xfs_iomap_alloc(ip, offset, count, flags, imap); | ||
145 | } | ||
146 | *new = 1; | ||
147 | break; | ||
148 | case BMAPI_ALLOCATE: | ||
149 | /* If we found an extent, return it */ | ||
150 | xfs_iunlock(ip, lockmode); | ||
151 | lockmode = 0; | ||
152 | |||
153 | if (*nimaps && !isnullstartblock(imap->br_startblock)) { | ||
154 | trace_xfs_iomap_found(ip, offset, count, flags, imap); | ||
155 | break; | ||
156 | } | ||
157 | |||
158 | error = xfs_iomap_write_allocate(ip, offset, count, | ||
159 | imap, nimaps); | ||
160 | break; | ||
161 | } | ||
162 | |||
163 | ASSERT(*nimaps <= 1); | ||
164 | |||
165 | out: | ||
166 | if (lockmode) | ||
167 | xfs_iunlock(ip, lockmode); | ||
168 | return XFS_ERROR(error); | ||
169 | } | ||
170 | |||
171 | STATIC int | 52 | STATIC int |
172 | xfs_iomap_eof_align_last_fsb( | 53 | xfs_iomap_eof_align_last_fsb( |
173 | xfs_mount_t *mp, | 54 | xfs_mount_t *mp, |
@@ -220,11 +101,11 @@ xfs_iomap_eof_align_last_fsb( | |||
220 | } | 101 | } |
221 | 102 | ||
222 | STATIC int | 103 | STATIC int |
223 | xfs_cmn_err_fsblock_zero( | 104 | xfs_alert_fsblock_zero( |
224 | xfs_inode_t *ip, | 105 | xfs_inode_t *ip, |
225 | xfs_bmbt_irec_t *imap) | 106 | xfs_bmbt_irec_t *imap) |
226 | { | 107 | { |
227 | xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, | 108 | xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, |
228 | "Access to block zero in inode %llu " | 109 | "Access to block zero in inode %llu " |
229 | "start_block: %llx start_off: %llx " | 110 | "start_block: %llx start_off: %llx " |
230 | "blkcnt: %llx extent-state: %x\n", | 111 | "blkcnt: %llx extent-state: %x\n", |
@@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero( | |||
236 | return EFSCORRUPTED; | 117 | return EFSCORRUPTED; |
237 | } | 118 | } |
238 | 119 | ||
239 | STATIC int | 120 | int |
240 | xfs_iomap_write_direct( | 121 | xfs_iomap_write_direct( |
241 | xfs_inode_t *ip, | 122 | xfs_inode_t *ip, |
242 | xfs_off_t offset, | 123 | xfs_off_t offset, |
243 | size_t count, | 124 | size_t count, |
244 | int flags, | ||
245 | xfs_bmbt_irec_t *imap, | 125 | xfs_bmbt_irec_t *imap, |
246 | int *nmaps) | 126 | int nmaps) |
247 | { | 127 | { |
248 | xfs_mount_t *mp = ip->i_mount; | 128 | xfs_mount_t *mp = ip->i_mount; |
249 | xfs_fileoff_t offset_fsb; | 129 | xfs_fileoff_t offset_fsb; |
@@ -279,7 +159,7 @@ xfs_iomap_write_direct( | |||
279 | if (error) | 159 | if (error) |
280 | goto error_out; | 160 | goto error_out; |
281 | } else { | 161 | } else { |
282 | if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK)) | 162 | if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) |
283 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) | 163 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) |
284 | imap->br_blockcount + | 164 | imap->br_blockcount + |
285 | imap->br_startoff); | 165 | imap->br_startoff); |
@@ -331,7 +211,7 @@ xfs_iomap_write_direct( | |||
331 | xfs_trans_ijoin(tp, ip); | 211 | xfs_trans_ijoin(tp, ip); |
332 | 212 | ||
333 | bmapi_flag = XFS_BMAPI_WRITE; | 213 | bmapi_flag = XFS_BMAPI_WRITE; |
334 | if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) | 214 | if (offset < ip->i_size || extsz) |
335 | bmapi_flag |= XFS_BMAPI_PREALLOC; | 215 | bmapi_flag |= XFS_BMAPI_PREALLOC; |
336 | 216 | ||
337 | /* | 217 | /* |
@@ -366,11 +246,10 @@ xfs_iomap_write_direct( | |||
366 | } | 246 | } |
367 | 247 | ||
368 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { | 248 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { |
369 | error = xfs_cmn_err_fsblock_zero(ip, imap); | 249 | error = xfs_alert_fsblock_zero(ip, imap); |
370 | goto error_out; | 250 | goto error_out; |
371 | } | 251 | } |
372 | 252 | ||
373 | *nmaps = 1; | ||
374 | return 0; | 253 | return 0; |
375 | 254 | ||
376 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | 255 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ |
@@ -379,7 +258,6 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | |||
379 | 258 | ||
380 | error1: /* Just cancel transaction */ | 259 | error1: /* Just cancel transaction */ |
381 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 260 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
382 | *nmaps = 0; /* nothing set-up here */ | ||
383 | 261 | ||
384 | error_out: | 262 | error_out: |
385 | return XFS_ERROR(error); | 263 | return XFS_ERROR(error); |
@@ -389,6 +267,9 @@ error_out: | |||
389 | * If the caller is doing a write at the end of the file, then extend the | 267 | * If the caller is doing a write at the end of the file, then extend the |
390 | * allocation out to the file system's write iosize. We clean up any extra | 268 | * allocation out to the file system's write iosize. We clean up any extra |
391 | * space left over when the file is closed in xfs_inactive(). | 269 | * space left over when the file is closed in xfs_inactive(). |
270 | * | ||
271 | * If we find we already have delalloc preallocation beyond EOF, don't do more | ||
272 | * preallocation as it is not needed. | ||
392 | */ | 273 | */ |
393 | STATIC int | 274 | STATIC int |
394 | xfs_iomap_eof_want_preallocate( | 275 | xfs_iomap_eof_want_preallocate( |
@@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate( | |||
396 | xfs_inode_t *ip, | 277 | xfs_inode_t *ip, |
397 | xfs_off_t offset, | 278 | xfs_off_t offset, |
398 | size_t count, | 279 | size_t count, |
399 | int ioflag, | ||
400 | xfs_bmbt_irec_t *imap, | 280 | xfs_bmbt_irec_t *imap, |
401 | int nimaps, | 281 | int nimaps, |
402 | int *prealloc) | 282 | int *prealloc) |
@@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate( | |||
405 | xfs_filblks_t count_fsb; | 285 | xfs_filblks_t count_fsb; |
406 | xfs_fsblock_t firstblock; | 286 | xfs_fsblock_t firstblock; |
407 | int n, error, imaps; | 287 | int n, error, imaps; |
288 | int found_delalloc = 0; | ||
408 | 289 | ||
409 | *prealloc = 0; | 290 | *prealloc = 0; |
410 | if ((offset + count) <= ip->i_size) | 291 | if ((offset + count) <= ip->i_size) |
@@ -429,20 +310,71 @@ xfs_iomap_eof_want_preallocate( | |||
429 | return 0; | 310 | return 0; |
430 | start_fsb += imap[n].br_blockcount; | 311 | start_fsb += imap[n].br_blockcount; |
431 | count_fsb -= imap[n].br_blockcount; | 312 | count_fsb -= imap[n].br_blockcount; |
313 | |||
314 | if (imap[n].br_startblock == DELAYSTARTBLOCK) | ||
315 | found_delalloc = 1; | ||
432 | } | 316 | } |
433 | } | 317 | } |
434 | *prealloc = 1; | 318 | if (!found_delalloc) |
319 | *prealloc = 1; | ||
435 | return 0; | 320 | return 0; |
436 | } | 321 | } |
437 | 322 | ||
438 | STATIC int | 323 | /* |
324 | * If we don't have a user specified preallocation size, dynamically increase | ||
325 | * the preallocation size as the size of the file grows. Cap the maximum size | ||
326 | * at a single extent or less if the filesystem is near full. The closer the | ||
327 | * filesystem is to full, the smaller the maximum preallocation. | ||
328 | */ | ||
329 | STATIC xfs_fsblock_t | ||
330 | xfs_iomap_prealloc_size( | ||
331 | struct xfs_mount *mp, | ||
332 | struct xfs_inode *ip) | ||
333 | { | ||
334 | xfs_fsblock_t alloc_blocks = 0; | ||
335 | |||
336 | if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { | ||
337 | int shift = 0; | ||
338 | int64_t freesp; | ||
339 | |||
340 | /* | ||
341 | * rounddown_pow_of_two() returns an undefined result | ||
342 | * if we pass in alloc_blocks = 0. Hence the "+ 1" to | ||
343 | * ensure we always pass in a non-zero value. | ||
344 | */ | ||
345 | alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1; | ||
346 | alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, | ||
347 | rounddown_pow_of_two(alloc_blocks)); | ||
348 | |||
349 | xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); | ||
350 | freesp = mp->m_sb.sb_fdblocks; | ||
351 | if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { | ||
352 | shift = 2; | ||
353 | if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) | ||
354 | shift++; | ||
355 | if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) | ||
356 | shift++; | ||
357 | if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) | ||
358 | shift++; | ||
359 | if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) | ||
360 | shift++; | ||
361 | } | ||
362 | if (shift) | ||
363 | alloc_blocks >>= shift; | ||
364 | } | ||
365 | |||
366 | if (alloc_blocks < mp->m_writeio_blocks) | ||
367 | alloc_blocks = mp->m_writeio_blocks; | ||
368 | |||
369 | return alloc_blocks; | ||
370 | } | ||
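To make the throttling concrete, here is a worked trace under assumed numbers (4k blocks, i_size of 100MB, no user-specified XFS_MOUNT_DFLT_IOSIZE); the figures are illustrative only:

    alloc_blocks = 25600 + 1;                   /* XFS_B_TO_FSB(100MB) + 1 */
    alloc_blocks = rounddown_pow_of_two(25601); /* -> 16384 blocks (64MB) */
    /* with free space just under 3% of the fs, the 5%, 4% and 3%
     * thresholds fire, leaving shift == 4: */
    alloc_blocks >>= 4;                         /* -> 1024 blocks (4MB) */
    /* finally clamped so it never drops below mp->m_writeio_blocks */

The XFS_FILEOFF_MIN() against MAXEXTLEN only bites for very large files, capping the pre-shift preallocation at a single extent's worth of blocks.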
371 | |||
372 | int | ||
439 | xfs_iomap_write_delay( | 373 | xfs_iomap_write_delay( |
440 | xfs_inode_t *ip, | 374 | xfs_inode_t *ip, |
441 | xfs_off_t offset, | 375 | xfs_off_t offset, |
442 | size_t count, | 376 | size_t count, |
443 | int ioflag, | 377 | xfs_bmbt_irec_t *ret_imap) |
444 | xfs_bmbt_irec_t *ret_imap, | ||
445 | int *nmaps) | ||
446 | { | 378 | { |
447 | xfs_mount_t *mp = ip->i_mount; | 379 | xfs_mount_t *mp = ip->i_mount; |
448 | xfs_fileoff_t offset_fsb; | 380 | xfs_fileoff_t offset_fsb; |
@@ -469,16 +401,19 @@ xfs_iomap_write_delay( | |||
469 | extsz = xfs_get_extsz_hint(ip); | 401 | extsz = xfs_get_extsz_hint(ip); |
470 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 402 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
471 | 403 | ||
404 | |||
472 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, | 405 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, |
473 | ioflag, imap, XFS_WRITE_IMAPS, &prealloc); | 406 | imap, XFS_WRITE_IMAPS, &prealloc); |
474 | if (error) | 407 | if (error) |
475 | return error; | 408 | return error; |
476 | 409 | ||
477 | retry: | 410 | retry: |
478 | if (prealloc) { | 411 | if (prealloc) { |
412 | xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); | ||
413 | |||
479 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); | 414 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); |
480 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); | 415 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); |
481 | last_fsb = ioalign + mp->m_writeio_blocks; | 416 | last_fsb = ioalign + alloc_blocks; |
482 | } else { | 417 | } else { |
483 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); | 418 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); |
484 | } | 419 | } |
@@ -496,22 +431,31 @@ retry: | |||
496 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | | 431 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | |
497 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, | 432 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, |
498 | &nimaps, NULL); | 433 | &nimaps, NULL); |
499 | if (error && (error != ENOSPC)) | 434 | switch (error) { |
435 | case 0: | ||
436 | case ENOSPC: | ||
437 | case EDQUOT: | ||
438 | break; | ||
439 | default: | ||
500 | return XFS_ERROR(error); | 440 | return XFS_ERROR(error); |
441 | } | ||
501 | 442 | ||
502 | /* | 443 | /* |
503 | * If bmapi returned us nothing, and if we didn't get back EDQUOT, | 444 | * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For |
504 | * then we must have run out of space - flush all other inodes with | 445 | * ENOSPC, * flush all other inodes with delalloc blocks to free up |
505 | * delalloc blocks and retry without EOF preallocation. | 446 | * some of the excess reserved metadata space. For both cases, retry |
447 | * without EOF preallocation. | ||
506 | */ | 448 | */ |
507 | if (nimaps == 0) { | 449 | if (nimaps == 0) { |
508 | trace_xfs_delalloc_enospc(ip, offset, count); | 450 | trace_xfs_delalloc_enospc(ip, offset, count); |
509 | if (flushed) | 451 | if (flushed) |
510 | return XFS_ERROR(ENOSPC); | 452 | return XFS_ERROR(error ? error : ENOSPC); |
511 | 453 | ||
512 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 454 | if (error == ENOSPC) { |
513 | xfs_flush_inodes(ip); | 455 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
514 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 456 | xfs_flush_inodes(ip); |
457 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
458 | } | ||
515 | 459 | ||
516 | flushed = 1; | 460 | flushed = 1; |
517 | error = 0; | 461 | error = 0; |
@@ -520,11 +464,9 @@ retry: | |||
520 | } | 464 | } |
521 | 465 | ||
522 | if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) | 466 | if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) |
523 | return xfs_cmn_err_fsblock_zero(ip, &imap[0]); | 467 | return xfs_alert_fsblock_zero(ip, &imap[0]); |
524 | 468 | ||
525 | *ret_imap = imap[0]; | 469 | *ret_imap = imap[0]; |
526 | *nmaps = 1; | ||
527 | |||
528 | return 0; | 470 | return 0; |
529 | } | 471 | } |
530 | 472 | ||
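The reworked error handling above folds ENOSPC and EDQUOT into a single retry path: a failed delalloc reservation is retried exactly once without EOF preallocation, and only the ENOSPC case flushes other inodes first. A compilable control-flow sketch of that shape (the ctx struct and helpers are hypothetical stand-ins for the xfs_bmapi()/xfs_flush_inodes() machinery):

```c
#include <errno.h>

struct ctx { int free_blocks; int mapped; };

/* Hypothetical stand-ins for the reservation and flush primitives. */
static int reserve_delalloc(struct ctx *c, int want)
{
	if (c->free_blocks < want)
		return ENOSPC;
	c->free_blocks -= want;
	c->mapped = 1;
	return 0;
}
static void flush_other_delalloc(struct ctx *c) { c->free_blocks += 16; }

static int write_delay(struct ctx *c, int need, int prealloc)
{
	int flushed = 0;
	int error;

retry:
	error = reserve_delalloc(c, need + (prealloc ? need : 0));
	if (error && error != ENOSPC && error != EDQUOT)
		return error;			/* hard error: fail now */

	if (!c->mapped) {
		if (flushed)			/* second failure is final */
			return error ? error : ENOSPC;
		if (error == ENOSPC)		/* reclaim reserved metadata */
			flush_other_delalloc(c);
		flushed = 1;
		error = 0;
		prealloc = 0;			/* retry without EOF prealloc */
		goto retry;
	}
	return 0;
}
```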
@@ -538,13 +480,12 @@ retry: | |||
538 | * We no longer bother to look at the incoming map - all we have to | 480 | * We no longer bother to look at the incoming map - all we have to |
539 | * guarantee is that whatever we allocate fills the required range. | 481 | * guarantee is that whatever we allocate fills the required range. |
540 | */ | 482 | */ |
541 | STATIC int | 483 | int |
542 | xfs_iomap_write_allocate( | 484 | xfs_iomap_write_allocate( |
543 | xfs_inode_t *ip, | 485 | xfs_inode_t *ip, |
544 | xfs_off_t offset, | 486 | xfs_off_t offset, |
545 | size_t count, | 487 | size_t count, |
546 | xfs_bmbt_irec_t *imap, | 488 | xfs_bmbt_irec_t *imap) |
547 | int *retmap) | ||
548 | { | 489 | { |
549 | xfs_mount_t *mp = ip->i_mount; | 490 | xfs_mount_t *mp = ip->i_mount; |
550 | xfs_fileoff_t offset_fsb, last_block; | 491 | xfs_fileoff_t offset_fsb, last_block; |
@@ -557,8 +498,6 @@ xfs_iomap_write_allocate( | |||
557 | int error = 0; | 498 | int error = 0; |
558 | int nres; | 499 | int nres; |
559 | 500 | ||
560 | *retmap = 0; | ||
561 | |||
562 | /* | 501 | /* |
563 | * Make sure that the dquots are there. | 502 | * Make sure that the dquots are there. |
564 | */ | 503 | */ |
@@ -675,12 +614,11 @@ xfs_iomap_write_allocate( | |||
675 | * covers at least part of the callers request | 614 | * covers at least part of the callers request |
676 | */ | 615 | */ |
677 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) | 616 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) |
678 | return xfs_cmn_err_fsblock_zero(ip, imap); | 617 | return xfs_alert_fsblock_zero(ip, imap); |
679 | 618 | ||
680 | if ((offset_fsb >= imap->br_startoff) && | 619 | if ((offset_fsb >= imap->br_startoff) && |
681 | (offset_fsb < (imap->br_startoff + | 620 | (offset_fsb < (imap->br_startoff + |
682 | imap->br_blockcount))) { | 621 | imap->br_blockcount))) { |
683 | *retmap = 1; | ||
684 | XFS_STATS_INC(xs_xstrat_quick); | 622 | XFS_STATS_INC(xs_xstrat_quick); |
685 | return 0; | 623 | return 0; |
686 | } | 624 | } |
@@ -786,7 +724,7 @@ xfs_iomap_write_unwritten( | |||
786 | return XFS_ERROR(error); | 724 | return XFS_ERROR(error); |
787 | 725 | ||
788 | if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) | 726 | if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) |
789 | return xfs_cmn_err_fsblock_zero(ip, &imap); | 727 | return xfs_alert_fsblock_zero(ip, &imap); |
790 | 728 | ||
791 | if ((numblks_fsb = imap.br_blockcount) == 0) { | 729 | if ((numblks_fsb = imap.br_blockcount) == 0) { |
792 | /* | 730 | /* |
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 7748a430f50d..80615760959a 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -18,30 +18,15 @@ | |||
18 | #ifndef __XFS_IOMAP_H__ | 18 | #ifndef __XFS_IOMAP_H__ |
19 | #define __XFS_IOMAP_H__ | 19 | #define __XFS_IOMAP_H__ |
20 | 20 | ||
21 | /* base extent manipulation calls */ | ||
22 | #define BMAPI_READ (1 << 0) /* read extents */ | ||
23 | #define BMAPI_WRITE (1 << 1) /* create extents */ | ||
24 | #define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */ | ||
25 | |||
26 | /* modifiers */ | ||
27 | #define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */ | ||
28 | #define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */ | ||
29 | #define BMAPI_MMA (1 << 6) /* allocate for mmap write */ | ||
30 | #define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */ | ||
31 | |||
32 | #define BMAPI_FLAGS \ | ||
33 | { BMAPI_READ, "READ" }, \ | ||
34 | { BMAPI_WRITE, "WRITE" }, \ | ||
35 | { BMAPI_ALLOCATE, "ALLOCATE" }, \ | ||
36 | { BMAPI_IGNSTATE, "IGNSTATE" }, \ | ||
37 | { BMAPI_DIRECT, "DIRECT" }, \ | ||
38 | { BMAPI_TRYLOCK, "TRYLOCK" } | ||
39 | |||
40 | struct xfs_inode; | 21 | struct xfs_inode; |
41 | struct xfs_bmbt_irec; | 22 | struct xfs_bmbt_irec; |
42 | 23 | ||
43 | extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, | 24 | extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, |
44 | struct xfs_bmbt_irec *, int *, int *); | 25 | struct xfs_bmbt_irec *, int); |
26 | extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, | ||
27 | struct xfs_bmbt_irec *); | ||
28 | extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
29 | struct xfs_bmbt_irec *); | ||
45 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); | 30 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); |
46 | 31 | ||
47 | #endif /* __XFS_IOMAP_H__*/ | 32 | #endif /* __XFS_IOMAP_H__*/ |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 7e3626e5925c..751e94fe1f77 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -92,7 +92,8 @@ xfs_bulkstat_one_int( | |||
92 | * further change. | 92 | * further change. |
93 | */ | 93 | */ |
94 | buf->bs_nlink = dic->di_nlink; | 94 | buf->bs_nlink = dic->di_nlink; |
95 | buf->bs_projid = dic->di_projid; | 95 | buf->bs_projid_lo = dic->di_projid_lo; |
96 | buf->bs_projid_hi = dic->di_projid_hi; | ||
96 | buf->bs_ino = ino; | 97 | buf->bs_ino = ino; |
97 | buf->bs_mode = dic->di_mode; | 98 | buf->bs_mode = dic->di_mode; |
98 | buf->bs_uid = dic->di_uid; | 99 | buf->bs_uid = dic->di_uid; |
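The bulkstat change above splits the project ID into bs_projid_lo and bs_projid_hi to make room for 32-bit project IDs. A consumer that wants the full value recombines the halves; a minimal sketch (the trimmed struct is an illustration, not the real xfs_bstat layout):

```c
#include <stdint.h>

/* Trimmed illustration of the split bulkstat fields. */
struct bstat_lite {
	uint16_t bs_projid_lo;	/* lower 16 bits of the project id */
	uint16_t bs_projid_hi;	/* upper 16 bits of the project id */
};

static inline uint32_t bstat_projid(const struct bstat_lite *bs)
{
	return ((uint32_t)bs->bs_projid_hi << 16) | bs->bs_projid_lo;
}
```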
@@ -203,7 +204,6 @@ xfs_bulkstat( | |||
203 | xfs_agi_t *agi; /* agi header data */ | 204 | xfs_agi_t *agi; /* agi header data */ |
204 | xfs_agino_t agino; /* inode # in allocation group */ | 205 | xfs_agino_t agino; /* inode # in allocation group */ |
205 | xfs_agnumber_t agno; /* allocation group number */ | 206 | xfs_agnumber_t agno; /* allocation group number */ |
206 | xfs_daddr_t bno; /* inode cluster start daddr */ | ||
207 | int chunkidx; /* current index into inode chunk */ | 207 | int chunkidx; /* current index into inode chunk */ |
208 | int clustidx; /* current index into inode cluster */ | 208 | int clustidx; /* current index into inode cluster */ |
209 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ | 209 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ |
@@ -462,7 +462,6 @@ xfs_bulkstat( | |||
462 | mp->m_sb.sb_inopblog); | 462 | mp->m_sb.sb_inopblog); |
463 | } | 463 | } |
464 | ino = XFS_AGINO_TO_INO(mp, agno, agino); | 464 | ino = XFS_AGINO_TO_INO(mp, agno, agino); |
465 | bno = XFS_AGB_TO_DADDR(mp, agno, agbno); | ||
466 | /* | 465 | /* |
467 | * Skip if this inode is free. | 466 | * Skip if this inode is free. |
468 | */ | 467 | */ |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 33f718f92a48..41d5b8f2bf92 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | |||
47 | xfs_buftarg_t *log_target, | 47 | xfs_buftarg_t *log_target, |
48 | xfs_daddr_t blk_offset, | 48 | xfs_daddr_t blk_offset, |
49 | int num_bblks); | 49 | int num_bblks); |
50 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | 50 | STATIC int xlog_space_left(struct log *log, atomic64_t *head); |
51 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 51 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
52 | STATIC void xlog_dealloc_log(xlog_t *log); | 52 | STATIC void xlog_dealloc_log(xlog_t *log); |
53 | 53 | ||
@@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); | |||
70 | /* local functions to manipulate grant head */ | 70 | /* local functions to manipulate grant head */ |
71 | STATIC int xlog_grant_log_space(xlog_t *log, | 71 | STATIC int xlog_grant_log_space(xlog_t *log, |
72 | xlog_ticket_t *xtic); | 72 | xlog_ticket_t *xtic); |
73 | STATIC void xlog_grant_push_ail(xfs_mount_t *mp, | 73 | STATIC void xlog_grant_push_ail(struct log *log, |
74 | int need_bytes); | 74 | int need_bytes); |
75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, | 75 | STATIC void xlog_regrant_reserve_log_space(xlog_t *log, |
76 | xlog_ticket_t *ticket); | 76 | xlog_ticket_t *ticket); |
@@ -81,98 +81,73 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, | |||
81 | 81 | ||
82 | #if defined(DEBUG) | 82 | #if defined(DEBUG) |
83 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); | 83 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); |
84 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); | 84 | STATIC void xlog_verify_grant_tail(struct log *log); |
85 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, | 85 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, |
86 | int count, boolean_t syncing); | 86 | int count, boolean_t syncing); |
87 | STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | 87 | STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, |
88 | xfs_lsn_t tail_lsn); | 88 | xfs_lsn_t tail_lsn); |
89 | #else | 89 | #else |
90 | #define xlog_verify_dest_ptr(a,b) | 90 | #define xlog_verify_dest_ptr(a,b) |
91 | #define xlog_verify_grant_head(a,b) | 91 | #define xlog_verify_grant_tail(a) |
92 | #define xlog_verify_iclog(a,b,c,d) | 92 | #define xlog_verify_iclog(a,b,c,d) |
93 | #define xlog_verify_tail_lsn(a,b,c) | 93 | #define xlog_verify_tail_lsn(a,b,c) |
94 | #endif | 94 | #endif |
95 | 95 | ||
96 | STATIC int xlog_iclogs_empty(xlog_t *log); | 96 | STATIC int xlog_iclogs_empty(xlog_t *log); |
97 | 97 | ||
98 | |||
99 | static void | 98 | static void |
100 | xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) | 99 | xlog_grant_sub_space( |
100 | struct log *log, | ||
101 | atomic64_t *head, | ||
102 | int bytes) | ||
101 | { | 103 | { |
102 | if (*qp) { | 104 | int64_t head_val = atomic64_read(head); |
103 | tic->t_next = (*qp); | 105 | int64_t new, old; |
104 | tic->t_prev = (*qp)->t_prev; | ||
105 | (*qp)->t_prev->t_next = tic; | ||
106 | (*qp)->t_prev = tic; | ||
107 | } else { | ||
108 | tic->t_prev = tic->t_next = tic; | ||
109 | *qp = tic; | ||
110 | } | ||
111 | 106 | ||
112 | tic->t_flags |= XLOG_TIC_IN_Q; | 107 | do { |
113 | } | 108 | int cycle, space; |
114 | 109 | ||
115 | static void | 110 | xlog_crack_grant_head_val(head_val, &cycle, &space); |
116 | xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) | 111 | |
117 | { | 112 | space -= bytes; |
118 | if (tic == tic->t_next) { | 113 | if (space < 0) { |
119 | *qp = NULL; | 114 | space += log->l_logsize; |
120 | } else { | 115 | cycle--; |
121 | *qp = tic->t_next; | 116 | } |
122 | tic->t_next->t_prev = tic->t_prev; | ||
123 | tic->t_prev->t_next = tic->t_next; | ||
124 | } | ||
125 | 117 | ||
126 | tic->t_next = tic->t_prev = NULL; | 118 | old = head_val; |
127 | tic->t_flags &= ~XLOG_TIC_IN_Q; | 119 | new = xlog_assign_grant_head_val(cycle, space); |
120 | head_val = atomic64_cmpxchg(head, old, new); | ||
121 | } while (head_val != old); | ||
128 | } | 122 | } |
129 | 123 | ||
130 | static void | 124 | static void |
131 | xlog_grant_sub_space(struct log *log, int bytes) | 125 | xlog_grant_add_space( |
126 | struct log *log, | ||
127 | atomic64_t *head, | ||
128 | int bytes) | ||
132 | { | 129 | { |
133 | log->l_grant_write_bytes -= bytes; | 130 | int64_t head_val = atomic64_read(head); |
134 | if (log->l_grant_write_bytes < 0) { | 131 | int64_t new, old; |
135 | log->l_grant_write_bytes += log->l_logsize; | ||
136 | log->l_grant_write_cycle--; | ||
137 | } | ||
138 | |||
139 | log->l_grant_reserve_bytes -= bytes; | ||
140 | if ((log)->l_grant_reserve_bytes < 0) { | ||
141 | log->l_grant_reserve_bytes += log->l_logsize; | ||
142 | log->l_grant_reserve_cycle--; | ||
143 | } | ||
144 | 132 | ||
145 | } | 133 | do { |
134 | int tmp; | ||
135 | int cycle, space; | ||
146 | 136 | ||
147 | static void | 137 | xlog_crack_grant_head_val(head_val, &cycle, &space); |
148 | xlog_grant_add_space_write(struct log *log, int bytes) | ||
149 | { | ||
150 | int tmp = log->l_logsize - log->l_grant_write_bytes; | ||
151 | if (tmp > bytes) | ||
152 | log->l_grant_write_bytes += bytes; | ||
153 | else { | ||
154 | log->l_grant_write_cycle++; | ||
155 | log->l_grant_write_bytes = bytes - tmp; | ||
156 | } | ||
157 | } | ||
158 | 138 | ||
159 | static void | 139 | tmp = log->l_logsize - space; |
160 | xlog_grant_add_space_reserve(struct log *log, int bytes) | 140 | if (tmp > bytes) |
161 | { | 141 | space += bytes; |
162 | int tmp = log->l_logsize - log->l_grant_reserve_bytes; | 142 | else { |
163 | if (tmp > bytes) | 143 | space = bytes - tmp; |
164 | log->l_grant_reserve_bytes += bytes; | 144 | cycle++; |
165 | else { | 145 | } |
166 | log->l_grant_reserve_cycle++; | ||
167 | log->l_grant_reserve_bytes = bytes - tmp; | ||
168 | } | ||
169 | } | ||
170 | 146 | ||
171 | static inline void | 147 | old = head_val; |
172 | xlog_grant_add_space(struct log *log, int bytes) | 148 | new = xlog_assign_grant_head_val(cycle, space); |
173 | { | 149 | head_val = atomic64_cmpxchg(head, old, new); |
174 | xlog_grant_add_space_write(log, bytes); | 150 | } while (head_val != old); |
175 | xlog_grant_add_space_reserve(log, bytes); | ||
176 | } | 151 | } |
177 | 152 | ||
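The rewritten xlog_grant_sub_space()/xlog_grant_add_space() replace the grant lock with a compare-and-swap retry loop over a single 64-bit word that packs the (cycle, space) pair. A userspace C11 sketch of the same pattern (the packing layout and LOGSIZE are illustrative, not the kernel's xlog_assign_grant_head_val() encoding):

```c
#include <stdatomic.h>
#include <stdint.h>

#define LOGSIZE	(64 * 1024 * 1024)	/* illustrative log size in bytes */

static inline int64_t pack(int32_t cycle, int32_t space)
{
	return ((int64_t)cycle << 32) | (uint32_t)space;
}

static inline void crack(int64_t v, int32_t *cycle, int32_t *space)
{
	*cycle = (int32_t)(v >> 32);
	*space = (int32_t)(uint32_t)v;
}

/*
 * Lockless head update: recompute (cycle, space) from a snapshot and
 * publish it with CAS; if another CPU raced us, the CAS fails,
 * refreshes the snapshot, and the loop retries.
 */
static void grant_sub_space(_Atomic int64_t *head, int bytes)
{
	int64_t old = atomic_load(head);
	int32_t cycle, space;

	do {
		crack(old, &cycle, &space);
		space -= bytes;
		if (space < 0) {	/* wrapped below the start of the log */
			space += LOGSIZE;
			cycle--;
		}
		/* on failure, 'old' is reloaded with the current value */
	} while (!atomic_compare_exchange_weak(head, &old, pack(cycle, space)));
}
```

Because both halves live in one word, readers always see a consistent (cycle, space) pair without ever taking l_grant_lock.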
178 | static void | 153 | static void |
@@ -355,7 +330,7 @@ xfs_log_reserve( | |||
355 | 330 | ||
356 | trace_xfs_log_reserve(log, internal_ticket); | 331 | trace_xfs_log_reserve(log, internal_ticket); |
357 | 332 | ||
358 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); | 333 | xlog_grant_push_ail(log, internal_ticket->t_unit_res); |
359 | retval = xlog_regrant_write_log_space(log, internal_ticket); | 334 | retval = xlog_regrant_write_log_space(log, internal_ticket); |
360 | } else { | 335 | } else { |
361 | /* may sleep if need to allocate more tickets */ | 336 | /* may sleep if need to allocate more tickets */ |
@@ -369,7 +344,7 @@ xfs_log_reserve( | |||
369 | 344 | ||
370 | trace_xfs_log_reserve(log, internal_ticket); | 345 | trace_xfs_log_reserve(log, internal_ticket); |
371 | 346 | ||
372 | xlog_grant_push_ail(mp, | 347 | xlog_grant_push_ail(log, |
373 | (internal_ticket->t_unit_res * | 348 | (internal_ticket->t_unit_res * |
374 | internal_ticket->t_cnt)); | 349 | internal_ticket->t_cnt)); |
375 | retval = xlog_grant_log_space(log, internal_ticket); | 350 | retval = xlog_grant_log_space(log, internal_ticket); |
@@ -399,11 +374,10 @@ xfs_log_mount( | |||
399 | int error; | 374 | int error; |
400 | 375 | ||
401 | if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) | 376 | if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) |
402 | cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); | 377 | xfs_notice(mp, "Mounting Filesystem"); |
403 | else { | 378 | else { |
404 | cmn_err(CE_NOTE, | 379 | xfs_notice(mp, |
405 | "!Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", | 380 | "Mounting filesystem in no-recovery mode. Filesystem will be inconsistent."); |
406 | mp->m_fsname); | ||
407 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); | 381 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); |
408 | } | 382 | } |
409 | 383 | ||
@@ -418,7 +392,7 @@ xfs_log_mount( | |||
418 | */ | 392 | */ |
419 | error = xfs_trans_ail_init(mp); | 393 | error = xfs_trans_ail_init(mp); |
420 | if (error) { | 394 | if (error) { |
421 | cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); | 395 | xfs_warn(mp, "AIL initialisation failed: error %d", error); |
422 | goto out_free_log; | 396 | goto out_free_log; |
423 | } | 397 | } |
424 | mp->m_log->l_ailp = mp->m_ail; | 398 | mp->m_log->l_ailp = mp->m_ail; |
@@ -438,7 +412,8 @@ xfs_log_mount( | |||
438 | if (readonly) | 412 | if (readonly) |
439 | mp->m_flags |= XFS_MOUNT_RDONLY; | 413 | mp->m_flags |= XFS_MOUNT_RDONLY; |
440 | if (error) { | 414 | if (error) { |
441 | cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); | 415 | xfs_warn(mp, "log mount/recovery failed: error %d", |
416 | error); | ||
442 | goto out_destroy_ail; | 417 | goto out_destroy_ail; |
443 | } | 418 | } |
444 | } | 419 | } |
@@ -567,10 +542,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
567 | */ | 542 | */ |
568 | } | 543 | } |
569 | 544 | ||
570 | if (error) { | 545 | if (error) |
571 | xfs_fs_cmn_err(CE_ALERT, mp, | 546 | xfs_alert(mp, "%s: unmount record failed", __func__); |
572 | "xfs_log_unmount: unmount record failed"); | ||
573 | } | ||
574 | 547 | ||
575 | 548 | ||
576 | spin_lock(&log->l_icloglock); | 549 | spin_lock(&log->l_icloglock); |
@@ -584,8 +557,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
584 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || | 557 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || |
585 | iclog->ic_state == XLOG_STATE_DIRTY)) { | 558 | iclog->ic_state == XLOG_STATE_DIRTY)) { |
586 | if (!XLOG_FORCED_SHUTDOWN(log)) { | 559 | if (!XLOG_FORCED_SHUTDOWN(log)) { |
587 | sv_wait(&iclog->ic_force_wait, PMEM, | 560 | xlog_wait(&iclog->ic_force_wait, |
588 | &log->l_icloglock, s); | 561 | &log->l_icloglock); |
589 | } else { | 562 | } else { |
590 | spin_unlock(&log->l_icloglock); | 563 | spin_unlock(&log->l_icloglock); |
591 | } | 564 | } |
@@ -625,8 +598,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
625 | || iclog->ic_state == XLOG_STATE_DIRTY | 598 | || iclog->ic_state == XLOG_STATE_DIRTY |
626 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { | 599 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { |
627 | 600 | ||
628 | sv_wait(&iclog->ic_force_wait, PMEM, | 601 | xlog_wait(&iclog->ic_force_wait, |
629 | &log->l_icloglock, s); | 602 | &log->l_icloglock); |
630 | } else { | 603 | } else { |
631 | spin_unlock(&log->l_icloglock); | 604 | spin_unlock(&log->l_icloglock); |
632 | } | 605 | } |
@@ -703,55 +676,46 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
703 | { | 676 | { |
704 | xlog_ticket_t *tic; | 677 | xlog_ticket_t *tic; |
705 | xlog_t *log = mp->m_log; | 678 | xlog_t *log = mp->m_log; |
706 | int need_bytes, free_bytes, cycle, bytes; | 679 | int need_bytes, free_bytes; |
707 | 680 | ||
708 | if (XLOG_FORCED_SHUTDOWN(log)) | 681 | if (XLOG_FORCED_SHUTDOWN(log)) |
709 | return; | 682 | return; |
710 | 683 | ||
711 | if (tail_lsn == 0) { | 684 | if (tail_lsn == 0) |
712 | /* needed since sync_lsn is 64 bits */ | 685 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
713 | spin_lock(&log->l_icloglock); | ||
714 | tail_lsn = log->l_last_sync_lsn; | ||
715 | spin_unlock(&log->l_icloglock); | ||
716 | } | ||
717 | |||
718 | spin_lock(&log->l_grant_lock); | ||
719 | 686 | ||
720 | /* Also an invalid lsn. 1 implies that we aren't passing in a valid | 687 | /* tail_lsn == 1 implies that we weren't passed a valid value. */ |
721 | * tail_lsn. | 688 | if (tail_lsn != 1) |
722 | */ | 689 | atomic64_set(&log->l_tail_lsn, tail_lsn); |
723 | if (tail_lsn != 1) { | ||
724 | log->l_tail_lsn = tail_lsn; | ||
725 | } | ||
726 | 690 | ||
727 | if ((tic = log->l_write_headq)) { | 691 | if (!list_empty_careful(&log->l_writeq)) { |
728 | #ifdef DEBUG | 692 | #ifdef DEBUG |
729 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 693 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
730 | panic("Recovery problem"); | 694 | panic("Recovery problem"); |
731 | #endif | 695 | #endif |
732 | cycle = log->l_grant_write_cycle; | 696 | spin_lock(&log->l_grant_write_lock); |
733 | bytes = log->l_grant_write_bytes; | 697 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); |
734 | free_bytes = xlog_space_left(log, cycle, bytes); | 698 | list_for_each_entry(tic, &log->l_writeq, t_queue) { |
735 | do { | ||
736 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); | 699 | ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); |
737 | 700 | ||
738 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) | 701 | if (free_bytes < tic->t_unit_res && tail_lsn != 1) |
739 | break; | 702 | break; |
740 | tail_lsn = 0; | 703 | tail_lsn = 0; |
741 | free_bytes -= tic->t_unit_res; | 704 | free_bytes -= tic->t_unit_res; |
742 | sv_signal(&tic->t_wait); | 705 | trace_xfs_log_regrant_write_wake_up(log, tic); |
743 | tic = tic->t_next; | 706 | wake_up(&tic->t_wait); |
744 | } while (tic != log->l_write_headq); | 707 | } |
708 | spin_unlock(&log->l_grant_write_lock); | ||
745 | } | 709 | } |
746 | if ((tic = log->l_reserve_headq)) { | 710 | |
711 | if (!list_empty_careful(&log->l_reserveq)) { | ||
747 | #ifdef DEBUG | 712 | #ifdef DEBUG |
748 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 713 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
749 | panic("Recovery problem"); | 714 | panic("Recovery problem"); |
750 | #endif | 715 | #endif |
751 | cycle = log->l_grant_reserve_cycle; | 716 | spin_lock(&log->l_grant_reserve_lock); |
752 | bytes = log->l_grant_reserve_bytes; | 717 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); |
753 | free_bytes = xlog_space_left(log, cycle, bytes); | 718 | list_for_each_entry(tic, &log->l_reserveq, t_queue) { |
754 | do { | ||
755 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) | 719 | if (tic->t_flags & XLOG_TIC_PERM_RESERV) |
756 | need_bytes = tic->t_unit_res*tic->t_cnt; | 720 | need_bytes = tic->t_unit_res*tic->t_cnt; |
757 | else | 721 | else |
@@ -760,12 +724,12 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
760 | break; | 724 | break; |
761 | tail_lsn = 0; | 725 | tail_lsn = 0; |
762 | free_bytes -= need_bytes; | 726 | free_bytes -= need_bytes; |
763 | sv_signal(&tic->t_wait); | 727 | trace_xfs_log_grant_wake_up(log, tic); |
764 | tic = tic->t_next; | 728 | wake_up(&tic->t_wait); |
765 | } while (tic != log->l_reserve_headq); | 729 | } |
730 | spin_unlock(&log->l_grant_reserve_lock); | ||
766 | } | 731 | } |
767 | spin_unlock(&log->l_grant_lock); | 732 | } |
768 | } /* xfs_log_move_tail */ | ||
769 | 733 | ||
770 | /* | 734 | /* |
771 | * Determine if we have a transaction that has gone to disk | 735 | * Determine if we have a transaction that has gone to disk |
@@ -797,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
797 | break; | 761 | break; |
798 | case XLOG_STATE_COVER_NEED: | 762 | case XLOG_STATE_COVER_NEED: |
799 | case XLOG_STATE_COVER_NEED2: | 763 | case XLOG_STATE_COVER_NEED2: |
800 | if (!xfs_trans_ail_tail(log->l_ailp) && | 764 | if (!xfs_ail_min_lsn(log->l_ailp) && |
801 | xlog_iclogs_empty(log)) { | 765 | xlog_iclogs_empty(log)) { |
802 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) | 766 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) |
803 | log->l_covered_state = XLOG_STATE_COVER_DONE; | 767 | log->l_covered_state = XLOG_STATE_COVER_DONE; |
@@ -831,23 +795,19 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
831 | * We may be holding the log iclog lock upon entering this routine. | 795 | * We may be holding the log iclog lock upon entering this routine. |
832 | */ | 796 | */ |
833 | xfs_lsn_t | 797 | xfs_lsn_t |
834 | xlog_assign_tail_lsn(xfs_mount_t *mp) | 798 | xlog_assign_tail_lsn( |
799 | struct xfs_mount *mp) | ||
835 | { | 800 | { |
836 | xfs_lsn_t tail_lsn; | 801 | xfs_lsn_t tail_lsn; |
837 | xlog_t *log = mp->m_log; | 802 | struct log *log = mp->m_log; |
838 | 803 | ||
839 | tail_lsn = xfs_trans_ail_tail(mp->m_ail); | 804 | tail_lsn = xfs_ail_min_lsn(mp->m_ail); |
840 | spin_lock(&log->l_grant_lock); | 805 | if (!tail_lsn) |
841 | if (tail_lsn != 0) { | 806 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
842 | log->l_tail_lsn = tail_lsn; | ||
843 | } else { | ||
844 | tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn; | ||
845 | } | ||
846 | spin_unlock(&log->l_grant_lock); | ||
847 | 807 | ||
808 | atomic64_set(&log->l_tail_lsn, tail_lsn); | ||
848 | return tail_lsn; | 809 | return tail_lsn; |
849 | } /* xlog_assign_tail_lsn */ | 810 | } |
850 | |||
851 | 811 | ||
852 | /* | 812 | /* |
853 | * Return the space in the log between the tail and the head. The head | 813 | * Return the space in the log between the tail and the head. The head |
@@ -864,37 +824,42 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) | |||
864 | * result is that we return the size of the log as the amount of space left. | 824 | * result is that we return the size of the log as the amount of space left. |
865 | */ | 825 | */ |
866 | STATIC int | 826 | STATIC int |
867 | xlog_space_left(xlog_t *log, int cycle, int bytes) | 827 | xlog_space_left( |
868 | { | 828 | struct log *log, |
869 | int free_bytes; | 829 | atomic64_t *head) |
870 | int tail_bytes; | 830 | { |
871 | int tail_cycle; | 831 | int free_bytes; |
872 | 832 | int tail_bytes; | |
873 | tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); | 833 | int tail_cycle; |
874 | tail_cycle = CYCLE_LSN(log->l_tail_lsn); | 834 | int head_cycle; |
875 | if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { | 835 | int head_bytes; |
876 | free_bytes = log->l_logsize - (bytes - tail_bytes); | 836 | |
877 | } else if ((tail_cycle + 1) < cycle) { | 837 | xlog_crack_grant_head(head, &head_cycle, &head_bytes); |
838 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); | ||
839 | tail_bytes = BBTOB(tail_bytes); | ||
840 | if (tail_cycle == head_cycle && head_bytes >= tail_bytes) | ||
841 | free_bytes = log->l_logsize - (head_bytes - tail_bytes); | ||
842 | else if (tail_cycle + 1 < head_cycle) | ||
878 | return 0; | 843 | return 0; |
879 | } else if (tail_cycle < cycle) { | 844 | else if (tail_cycle < head_cycle) { |
880 | ASSERT(tail_cycle == (cycle - 1)); | 845 | ASSERT(tail_cycle == (head_cycle - 1)); |
881 | free_bytes = tail_bytes - bytes; | 846 | free_bytes = tail_bytes - head_bytes; |
882 | } else { | 847 | } else { |
883 | /* | 848 | /* |
884 | * The reservation head is behind the tail. | 849 | * The reservation head is behind the tail. |
885 | * In this case we just want to return the size of the | 850 | * In this case we just want to return the size of the |
886 | * log as the amount of space left. | 851 | * log as the amount of space left. |
887 | */ | 852 | */ |
888 | xfs_fs_cmn_err(CE_ALERT, log->l_mp, | 853 | xfs_alert(log->l_mp, |
889 | "xlog_space_left: head behind tail\n" | 854 | "xlog_space_left: head behind tail\n" |
890 | " tail_cycle = %d, tail_bytes = %d\n" | 855 | " tail_cycle = %d, tail_bytes = %d\n" |
891 | " GH cycle = %d, GH bytes = %d", | 856 | " GH cycle = %d, GH bytes = %d", |
892 | tail_cycle, tail_bytes, cycle, bytes); | 857 | tail_cycle, tail_bytes, head_cycle, head_bytes); |
893 | ASSERT(0); | 858 | ASSERT(0); |
894 | free_bytes = log->l_logsize; | 859 | free_bytes = log->l_logsize; |
895 | } | 860 | } |
896 | return free_bytes; | 861 | return free_bytes; |
897 | } /* xlog_space_left */ | 862 | } |
898 | 863 | ||
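xlog_space_left() above measures free space on the log's circular buffer from (cycle, bytes) coordinates. A simplified sketch of the three regular cases (the head-behind-tail corruption path that logs an alert is omitted, and logsize is a parameter rather than log->l_logsize):

```c
/*
 * Free space between a log tail and head expressed as (cycle, byte)
 * pairs on a ring of 'logsize' bytes.
 */
static int space_left(int logsize, int tail_cycle, int tail_bytes,
		      int head_cycle, int head_bytes)
{
	if (tail_cycle == head_cycle)		/* same pass over the log */
		return logsize - (head_bytes - tail_bytes);
	if (tail_cycle + 1 == head_cycle)	/* head has wrapped once */
		return tail_bytes - head_bytes;
	return 0;				/* more than a cycle apart */
}
```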
899 | 864 | ||
900 | /* | 865 | /* |
@@ -917,19 +882,6 @@ xlog_iodone(xfs_buf_t *bp) | |||
917 | l = iclog->ic_log; | 882 | l = iclog->ic_log; |
918 | 883 | ||
919 | /* | 884 | /* |
920 | * If the _XFS_BARRIER_FAILED flag was set by a lower | ||
921 | * layer, it means the underlying device no longer supports | ||
922 | * barrier I/O. Warn loudly and turn off barriers. | ||
923 | */ | ||
924 | if (bp->b_flags & _XFS_BARRIER_FAILED) { | ||
925 | bp->b_flags &= ~_XFS_BARRIER_FAILED; | ||
926 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
927 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | ||
928 | "xlog_iodone: Barriers are no longer supported" | ||
929 | " by device. Disabling barriers\n"); | ||
930 | } | ||
931 | |||
932 | /* | ||
933 | * Race to shutdown the filesystem if we see an error. | 885 | * Race to shutdown the filesystem if we see an error. |
934 | */ | 886 | */ |
935 | if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, | 887 | if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, |
@@ -1047,7 +999,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1047 | 999 | ||
1048 | log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); | 1000 | log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); |
1049 | if (!log) { | 1001 | if (!log) { |
1050 | xlog_warn("XFS: Log allocation failed: No memory!"); | 1002 | xfs_warn(mp, "Log allocation failed: No memory!"); |
1051 | goto out; | 1003 | goto out; |
1052 | } | 1004 | } |
1053 | 1005 | ||
@@ -1060,35 +1012,39 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1060 | log->l_flags |= XLOG_ACTIVE_RECOVERY; | 1012 | log->l_flags |= XLOG_ACTIVE_RECOVERY; |
1061 | 1013 | ||
1062 | log->l_prev_block = -1; | 1014 | log->l_prev_block = -1; |
1063 | log->l_tail_lsn = xlog_assign_lsn(1, 0); | ||
1064 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ | 1015 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ |
1065 | log->l_last_sync_lsn = log->l_tail_lsn; | 1016 | xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); |
1017 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); | ||
1066 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ | 1018 | log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ |
1067 | log->l_grant_reserve_cycle = 1; | 1019 | xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); |
1068 | log->l_grant_write_cycle = 1; | 1020 | xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); |
1021 | INIT_LIST_HEAD(&log->l_reserveq); | ||
1022 | INIT_LIST_HEAD(&log->l_writeq); | ||
1023 | spin_lock_init(&log->l_grant_reserve_lock); | ||
1024 | spin_lock_init(&log->l_grant_write_lock); | ||
1069 | 1025 | ||
1070 | error = EFSCORRUPTED; | 1026 | error = EFSCORRUPTED; |
1071 | if (xfs_sb_version_hassector(&mp->m_sb)) { | 1027 | if (xfs_sb_version_hassector(&mp->m_sb)) { |
1072 | log2_size = mp->m_sb.sb_logsectlog; | 1028 | log2_size = mp->m_sb.sb_logsectlog; |
1073 | if (log2_size < BBSHIFT) { | 1029 | if (log2_size < BBSHIFT) { |
1074 | xlog_warn("XFS: Log sector size too small " | 1030 | xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)", |
1075 | "(0x%x < 0x%x)", log2_size, BBSHIFT); | 1031 | log2_size, BBSHIFT); |
1076 | goto out_free_log; | 1032 | goto out_free_log; |
1077 | } | 1033 | } |
1078 | 1034 | ||
1079 | log2_size -= BBSHIFT; | 1035 | log2_size -= BBSHIFT; |
1080 | if (log2_size > mp->m_sectbb_log) { | 1036 | if (log2_size > mp->m_sectbb_log) { |
1081 | xlog_warn("XFS: Log sector size too large " | 1037 | xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)", |
1082 | "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log); | 1038 | log2_size, mp->m_sectbb_log); |
1083 | goto out_free_log; | 1039 | goto out_free_log; |
1084 | } | 1040 | } |
1085 | 1041 | ||
1086 | /* for larger sector sizes, must have v2 or external log */ | 1042 | /* for larger sector sizes, must have v2 or external log */ |
1087 | if (log2_size && log->l_logBBstart > 0 && | 1043 | if (log2_size && log->l_logBBstart > 0 && |
1088 | !xfs_sb_version_haslogv2(&mp->m_sb)) { | 1044 | !xfs_sb_version_haslogv2(&mp->m_sb)) { |
1089 | 1045 | xfs_warn(mp, | |
1090 | xlog_warn("XFS: log sector size (0x%x) invalid " | 1046 | "log sector size (0x%x) invalid for configuration.", |
1091 | "for configuration.", log2_size); | 1047 | log2_size); |
1092 | goto out_free_log; | 1048 | goto out_free_log; |
1093 | } | 1049 | } |
1094 | } | 1050 | } |
@@ -1107,8 +1063,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1107 | log->l_xbuf = bp; | 1063 | log->l_xbuf = bp; |
1108 | 1064 | ||
1109 | spin_lock_init(&log->l_icloglock); | 1065 | spin_lock_init(&log->l_icloglock); |
1110 | spin_lock_init(&log->l_grant_lock); | 1066 | init_waitqueue_head(&log->l_flush_wait); |
1111 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | ||
1112 | 1067 | ||
1113 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1068 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1114 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1069 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
@@ -1131,7 +1086,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1131 | iclog->ic_prev = prev_iclog; | 1086 | iclog->ic_prev = prev_iclog; |
1132 | prev_iclog = iclog; | 1087 | prev_iclog = iclog; |
1133 | 1088 | ||
1134 | bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); | 1089 | bp = xfs_buf_get_uncached(mp->m_logdev_targp, |
1090 | log->l_iclog_size, 0); | ||
1135 | if (!bp) | 1091 | if (!bp) |
1136 | goto out_free_iclog; | 1092 | goto out_free_iclog; |
1137 | if (!XFS_BUF_CPSEMA(bp)) | 1093 | if (!XFS_BUF_CPSEMA(bp)) |
@@ -1163,8 +1119,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1163 | 1119 | ||
1164 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); | 1120 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); |
1165 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); | 1121 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); |
1166 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); | 1122 | init_waitqueue_head(&iclog->ic_force_wait); |
1167 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); | 1123 | init_waitqueue_head(&iclog->ic_write_wait); |
1168 | 1124 | ||
1169 | iclogp = &iclog->ic_next; | 1125 | iclogp = &iclog->ic_next; |
1170 | } | 1126 | } |
@@ -1179,15 +1135,11 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1179 | out_free_iclog: | 1135 | out_free_iclog: |
1180 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { | 1136 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { |
1181 | prev_iclog = iclog->ic_next; | 1137 | prev_iclog = iclog->ic_next; |
1182 | if (iclog->ic_bp) { | 1138 | if (iclog->ic_bp) |
1183 | sv_destroy(&iclog->ic_force_wait); | ||
1184 | sv_destroy(&iclog->ic_write_wait); | ||
1185 | xfs_buf_free(iclog->ic_bp); | 1139 | xfs_buf_free(iclog->ic_bp); |
1186 | } | ||
1187 | kmem_free(iclog); | 1140 | kmem_free(iclog); |
1188 | } | 1141 | } |
1189 | spinlock_destroy(&log->l_icloglock); | 1142 | spinlock_destroy(&log->l_icloglock); |
1190 | spinlock_destroy(&log->l_grant_lock); | ||
1191 | xfs_buf_free(log->l_xbuf); | 1143 | xfs_buf_free(log->l_xbuf); |
1192 | out_free_log: | 1144 | out_free_log: |
1193 | kmem_free(log); | 1145 | kmem_free(log); |
@@ -1235,61 +1187,60 @@ xlog_commit_record( | |||
1235 | * water mark. In this manner, we would be creating a low water mark. | 1187 | * water mark. In this manner, we would be creating a low water mark. |
1236 | */ | 1188 | */ |
1237 | STATIC void | 1189 | STATIC void |
1238 | xlog_grant_push_ail(xfs_mount_t *mp, | 1190 | xlog_grant_push_ail( |
1239 | int need_bytes) | 1191 | struct log *log, |
1192 | int need_bytes) | ||
1240 | { | 1193 | { |
1241 | xlog_t *log = mp->m_log; /* pointer to the log */ | 1194 | xfs_lsn_t threshold_lsn = 0; |
1242 | xfs_lsn_t tail_lsn; /* lsn of the log tail */ | 1195 | xfs_lsn_t last_sync_lsn; |
1243 | xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */ | 1196 | int free_blocks; |
1244 | int free_blocks; /* free blocks left to write to */ | 1197 | int free_bytes; |
1245 | int free_bytes; /* free bytes left to write to */ | 1198 | int threshold_block; |
1246 | int threshold_block; /* block in lsn we'd like to be at */ | 1199 | int threshold_cycle; |
1247 | int threshold_cycle; /* lsn cycle we'd like to be at */ | 1200 | int free_threshold; |
1248 | int free_threshold; | 1201 | |
1249 | 1202 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | |
1250 | ASSERT(BTOBB(need_bytes) < log->l_logBBsize); | 1203 | |
1251 | 1204 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); | |
1252 | spin_lock(&log->l_grant_lock); | 1205 | free_blocks = BTOBBT(free_bytes); |
1253 | free_bytes = xlog_space_left(log, | 1206 | |
1254 | log->l_grant_reserve_cycle, | 1207 | /* |
1255 | log->l_grant_reserve_bytes); | 1208 | * Set the threshold for the minimum number of free blocks in the |
1256 | tail_lsn = log->l_tail_lsn; | 1209 | * log to the maximum of what the caller needs, one quarter of the |
1257 | free_blocks = BTOBBT(free_bytes); | 1210 | * log, and 256 blocks. |
1258 | 1211 | */ | |
1259 | /* | 1212 | free_threshold = BTOBB(need_bytes); |
1260 | * Set the threshold for the minimum number of free blocks in the | 1213 | free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); |
1261 | * log to the maximum of what the caller needs, one quarter of the | 1214 | free_threshold = MAX(free_threshold, 256); |
1262 | * log, and 256 blocks. | 1215 | if (free_blocks >= free_threshold) |
1263 | */ | 1216 | return; |
1264 | free_threshold = BTOBB(need_bytes); | 1217 | |
1265 | free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); | 1218 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, |
1266 | free_threshold = MAX(free_threshold, 256); | 1219 | &threshold_block); |
1267 | if (free_blocks < free_threshold) { | 1220 | threshold_block += free_threshold; |
1268 | threshold_block = BLOCK_LSN(tail_lsn) + free_threshold; | ||
1269 | threshold_cycle = CYCLE_LSN(tail_lsn); | ||
1270 | if (threshold_block >= log->l_logBBsize) { | 1221 | if (threshold_block >= log->l_logBBsize) { |
1271 | threshold_block -= log->l_logBBsize; | 1222 | threshold_block -= log->l_logBBsize; |
1272 | threshold_cycle += 1; | 1223 | threshold_cycle += 1; |
1273 | } | 1224 | } |
1274 | threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); | 1225 | threshold_lsn = xlog_assign_lsn(threshold_cycle, |
1226 | threshold_block); | ||
1227 | /* | ||
1228 | * Don't pass in an lsn greater than the lsn of the last | ||
1229 | * log record known to be on disk. Use a snapshot of the last sync lsn | ||
1230 | * so that it doesn't change between the compare and the set. | ||
1231 | */ | ||
1232 | last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); | ||
1233 | if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) | ||
1234 | threshold_lsn = last_sync_lsn; | ||
1275 | 1235 | ||
1276 | /* Don't pass in an lsn greater than the lsn of the last | 1236 | /* |
1277 | * log record known to be on disk. | 1237 | * Get the transaction layer to kick the dirty buffers out to |
1238 | * disk asynchronously. No point in trying to do this if | ||
1239 | * the filesystem is shutting down. | ||
1278 | */ | 1240 | */ |
1279 | if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) | 1241 | if (!XLOG_FORCED_SHUTDOWN(log)) |
1280 | threshold_lsn = log->l_last_sync_lsn; | 1242 | xfs_ail_push(log->l_ailp, threshold_lsn); |
1281 | } | 1243 | } |
1282 | spin_unlock(&log->l_grant_lock); | ||
1283 | |||
1284 | /* | ||
1285 | * Get the transaction layer to kick the dirty buffers out to | ||
1286 | * disk asynchronously. No point in trying to do this if | ||
1287 | * the filesystem is shutting down. | ||
1288 | */ | ||
1289 | if (threshold_lsn && | ||
1290 | !XLOG_FORCED_SHUTDOWN(log)) | ||
1291 | xfs_trans_ail_push(log->l_ailp, threshold_lsn); | ||
1292 | } /* xlog_grant_push_ail */ | ||
1293 | 1244 | ||
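The restructured xlog_grant_push_ail() computes its target in two steps: a free-block floor of max(need, log/4, 256), then a threshold LSN that is the tail advanced by that many blocks, wrapping into the next cycle when it runs past the end of the log. A sketch of that arithmetic in block counts only (the LSN packing and the AIL push itself are elided):

```c
/* Minimum free blocks to keep: max(need, a quarter of the log, 256). */
static int push_free_threshold(int log_blocks, int need_blocks)
{
	int thresh = need_blocks;

	if (thresh < log_blocks >> 2)
		thresh = log_blocks >> 2;
	if (thresh < 256)
		thresh = 256;
	return thresh;
}

/* Advance the tail position by 'thresh' blocks, wrapping the ring. */
static void push_target(int log_blocks, int thresh,
			int tail_cycle, int tail_block,
			int *thr_cycle, int *thr_block)
{
	*thr_cycle = tail_cycle;
	*thr_block = tail_block + thresh;
	if (*thr_block >= log_blocks) {
		*thr_block -= log_blocks;
		*thr_cycle += 1;
	}
}
```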
1294 | /* | 1245 | /* |
1295 | * The bdstrat callback function for log bufs. This gives us a central | 1246 | * The bdstrat callback function for log bufs. This gives us a central |
@@ -1309,7 +1260,7 @@ xlog_bdstrat( | |||
1309 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 1260 | if (iclog->ic_state & XLOG_STATE_IOERROR) { |
1310 | XFS_BUF_ERROR(bp, EIO); | 1261 | XFS_BUF_ERROR(bp, EIO); |
1311 | XFS_BUF_STALE(bp); | 1262 | XFS_BUF_STALE(bp); |
1312 | xfs_biodone(bp); | 1263 | xfs_buf_ioend(bp, 0); |
1313 | /* | 1264 | /* |
1314 | * It would seem logical to return EIO here, but we rely on | 1265 | * It would seem logical to return EIO here, but we rely on |
1315 | * the log state machine to propagate I/O errors instead of | 1266 | * the log state machine to propagate I/O errors instead of |
@@ -1384,9 +1335,8 @@ xlog_sync(xlog_t *log, | |||
1384 | roundoff < BBTOB(1))); | 1335 | roundoff < BBTOB(1))); |
1385 | 1336 | ||
1386 | /* move grant heads by roundoff in sync */ | 1337 | /* move grant heads by roundoff in sync */ |
1387 | spin_lock(&log->l_grant_lock); | 1338 | xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); |
1388 | xlog_grant_add_space(log, roundoff); | 1339 | xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); |
1389 | spin_unlock(&log->l_grant_lock); | ||
1390 | 1340 | ||
1391 | /* put cycle number in every block */ | 1341 | /* put cycle number in every block */ |
1392 | xlog_pack_data(log, iclog, roundoff); | 1342 | xlog_pack_data(log, iclog, roundoff); |
@@ -1422,8 +1372,17 @@ xlog_sync(xlog_t *log, | |||
1422 | XFS_BUF_ASYNC(bp); | 1372 | XFS_BUF_ASYNC(bp); |
1423 | bp->b_flags |= XBF_LOG_BUFFER; | 1373 | bp->b_flags |= XBF_LOG_BUFFER; |
1424 | 1374 | ||
1425 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) | 1375 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { |
1376 | /* | ||
1377 | * If we have an external log device, flush the data device | ||
1378 | * before flushing the log to make sure all meta data | ||
1379 | * written back from the AIL actually made it to disk | ||
1380 | * before writing out the new log tail LSN in the log buffer. | ||
1381 | */ | ||
1382 | if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) | ||
1383 | xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); | ||
1426 | XFS_BUF_ORDERED(bp); | 1384 | XFS_BUF_ORDERED(bp); |
1385 | } | ||
1427 | 1386 | ||
1428 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); | 1387 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); |
1429 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); | 1388 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); |
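The new block in xlog_sync() above fixes a write-ordering hazard on external logs: metadata written back by the AIL goes to the data device, so that device's cache must be flushed before the log device records a newer tail LSN. A minimal sketch of the ordering rule (the device type and both helpers are hypothetical stand-ins for xfs_blkdev_issue_flush() and the ordered log write):

```c
struct blkdev { int id; };	/* opaque stand-in for a device handle */

static void issue_flush(struct blkdev *dev) { (void)dev; }
static void write_log_ordered(struct blkdev *dev) { (void)dev; }

static void sync_log_buf(struct blkdev *logdev, struct blkdev *datadev)
{
	if (logdev != datadev)		/* external log: order the devices */
		issue_flush(datadev);	/* AIL writeback made durable first */
	write_log_ordered(logdev);	/* then the log buffer itself */
}
```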
@@ -1499,19 +1458,22 @@ xlog_dealloc_log(xlog_t *log) | |||
1499 | 1458 | ||
1500 | xlog_cil_destroy(log); | 1459 | xlog_cil_destroy(log); |
1501 | 1460 | ||
1461 | /* | ||
1462 | * always need to ensure that the extra buffer does not point to memory | ||
1463 | * owned by another log buffer before we free it. | ||
1464 | */ | ||
1465 | xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size); | ||
1466 | xfs_buf_free(log->l_xbuf); | ||
1467 | |||
1502 | iclog = log->l_iclog; | 1468 | iclog = log->l_iclog; |
1503 | for (i=0; i<log->l_iclog_bufs; i++) { | 1469 | for (i=0; i<log->l_iclog_bufs; i++) { |
1504 | sv_destroy(&iclog->ic_force_wait); | ||
1505 | sv_destroy(&iclog->ic_write_wait); | ||
1506 | xfs_buf_free(iclog->ic_bp); | 1470 | xfs_buf_free(iclog->ic_bp); |
1507 | next_iclog = iclog->ic_next; | 1471 | next_iclog = iclog->ic_next; |
1508 | kmem_free(iclog); | 1472 | kmem_free(iclog); |
1509 | iclog = next_iclog; | 1473 | iclog = next_iclog; |
1510 | } | 1474 | } |
1511 | spinlock_destroy(&log->l_icloglock); | 1475 | spinlock_destroy(&log->l_icloglock); |
1512 | spinlock_destroy(&log->l_grant_lock); | ||
1513 | 1476 | ||
1514 | xfs_buf_free(log->l_xbuf); | ||
1515 | log->l_mp->m_log = NULL; | 1477 | log->l_mp->m_log = NULL; |
1516 | kmem_free(log); | 1478 | kmem_free(log); |
1517 | } /* xlog_dealloc_log */ | 1479 | } /* xlog_dealloc_log */ |
@@ -1614,38 +1576,36 @@ xlog_print_tic_res( | |||
1614 | "SWAPEXT" | 1576 | "SWAPEXT" |
1615 | }; | 1577 | }; |
1616 | 1578 | ||
1617 | xfs_fs_cmn_err(CE_WARN, mp, | 1579 | xfs_warn(mp, |
1618 | "xfs_log_write: reservation summary:\n" | 1580 | "xfs_log_write: reservation summary:\n" |
1619 | " trans type = %s (%u)\n" | 1581 | " trans type = %s (%u)\n" |
1620 | " unit res = %d bytes\n" | 1582 | " unit res = %d bytes\n" |
1621 | " current res = %d bytes\n" | 1583 | " current res = %d bytes\n" |
1622 | " total reg = %u bytes (o/flow = %u bytes)\n" | 1584 | " total reg = %u bytes (o/flow = %u bytes)\n" |
1623 | " ophdrs = %u (ophdr space = %u bytes)\n" | 1585 | " ophdrs = %u (ophdr space = %u bytes)\n" |
1624 | " ophdr + reg = %u bytes\n" | 1586 | " ophdr + reg = %u bytes\n" |
1625 | " num regions = %u\n", | 1587 | " num regions = %u\n", |
1626 | ((ticket->t_trans_type <= 0 || | 1588 | ((ticket->t_trans_type <= 0 || |
1627 | ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? | 1589 | ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? |
1628 | "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), | 1590 | "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), |
1629 | ticket->t_trans_type, | 1591 | ticket->t_trans_type, |
1630 | ticket->t_unit_res, | 1592 | ticket->t_unit_res, |
1631 | ticket->t_curr_res, | 1593 | ticket->t_curr_res, |
1632 | ticket->t_res_arr_sum, ticket->t_res_o_flow, | 1594 | ticket->t_res_arr_sum, ticket->t_res_o_flow, |
1633 | ticket->t_res_num_ophdrs, ophdr_spc, | 1595 | ticket->t_res_num_ophdrs, ophdr_spc, |
1634 | ticket->t_res_arr_sum + | 1596 | ticket->t_res_arr_sum + |
1635 | ticket->t_res_o_flow + ophdr_spc, | 1597 | ticket->t_res_o_flow + ophdr_spc, |
1636 | ticket->t_res_num); | 1598 | ticket->t_res_num); |
1637 | 1599 | ||
1638 | for (i = 0; i < ticket->t_res_num; i++) { | 1600 | for (i = 0; i < ticket->t_res_num; i++) { |
1639 | uint r_type = ticket->t_res_arr[i].r_type; | 1601 | uint r_type = ticket->t_res_arr[i].r_type; |
1640 | cmn_err(CE_WARN, | 1602 | xfs_warn(mp, "region[%u]: %s - %u bytes\n", i, |
1641 | "region[%u]: %s - %u bytes\n", | ||
1642 | i, | ||
1643 | ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? | 1603 | ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? |
1644 | "bad-rtype" : res_type_str[r_type-1]), | 1604 | "bad-rtype" : res_type_str[r_type-1]), |
1645 | ticket->t_res_arr[i].r_len); | 1605 | ticket->t_res_arr[i].r_len); |
1646 | } | 1606 | } |
1647 | 1607 | ||
1648 | xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, | 1608 | xfs_alert_tag(mp, XFS_PTAG_LOGRES, |
1649 | "xfs_log_write: reservation ran out. Need to up reservation"); | 1609 | "xfs_log_write: reservation ran out. Need to up reservation"); |
1650 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 1610 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
1651 | } | 1611 | } |
@@ -1733,7 +1693,7 @@ xlog_write_setup_ophdr( | |||
1733 | case XFS_LOG: | 1693 | case XFS_LOG: |
1734 | break; | 1694 | break; |
1735 | default: | 1695 | default: |
1736 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | 1696 | xfs_warn(log->l_mp, |
1737 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", | 1697 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", |
1738 | ophdr->oh_clientid, ticket); | 1698 | ophdr->oh_clientid, ticket); |
1739 | return NULL; | 1699 | return NULL; |
@@ -2244,7 +2204,7 @@ xlog_state_do_callback( | |||
2244 | lowest_lsn = xlog_get_lowest_lsn(log); | 2204 | lowest_lsn = xlog_get_lowest_lsn(log); |
2245 | if (lowest_lsn && | 2205 | if (lowest_lsn && |
2246 | XFS_LSN_CMP(lowest_lsn, | 2206 | XFS_LSN_CMP(lowest_lsn, |
2247 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { | 2207 | be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { |
2248 | iclog = iclog->ic_next; | 2208 | iclog = iclog->ic_next; |
2249 | continue; /* Leave this iclog for | 2209 | continue; /* Leave this iclog for |
2250 | * another thread */ | 2210 | * another thread */ |
@@ -2252,23 +2212,21 @@ xlog_state_do_callback( | |||
2252 | 2212 | ||
2253 | iclog->ic_state = XLOG_STATE_CALLBACK; | 2213 | iclog->ic_state = XLOG_STATE_CALLBACK; |
2254 | 2214 | ||
2255 | spin_unlock(&log->l_icloglock); | ||
2256 | 2215 | ||
2257 | /* l_last_sync_lsn field protected by | 2216 | /* |
2258 | * l_grant_lock. Don't worry about iclog's lsn. | 2217 | * update the last_sync_lsn before we drop the |
2259 | * No one else can be here except us. | 2218 | * icloglock to ensure we are the only one that |
2219 | * can update it. | ||
2260 | */ | 2220 | */ |
2261 | spin_lock(&log->l_grant_lock); | 2221 | ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), |
2262 | ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, | 2222 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); |
2263 | be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); | 2223 | atomic64_set(&log->l_last_sync_lsn, |
2264 | log->l_last_sync_lsn = | 2224 | be64_to_cpu(iclog->ic_header.h_lsn)); |
2265 | be64_to_cpu(iclog->ic_header.h_lsn); | ||
2266 | spin_unlock(&log->l_grant_lock); | ||
2267 | 2225 | ||
2268 | } else { | 2226 | } else |
2269 | spin_unlock(&log->l_icloglock); | ||
2270 | ioerrors++; | 2227 | ioerrors++; |
2271 | } | 2228 | |
2229 | spin_unlock(&log->l_icloglock); | ||
2272 | 2230 | ||
2273 | /* | 2231 | /* |
2274 | * Keep processing entries in the callback list until | 2232 | * Keep processing entries in the callback list until |
@@ -2309,7 +2267,7 @@ xlog_state_do_callback( | |||
2309 | xlog_state_clean_log(log); | 2267 | xlog_state_clean_log(log); |
2310 | 2268 | ||
2311 | /* wake up threads waiting in xfs_log_force() */ | 2269 | /* wake up threads waiting in xfs_log_force() */ |
2312 | sv_broadcast(&iclog->ic_force_wait); | 2270 | wake_up_all(&iclog->ic_force_wait); |
2313 | 2271 | ||
2314 | iclog = iclog->ic_next; | 2272 | iclog = iclog->ic_next; |
2315 | } while (first_iclog != iclog); | 2273 | } while (first_iclog != iclog); |
@@ -2317,7 +2275,7 @@ xlog_state_do_callback( | |||
2317 | if (repeats > 5000) { | 2275 | if (repeats > 5000) { |
2318 | flushcnt += repeats; | 2276 | flushcnt += repeats; |
2319 | repeats = 0; | 2277 | repeats = 0; |
2320 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | 2278 | xfs_warn(log->l_mp, |
2321 | "%s: possible infinite loop (%d iterations)", | 2279 | "%s: possible infinite loop (%d iterations)", |
2322 | __func__, flushcnt); | 2280 | __func__, flushcnt); |
2323 | } | 2281 | } |
@@ -2356,7 +2314,7 @@ xlog_state_do_callback( | |||
2356 | spin_unlock(&log->l_icloglock); | 2314 | spin_unlock(&log->l_icloglock); |
2357 | 2315 | ||
2358 | if (wake) | 2316 | if (wake) |
2359 | sv_broadcast(&log->l_flush_wait); | 2317 | wake_up_all(&log->l_flush_wait); |
2360 | } | 2318 | } |
2361 | 2319 | ||
2362 | 2320 | ||
@@ -2407,7 +2365,7 @@ xlog_state_done_syncing( | |||
2407 | * iclog buffer, we wake them all, one will get to do the | 2365 | * iclog buffer, we wake them all, one will get to do the |
2408 | * I/O, the others get to wait for the result. | 2366 | * I/O, the others get to wait for the result. |
2409 | */ | 2367 | */ |
2410 | sv_broadcast(&iclog->ic_write_wait); | 2368 | wake_up_all(&iclog->ic_write_wait); |
2411 | spin_unlock(&log->l_icloglock); | 2369 | spin_unlock(&log->l_icloglock); |
2412 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ | 2370 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ |
2413 | } /* xlog_state_done_syncing */ | 2371 | } /* xlog_state_done_syncing */ |
@@ -2456,7 +2414,7 @@ restart: | |||
2456 | XFS_STATS_INC(xs_log_noiclogs); | 2414 | XFS_STATS_INC(xs_log_noiclogs); |
2457 | 2415 | ||
2458 | /* Wait for log writes to have flushed */ | 2416 | /* Wait for log writes to have flushed */ |
2459 | sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); | 2417 | xlog_wait(&log->l_flush_wait, &log->l_icloglock); |
2460 | goto restart; | 2418 | goto restart; |
2461 | } | 2419 | } |
2462 | 2420 | ||
@@ -2539,6 +2497,18 @@ restart: | |||
2539 | * | 2497 | * |
2540 | * Once a ticket gets put onto the reserveq, it will only return after | 2498 | * Once a ticket gets put onto the reserveq, it will only return after |
2541 | * the needed reservation is satisfied. | 2499 | * the needed reservation is satisfied. |
2500 | * | ||
2501 | * This function is structured so that it has a lock free fast path. This is | ||
2502 | * necessary because every new transaction reservation will come through this | ||
2503 | * path. Hence any lock will be globally hot if we take it unconditionally on | ||
2504 | * every pass. | ||
2505 | * | ||
2506 | * As tickets are only ever moved on and off the reserveq under the | ||
2507 | * l_grant_reserve_lock, we only need to take that lock if we are going | ||
2508 | * to add the ticket to the queue and sleep. We can avoid taking the lock if the | ||
2509 | * ticket was never added to the reserveq because the t_queue list head will be | ||
2510 | * empty and we hold the only reference to it so it can safely be checked | ||
2511 | * unlocked. | ||
2542 | */ | 2512 | */ |
2543 | STATIC int | 2513 | STATIC int |
2544 | xlog_grant_log_space(xlog_t *log, | 2514 | xlog_grant_log_space(xlog_t *log, |
@@ -2546,24 +2516,27 @@ xlog_grant_log_space(xlog_t *log, | |||
2546 | { | 2516 | { |
2547 | int free_bytes; | 2517 | int free_bytes; |
2548 | int need_bytes; | 2518 | int need_bytes; |
2549 | #ifdef DEBUG | ||
2550 | xfs_lsn_t tail_lsn; | ||
2551 | #endif | ||
2552 | |||
2553 | 2519 | ||
2554 | #ifdef DEBUG | 2520 | #ifdef DEBUG |
2555 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) | 2521 | if (log->l_flags & XLOG_ACTIVE_RECOVERY) |
2556 | panic("grant Recovery problem"); | 2522 | panic("grant Recovery problem"); |
2557 | #endif | 2523 | #endif |
2558 | 2524 | ||
2559 | /* Is there space or do we need to sleep? */ | ||
2560 | spin_lock(&log->l_grant_lock); | ||
2561 | |||
2562 | trace_xfs_log_grant_enter(log, tic); | 2525 | trace_xfs_log_grant_enter(log, tic); |
2563 | 2526 | ||
2527 | need_bytes = tic->t_unit_res; | ||
2528 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
2529 | need_bytes *= tic->t_ocnt; | ||
2530 | |||
2564 | /* something is already sleeping; insert new transaction at end */ | 2531 | /* something is already sleeping; insert new transaction at end */ |
2565 | if (log->l_reserve_headq) { | 2532 | if (!list_empty_careful(&log->l_reserveq)) { |
2566 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2533 | spin_lock(&log->l_grant_reserve_lock); |
2534 | /* recheck the queue now we are locked */ | ||
2535 | if (list_empty(&log->l_reserveq)) { | ||
2536 | spin_unlock(&log->l_grant_reserve_lock); | ||
2537 | goto redo; | ||
2538 | } | ||
2539 | list_add_tail(&tic->t_queue, &log->l_reserveq); | ||
2567 | 2540 | ||
2568 | trace_xfs_log_grant_sleep1(log, tic); | 2541 | trace_xfs_log_grant_sleep1(log, tic); |
2569 | 2542 | ||
@@ -2575,72 +2548,57 @@ xlog_grant_log_space(xlog_t *log, | |||
2575 | goto error_return; | 2548 | goto error_return; |
2576 | 2549 | ||
2577 | XFS_STATS_INC(xs_sleep_logspace); | 2550 | XFS_STATS_INC(xs_sleep_logspace); |
2578 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2551 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); |
2552 | |||
2579 | /* | 2553 | /* |
2580 | * If we got an error, and the filesystem is shutting down, | 2554 | * If we got an error, and the filesystem is shutting down, |
2581 | * we'll catch it down below. So just continue... | 2555 | * we'll catch it down below. So just continue... |
2582 | */ | 2556 | */ |
2583 | trace_xfs_log_grant_wake1(log, tic); | 2557 | trace_xfs_log_grant_wake1(log, tic); |
2584 | spin_lock(&log->l_grant_lock); | ||
2585 | } | 2558 | } |
2586 | if (tic->t_flags & XFS_LOG_PERM_RESERV) | ||
2587 | need_bytes = tic->t_unit_res*tic->t_ocnt; | ||
2588 | else | ||
2589 | need_bytes = tic->t_unit_res; | ||
2590 | 2559 | ||
2591 | redo: | 2560 | redo: |
2592 | if (XLOG_FORCED_SHUTDOWN(log)) | 2561 | if (XLOG_FORCED_SHUTDOWN(log)) |
2593 | goto error_return; | 2562 | goto error_return_unlocked; |
2594 | 2563 | ||
2595 | free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, | 2564 | free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); |
2596 | log->l_grant_reserve_bytes); | ||
2597 | if (free_bytes < need_bytes) { | 2565 | if (free_bytes < need_bytes) { |
2598 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2566 | spin_lock(&log->l_grant_reserve_lock); |
2599 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2567 | if (list_empty(&tic->t_queue)) |
2568 | list_add_tail(&tic->t_queue, &log->l_reserveq); | ||
2600 | 2569 | ||
2601 | trace_xfs_log_grant_sleep2(log, tic); | 2570 | trace_xfs_log_grant_sleep2(log, tic); |
2602 | 2571 | ||
2603 | spin_unlock(&log->l_grant_lock); | ||
2604 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2605 | spin_lock(&log->l_grant_lock); | ||
2606 | |||
2607 | XFS_STATS_INC(xs_sleep_logspace); | ||
2608 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | ||
2609 | |||
2610 | spin_lock(&log->l_grant_lock); | ||
2611 | if (XLOG_FORCED_SHUTDOWN(log)) | 2572 | if (XLOG_FORCED_SHUTDOWN(log)) |
2612 | goto error_return; | 2573 | goto error_return; |
2613 | 2574 | ||
2614 | trace_xfs_log_grant_wake2(log, tic); | 2575 | xlog_grant_push_ail(log, need_bytes); |
2576 | |||
2577 | XFS_STATS_INC(xs_sleep_logspace); | ||
2578 | xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); | ||
2615 | 2579 | ||
2580 | trace_xfs_log_grant_wake2(log, tic); | ||
2616 | goto redo; | 2581 | goto redo; |
2617 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2582 | } |
2618 | xlog_del_ticketq(&log->l_reserve_headq, tic); | ||
2619 | 2583 | ||
2620 | /* we've got enough space */ | 2584 | if (!list_empty(&tic->t_queue)) { |
2621 | xlog_grant_add_space(log, need_bytes); | 2585 | spin_lock(&log->l_grant_reserve_lock); |
2622 | #ifdef DEBUG | 2586 | list_del_init(&tic->t_queue); |
2623 | tail_lsn = log->l_tail_lsn; | 2587 | spin_unlock(&log->l_grant_reserve_lock); |
2624 | /* | ||
2625 | * Check to make sure the grant write head didn't just overlap the | ||
2626 | * tail. If the cycles are the same, we can't be overlapping. | ||
2627 | * Otherwise, make sure that the cycles differ by exactly one and | ||
2628 | * check the byte count. | ||
2629 | */ | ||
2630 | if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { | ||
2631 | ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); | ||
2632 | ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); | ||
2633 | } | 2588 | } |
2634 | #endif | 2589 | |
2590 | /* we've got enough space */ | ||
2591 | xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); | ||
2592 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
2635 | trace_xfs_log_grant_exit(log, tic); | 2593 | trace_xfs_log_grant_exit(log, tic); |
2636 | xlog_verify_grant_head(log, 1); | 2594 | xlog_verify_grant_tail(log); |
2637 | spin_unlock(&log->l_grant_lock); | ||
2638 | return 0; | 2595 | return 0; |
2639 | 2596 | ||
2640 | error_return: | 2597 | error_return_unlocked: |
2641 | if (tic->t_flags & XLOG_TIC_IN_Q) | 2598 | spin_lock(&log->l_grant_reserve_lock); |
2642 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2599 | error_return: |
2643 | 2600 | list_del_init(&tic->t_queue); | |
2601 | spin_unlock(&log->l_grant_reserve_lock); | ||
2644 | trace_xfs_log_grant_error(log, tic); | 2602 | trace_xfs_log_grant_error(log, tic); |
2645 | 2603 | ||
2646 | /* | 2604 | /* |
@@ -2650,7 +2608,6 @@ redo: | |||
2650 | */ | 2608 | */ |
2651 | tic->t_curr_res = 0; | 2609 | tic->t_curr_res = 0; |
2652 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | 2610 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ |
2653 | spin_unlock(&log->l_grant_lock); | ||
2654 | return XFS_ERROR(EIO); | 2611 | return XFS_ERROR(EIO); |
2655 | } /* xlog_grant_log_space */ | 2612 | } /* xlog_grant_log_space */ |
2656 | 2613 | ||
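Every sv_wait() call above becomes xlog_wait(), a helper this hunk uses but does not show. What the call sites require is clear: the task is queued on the wait queue and the spinlock passed in is dropped before sleeping. A minimal sketch consistent with that contract (not necessarily the exact helper added elsewhere in this series):

	static inline void
	xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
			__releases(lock)
	{
		DECLARE_WAITQUEUE(wait, current);

		/* queue exclusively so wake-ups are handed out in FIFO order */
		add_wait_queue_exclusive(wq, &wait);
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock(lock);		/* sleep without holding the lock */
		schedule();
		remove_wait_queue(wq, &wait);
	}

The lock is not retaken on wake-up, which is why every sleeper above restarts from the redo label and re-tests both the queue and XLOG_FORCED_SHUTDOWN() from scratch.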
@@ -2658,17 +2615,14 @@ redo: | |||
2658 | /* | 2615 | /* |
2659 | * Replenish the byte reservation required by moving the grant write head. | 2616 | * Replenish the byte reservation required by moving the grant write head. |
2660 | * | 2617 | * |
2661 | * | 2618 | * Similar to xlog_grant_log_space, the function is structured to have a lock |
2619 | * free fast path. | ||
2662 | */ | 2620 | */ |
2663 | STATIC int | 2621 | STATIC int |
2664 | xlog_regrant_write_log_space(xlog_t *log, | 2622 | xlog_regrant_write_log_space(xlog_t *log, |
2665 | xlog_ticket_t *tic) | 2623 | xlog_ticket_t *tic) |
2666 | { | 2624 | { |
2667 | int free_bytes, need_bytes; | 2625 | int free_bytes, need_bytes; |
2668 | xlog_ticket_t *ntic; | ||
2669 | #ifdef DEBUG | ||
2670 | xfs_lsn_t tail_lsn; | ||
2671 | #endif | ||
2672 | 2626 | ||
2673 | tic->t_curr_res = tic->t_unit_res; | 2627 | tic->t_curr_res = tic->t_unit_res; |
2674 | xlog_tic_reset_res(tic); | 2628 | xlog_tic_reset_res(tic); |
@@ -2681,12 +2635,9 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2681 | panic("regrant Recovery problem"); | 2635 | panic("regrant Recovery problem"); |
2682 | #endif | 2636 | #endif |
2683 | 2637 | ||
2684 | spin_lock(&log->l_grant_lock); | ||
2685 | |||
2686 | trace_xfs_log_regrant_write_enter(log, tic); | 2638 | trace_xfs_log_regrant_write_enter(log, tic); |
2687 | |||
2688 | if (XLOG_FORCED_SHUTDOWN(log)) | 2639 | if (XLOG_FORCED_SHUTDOWN(log)) |
2689 | goto error_return; | 2640 | goto error_return_unlocked; |
2690 | 2641 | ||
2691 | /* If there are other waiters on the queue then give them a | 2642 | /* If there are other waiters on the queue then give them a |
2692 | * chance at logspace before us. Wake up the first waiters, | 2643 | * chance at logspace before us. Wake up the first waiters, |
@@ -2695,92 +2646,76 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2695 | * this transaction. | 2646 | * this transaction. |
2696 | */ | 2647 | */ |
2697 | need_bytes = tic->t_unit_res; | 2648 | need_bytes = tic->t_unit_res; |
2698 | if ((ntic = log->l_write_headq)) { | 2649 | if (!list_empty_careful(&log->l_writeq)) { |
2699 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2650 | struct xlog_ticket *ntic; |
2700 | log->l_grant_write_bytes); | 2651 | |
2701 | do { | 2652 | spin_lock(&log->l_grant_write_lock); |
2653 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); | ||
2654 | list_for_each_entry(ntic, &log->l_writeq, t_queue) { | ||
2702 | ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); | 2655 | ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); |
2703 | 2656 | ||
2704 | if (free_bytes < ntic->t_unit_res) | 2657 | if (free_bytes < ntic->t_unit_res) |
2705 | break; | 2658 | break; |
2706 | free_bytes -= ntic->t_unit_res; | 2659 | free_bytes -= ntic->t_unit_res; |
2707 | sv_signal(&ntic->t_wait); | 2660 | wake_up(&ntic->t_wait); |
2708 | ntic = ntic->t_next; | 2661 | } |
2709 | } while (ntic != log->l_write_headq); | ||
2710 | |||
2711 | if (ntic != log->l_write_headq) { | ||
2712 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | ||
2713 | xlog_ins_ticketq(&log->l_write_headq, tic); | ||
2714 | 2662 | ||
2663 | if (ntic != list_first_entry(&log->l_writeq, | ||
2664 | struct xlog_ticket, t_queue)) { | ||
2665 | if (list_empty(&tic->t_queue)) | ||
2666 | list_add_tail(&tic->t_queue, &log->l_writeq); | ||
2715 | trace_xfs_log_regrant_write_sleep1(log, tic); | 2667 | trace_xfs_log_regrant_write_sleep1(log, tic); |
2716 | 2668 | ||
2717 | spin_unlock(&log->l_grant_lock); | 2669 | xlog_grant_push_ail(log, need_bytes); |
2718 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2719 | spin_lock(&log->l_grant_lock); | ||
2720 | 2670 | ||
2721 | XFS_STATS_INC(xs_sleep_logspace); | 2671 | XFS_STATS_INC(xs_sleep_logspace); |
2722 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, | 2672 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); |
2723 | &log->l_grant_lock, s); | ||
2724 | |||
2725 | /* If we're shutting down, this tic is already | ||
2726 | * off the queue */ | ||
2727 | spin_lock(&log->l_grant_lock); | ||
2728 | if (XLOG_FORCED_SHUTDOWN(log)) | ||
2729 | goto error_return; | ||
2730 | |||
2731 | trace_xfs_log_regrant_write_wake1(log, tic); | 2673 | trace_xfs_log_regrant_write_wake1(log, tic); |
2732 | } | 2674 | } else |
2675 | spin_unlock(&log->l_grant_write_lock); | ||
2733 | } | 2676 | } |
2734 | 2677 | ||
2735 | redo: | 2678 | redo: |
2736 | if (XLOG_FORCED_SHUTDOWN(log)) | 2679 | if (XLOG_FORCED_SHUTDOWN(log)) |
2737 | goto error_return; | 2680 | goto error_return_unlocked; |
2738 | 2681 | ||
2739 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2682 | free_bytes = xlog_space_left(log, &log->l_grant_write_head); |
2740 | log->l_grant_write_bytes); | ||
2741 | if (free_bytes < need_bytes) { | 2683 | if (free_bytes < need_bytes) { |
2742 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2684 | spin_lock(&log->l_grant_write_lock); |
2743 | xlog_ins_ticketq(&log->l_write_headq, tic); | 2685 | if (list_empty(&tic->t_queue)) |
2744 | spin_unlock(&log->l_grant_lock); | 2686 | list_add_tail(&tic->t_queue, &log->l_writeq); |
2745 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2746 | spin_lock(&log->l_grant_lock); | ||
2747 | 2687 | ||
2748 | XFS_STATS_INC(xs_sleep_logspace); | ||
2749 | trace_xfs_log_regrant_write_sleep2(log, tic); | ||
2750 | |||
2751 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | ||
2752 | |||
2753 | /* If we're shutting down, this tic is already off the queue */ | ||
2754 | spin_lock(&log->l_grant_lock); | ||
2755 | if (XLOG_FORCED_SHUTDOWN(log)) | 2688 | if (XLOG_FORCED_SHUTDOWN(log)) |
2756 | goto error_return; | 2689 | goto error_return; |
2757 | 2690 | ||
2691 | xlog_grant_push_ail(log, need_bytes); | ||
2692 | |||
2693 | XFS_STATS_INC(xs_sleep_logspace); | ||
2694 | trace_xfs_log_regrant_write_sleep2(log, tic); | ||
2695 | xlog_wait(&tic->t_wait, &log->l_grant_write_lock); | ||
2696 | |||
2758 | trace_xfs_log_regrant_write_wake2(log, tic); | 2697 | trace_xfs_log_regrant_write_wake2(log, tic); |
2759 | goto redo; | 2698 | goto redo; |
2760 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2699 | } |
2761 | xlog_del_ticketq(&log->l_write_headq, tic); | ||
2762 | 2700 | ||
2763 | /* we've got enough space */ | 2701 | if (!list_empty(&tic->t_queue)) { |
2764 | xlog_grant_add_space_write(log, need_bytes); | 2702 | spin_lock(&log->l_grant_write_lock); |
2765 | #ifdef DEBUG | 2703 | list_del_init(&tic->t_queue); |
2766 | tail_lsn = log->l_tail_lsn; | 2704 | spin_unlock(&log->l_grant_write_lock); |
2767 | if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { | ||
2768 | ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn)); | ||
2769 | ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); | ||
2770 | } | 2705 | } |
2771 | #endif | ||
2772 | 2706 | ||
2707 | /* we've got enough space */ | ||
2708 | xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); | ||
2773 | trace_xfs_log_regrant_write_exit(log, tic); | 2709 | trace_xfs_log_regrant_write_exit(log, tic); |
2774 | 2710 | xlog_verify_grant_tail(log); | |
2775 | xlog_verify_grant_head(log, 1); | ||
2776 | spin_unlock(&log->l_grant_lock); | ||
2777 | return 0; | 2711 | return 0; |
2778 | 2712 | ||
2779 | 2713 | ||
2714 | error_return_unlocked: | ||
2715 | spin_lock(&log->l_grant_write_lock); | ||
2780 | error_return: | 2716 | error_return: |
2781 | if (tic->t_flags & XLOG_TIC_IN_Q) | 2717 | list_del_init(&tic->t_queue); |
2782 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2718 | spin_unlock(&log->l_grant_write_lock); |
2783 | |||
2784 | trace_xfs_log_regrant_write_error(log, tic); | 2719 | trace_xfs_log_regrant_write_error(log, tic); |
2785 | 2720 | ||
2786 | /* | 2721 | /* |
@@ -2790,7 +2725,6 @@ redo: | |||
2790 | */ | 2725 | */ |
2791 | tic->t_curr_res = 0; | 2726 | tic->t_curr_res = 0; |
2792 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ | 2727 | tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ |
2793 | spin_unlock(&log->l_grant_lock); | ||
2794 | return XFS_ERROR(EIO); | 2728 | return XFS_ERROR(EIO); |
2795 | } /* xlog_regrant_write_log_space */ | 2729 | } /* xlog_regrant_write_log_space */ |
2796 | 2730 | ||
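Both grant functions now share the same fast path: peek at the wait queue with list_empty_careful(), which is legal without the lock, and only take the per-queue spinlock when the ticket actually has to sleep, re-testing emptiness once locked. Reduced to its skeleton (a sketch; stats, tracing and error handling elided):

	if (!list_empty_careful(&log->l_reserveq)) {	/* unlocked peek */
		spin_lock(&log->l_grant_reserve_lock);
		if (list_empty(&log->l_reserveq)) {
			/* raced with the last waiter leaving */
			spin_unlock(&log->l_grant_reserve_lock);
			goto redo;
		}
		list_add_tail(&tic->t_queue, &log->l_reserveq);
		xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
	}

list_empty_careful() may return a stale answer, so only the re-test under the lock decides whether to queue; the unlocked test merely keeps the uncontended path from touching the lock at all.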
@@ -2811,27 +2745,24 @@ xlog_regrant_reserve_log_space(xlog_t *log, | |||
2811 | if (ticket->t_cnt > 0) | 2745 | if (ticket->t_cnt > 0) |
2812 | ticket->t_cnt--; | 2746 | ticket->t_cnt--; |
2813 | 2747 | ||
2814 | spin_lock(&log->l_grant_lock); | 2748 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, |
2815 | xlog_grant_sub_space(log, ticket->t_curr_res); | 2749 | ticket->t_curr_res); |
2750 | xlog_grant_sub_space(log, &log->l_grant_write_head, | ||
2751 | ticket->t_curr_res); | ||
2816 | ticket->t_curr_res = ticket->t_unit_res; | 2752 | ticket->t_curr_res = ticket->t_unit_res; |
2817 | xlog_tic_reset_res(ticket); | 2753 | xlog_tic_reset_res(ticket); |
2818 | 2754 | ||
2819 | trace_xfs_log_regrant_reserve_sub(log, ticket); | 2755 | trace_xfs_log_regrant_reserve_sub(log, ticket); |
2820 | 2756 | ||
2821 | xlog_verify_grant_head(log, 1); | ||
2822 | |||
2823 | /* just return if we still have some of the pre-reserved space */ | 2757 | /* just return if we still have some of the pre-reserved space */ |
2824 | if (ticket->t_cnt > 0) { | 2758 | if (ticket->t_cnt > 0) |
2825 | spin_unlock(&log->l_grant_lock); | ||
2826 | return; | 2759 | return; |
2827 | } | ||
2828 | 2760 | ||
2829 | xlog_grant_add_space_reserve(log, ticket->t_unit_res); | 2761 | xlog_grant_add_space(log, &log->l_grant_reserve_head, |
2762 | ticket->t_unit_res); | ||
2830 | 2763 | ||
2831 | trace_xfs_log_regrant_reserve_exit(log, ticket); | 2764 | trace_xfs_log_regrant_reserve_exit(log, ticket); |
2832 | 2765 | ||
2833 | xlog_verify_grant_head(log, 0); | ||
2834 | spin_unlock(&log->l_grant_lock); | ||
2835 | ticket->t_curr_res = ticket->t_unit_res; | 2766 | ticket->t_curr_res = ticket->t_unit_res; |
2836 | xlog_tic_reset_res(ticket); | 2767 | xlog_tic_reset_res(ticket); |
2837 | } /* xlog_regrant_reserve_log_space */ | 2768 | } /* xlog_regrant_reserve_log_space */ |
@@ -2855,28 +2786,29 @@ STATIC void | |||
2855 | xlog_ungrant_log_space(xlog_t *log, | 2786 | xlog_ungrant_log_space(xlog_t *log, |
2856 | xlog_ticket_t *ticket) | 2787 | xlog_ticket_t *ticket) |
2857 | { | 2788 | { |
2789 | int bytes; | ||
2790 | |||
2858 | if (ticket->t_cnt > 0) | 2791 | if (ticket->t_cnt > 0) |
2859 | ticket->t_cnt--; | 2792 | ticket->t_cnt--; |
2860 | 2793 | ||
2861 | spin_lock(&log->l_grant_lock); | ||
2862 | trace_xfs_log_ungrant_enter(log, ticket); | 2794 | trace_xfs_log_ungrant_enter(log, ticket); |
2863 | |||
2864 | xlog_grant_sub_space(log, ticket->t_curr_res); | ||
2865 | |||
2866 | trace_xfs_log_ungrant_sub(log, ticket); | 2795 | trace_xfs_log_ungrant_sub(log, ticket); |
2867 | 2796 | ||
2868 | /* If this is a permanent reservation ticket, we may be able to free | 2797 | /* |
2798 | * If this is a permanent reservation ticket, we may be able to free | ||
2869 | * up more space based on the remaining count. | 2799 | * up more space based on the remaining count. |
2870 | */ | 2800 | */ |
2801 | bytes = ticket->t_curr_res; | ||
2871 | if (ticket->t_cnt > 0) { | 2802 | if (ticket->t_cnt > 0) { |
2872 | ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); | 2803 | ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); |
2873 | xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); | 2804 | bytes += ticket->t_unit_res*ticket->t_cnt; |
2874 | } | 2805 | } |
2875 | 2806 | ||
2807 | xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); | ||
2808 | xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); | ||
2809 | |||
2876 | trace_xfs_log_ungrant_exit(log, ticket); | 2810 | trace_xfs_log_ungrant_exit(log, ticket); |
2877 | 2811 | ||
2878 | xlog_verify_grant_head(log, 1); | ||
2879 | spin_unlock(&log->l_grant_lock); | ||
2880 | xfs_log_move_tail(log->l_mp, 1); | 2812 | xfs_log_move_tail(log->l_mp, 1); |
2881 | } /* xlog_ungrant_log_space */ | 2813 | } /* xlog_ungrant_log_space */ |
2882 | 2814 | ||
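xlog_grant_add_space() and xlog_grant_sub_space() now take a pointer to the head they modify because each grant head has become a single atomic64_t packing a {cycle, bytes} pair, updated by compare-and-exchange instead of under l_grant_lock. A sketch of the add side under that assumption (the packing layout is inferred from the crack helpers added to xfs_log_priv.h below):

	static void
	xlog_grant_add_space(struct log *log, atomic64_t *head, int bytes)
	{
		int64_t	head_val = atomic64_read(head);
		int64_t	old, new;

		do {
			int	cycle = head_val >> 32;		/* high 32 bits */
			int	space = head_val & 0xffffffff;	/* low 32 bits */
			int	tmp = log->l_logsize - space;

			if (tmp > bytes)
				space += bytes;		/* fits in this cycle */
			else {
				space = bytes - tmp;	/* wrap to next cycle */
				cycle++;
			}

			old = head_val;
			new = ((int64_t)cycle << 32) | space;
			head_val = atomic64_cmpxchg(head, old, new);
		} while (head_val != old);	/* lost a race, recompute */
	}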
@@ -2913,11 +2845,11 @@ xlog_state_release_iclog( | |||
2913 | 2845 | ||
2914 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { | 2846 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { |
2915 | /* update tail before writing to iclog */ | 2847 | /* update tail before writing to iclog */ |
2916 | xlog_assign_tail_lsn(log->l_mp); | 2848 | xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); |
2917 | sync++; | 2849 | sync++; |
2918 | iclog->ic_state = XLOG_STATE_SYNCING; | 2850 | iclog->ic_state = XLOG_STATE_SYNCING; |
2919 | iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); | 2851 | iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); |
2920 | xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); | 2852 | xlog_verify_tail_lsn(log, iclog, tail_lsn); |
2921 | /* cycle incremented when incrementing curr_block */ | 2853 | /* cycle incremented when incrementing curr_block */ |
2922 | } | 2854 | } |
2923 | spin_unlock(&log->l_icloglock); | 2855 | spin_unlock(&log->l_icloglock); |
@@ -3100,7 +3032,7 @@ maybe_sleep: | |||
3100 | return XFS_ERROR(EIO); | 3032 | return XFS_ERROR(EIO); |
3101 | } | 3033 | } |
3102 | XFS_STATS_INC(xs_log_force_sleep); | 3034 | XFS_STATS_INC(xs_log_force_sleep); |
3103 | sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); | 3035 | xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
3104 | /* | 3036 | /* |
3105 | * No need to grab the log lock here since we're | 3037 | * No need to grab the log lock here since we're |
3106 | * only deciding whether or not to return EIO | 3038 | * only deciding whether or not to return EIO |
@@ -3131,10 +3063,8 @@ xfs_log_force( | |||
3131 | int error; | 3063 | int error; |
3132 | 3064 | ||
3133 | error = _xfs_log_force(mp, flags, NULL); | 3065 | error = _xfs_log_force(mp, flags, NULL); |
3134 | if (error) { | 3066 | if (error) |
3135 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | 3067 | xfs_warn(mp, "%s: error %d returned.", __func__, error); |
3136 | "error %d returned.", error); | ||
3137 | } | ||
3138 | } | 3068 | } |
3139 | 3069 | ||
3140 | /* | 3070 | /* |
@@ -3218,8 +3148,8 @@ try_again: | |||
3218 | 3148 | ||
3219 | XFS_STATS_INC(xs_log_force_sleep); | 3149 | XFS_STATS_INC(xs_log_force_sleep); |
3220 | 3150 | ||
3221 | sv_wait(&iclog->ic_prev->ic_write_wait, | 3151 | xlog_wait(&iclog->ic_prev->ic_write_wait, |
3222 | PSWP, &log->l_icloglock, s); | 3152 | &log->l_icloglock); |
3223 | if (log_flushed) | 3153 | if (log_flushed) |
3224 | *log_flushed = 1; | 3154 | *log_flushed = 1; |
3225 | already_slept = 1; | 3155 | already_slept = 1; |
@@ -3247,7 +3177,7 @@ try_again: | |||
3247 | return XFS_ERROR(EIO); | 3177 | return XFS_ERROR(EIO); |
3248 | } | 3178 | } |
3249 | XFS_STATS_INC(xs_log_force_sleep); | 3179 | XFS_STATS_INC(xs_log_force_sleep); |
3250 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); | 3180 | xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
3251 | /* | 3181 | /* |
3252 | * No need to grab the log lock here since we're | 3182 | * No need to grab the log lock here since we're |
3253 | * only deciding whether or not to return EIO | 3183 | * only deciding whether or not to return EIO |
@@ -3283,10 +3213,8 @@ xfs_log_force_lsn( | |||
3283 | int error; | 3213 | int error; |
3284 | 3214 | ||
3285 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); | 3215 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); |
3286 | if (error) { | 3216 | if (error) |
3287 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | 3217 | xfs_warn(mp, "%s: error %d returned.", __func__, error); |
3288 | "error %d returned.", error); | ||
3289 | } | ||
3290 | } | 3218 | } |
3291 | 3219 | ||
3292 | /* | 3220 | /* |
@@ -3322,10 +3250,8 @@ xfs_log_ticket_put( | |||
3322 | xlog_ticket_t *ticket) | 3250 | xlog_ticket_t *ticket) |
3323 | { | 3251 | { |
3324 | ASSERT(atomic_read(&ticket->t_ref) > 0); | 3252 | ASSERT(atomic_read(&ticket->t_ref) > 0); |
3325 | if (atomic_dec_and_test(&ticket->t_ref)) { | 3253 | if (atomic_dec_and_test(&ticket->t_ref)) |
3326 | sv_destroy(&ticket->t_wait); | ||
3327 | kmem_zone_free(xfs_log_ticket_zone, ticket); | 3254 | kmem_zone_free(xfs_log_ticket_zone, ticket); |
3328 | } | ||
3329 | } | 3255 | } |
3330 | 3256 | ||
3331 | xlog_ticket_t * | 3257 | xlog_ticket_t * |
@@ -3337,13 +3263,6 @@ xfs_log_ticket_get( | |||
3337 | return ticket; | 3263 | return ticket; |
3338 | } | 3264 | } |
3339 | 3265 | ||
3340 | xlog_tid_t | ||
3341 | xfs_log_get_trans_ident( | ||
3342 | struct xfs_trans *tp) | ||
3343 | { | ||
3344 | return tp->t_ticket->t_tid; | ||
3345 | } | ||
3346 | |||
3347 | /* | 3266 | /* |
3348 | * Allocate and initialise a new log ticket. | 3267 | * Allocate and initialise a new log ticket. |
3349 | */ | 3268 | */ |
@@ -3447,6 +3366,7 @@ xlog_ticket_alloc( | |||
3447 | } | 3366 | } |
3448 | 3367 | ||
3449 | atomic_set(&tic->t_ref, 1); | 3368 | atomic_set(&tic->t_ref, 1); |
3369 | INIT_LIST_HEAD(&tic->t_queue); | ||
3450 | tic->t_unit_res = unit_bytes; | 3370 | tic->t_unit_res = unit_bytes; |
3451 | tic->t_curr_res = unit_bytes; | 3371 | tic->t_curr_res = unit_bytes; |
3452 | tic->t_cnt = cnt; | 3372 | tic->t_cnt = cnt; |
@@ -3457,7 +3377,7 @@ xlog_ticket_alloc( | |||
3457 | tic->t_trans_type = 0; | 3377 | tic->t_trans_type = 0; |
3458 | if (xflags & XFS_LOG_PERM_RESERV) | 3378 | if (xflags & XFS_LOG_PERM_RESERV) |
3459 | tic->t_flags |= XLOG_TIC_PERM_RESERV; | 3379 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
3460 | sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); | 3380 | init_waitqueue_head(&tic->t_wait); |
3461 | 3381 | ||
3462 | xlog_tic_reset_res(tic); | 3382 | xlog_tic_reset_res(tic); |
3463 | 3383 | ||
@@ -3492,22 +3412,45 @@ xlog_verify_dest_ptr( | |||
3492 | } | 3412 | } |
3493 | 3413 | ||
3494 | if (!good_ptr) | 3414 | if (!good_ptr) |
3495 | xlog_panic("xlog_verify_dest_ptr: invalid ptr"); | 3415 | xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); |
3496 | } | 3416 | } |
3497 | 3417 | ||
3418 | /* | ||
3419 | * Check to make sure the grant write head didn't just overlap the tail. If | ||
3420 | * the cycles are the same, we can't be overlapping. Otherwise, make sure that | ||
3421 | * the cycles differ by exactly one and check the byte count. | ||
3422 | * | ||
3423 | * This check is run unlocked, so can give false positives. Rather than assert | ||
3424 | * on failures, use a warn-once flag and a panic tag to allow the admin to | ||
3425 | * determine if they want to panic the machine when such an error occurs. For | ||
3426 | * debug kernels this will have the same effect as using an assert but, unlike | ||
3427 | * an assert, it can be turned off at runtime. | ||
3428 | */ | ||
3498 | STATIC void | 3429 | STATIC void |
3499 | xlog_verify_grant_head(xlog_t *log, int equals) | 3430 | xlog_verify_grant_tail( |
3500 | { | 3431 | struct log *log) |
3501 | if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { | 3432 | { |
3502 | if (equals) | 3433 | int tail_cycle, tail_blocks; |
3503 | ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); | 3434 | int cycle, space; |
3504 | else | 3435 | |
3505 | ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); | 3436 | xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); |
3506 | } else { | 3437 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); |
3507 | ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); | 3438 | if (tail_cycle != cycle) { |
3508 | ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); | 3439 | if (cycle - 1 != tail_cycle && |
3509 | } | 3440 | !(log->l_flags & XLOG_TAIL_WARN)) { |
3510 | } /* xlog_verify_grant_head */ | 3441 | xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, |
3442 | "%s: cycle - 1 != tail_cycle", __func__); | ||
3443 | log->l_flags |= XLOG_TAIL_WARN; | ||
3444 | } | ||
3445 | |||
3446 | if (space > BBTOB(tail_blocks) && | ||
3447 | !(log->l_flags & XLOG_TAIL_WARN)) { | ||
3448 | xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, | ||
3449 | "%s: space > BBTOB(tail_blocks)", __func__); | ||
3450 | log->l_flags |= XLOG_TAIL_WARN; | ||
3451 | } | ||
3452 | } | ||
3453 | } | ||
3511 | 3454 | ||
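xlog_verify_grant_tail() deliberately samples each 64-bit value once before splitting it, so an unlocked check can at worst be stale, never torn, hence the warn-once XLOG_TAIL_WARN treatment instead of an assert. The grant-head crack helper is not shown in this diff; by analogy with xlog_crack_atomic_lsn() in xfs_log_priv.h below, it presumably reads:

	static inline void
	xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
	{
		int64_t val = atomic64_read(head);	/* single sample */

		*cycle = val >> 32;		/* cycle: high 32 bits */
		*space = val & 0xffffffff;	/* byte count: low 32 bits */
	}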
3512 | /* check if it will fit */ | 3455 | /* check if it will fit */ |
3513 | STATIC void | 3456 | STATIC void |
@@ -3521,16 +3464,16 @@ xlog_verify_tail_lsn(xlog_t *log, | |||
3521 | blocks = | 3464 | blocks = |
3522 | log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn)); | 3465 | log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn)); |
3523 | if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize)) | 3466 | if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize)) |
3524 | xlog_panic("xlog_verify_tail_lsn: ran out of log space"); | 3467 | xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); |
3525 | } else { | 3468 | } else { |
3526 | ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle); | 3469 | ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle); |
3527 | 3470 | ||
3528 | if (BLOCK_LSN(tail_lsn) == log->l_prev_block) | 3471 | if (BLOCK_LSN(tail_lsn) == log->l_prev_block) |
3529 | xlog_panic("xlog_verify_tail_lsn: tail wrapped"); | 3472 | xfs_emerg(log->l_mp, "%s: tail wrapped", __func__); |
3530 | 3473 | ||
3531 | blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block; | 3474 | blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block; |
3532 | if (blocks < BTOBB(iclog->ic_offset) + 1) | 3475 | if (blocks < BTOBB(iclog->ic_offset) + 1) |
3533 | xlog_panic("xlog_verify_tail_lsn: ran out of log space"); | 3476 | xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); |
3534 | } | 3477 | } |
3535 | } /* xlog_verify_tail_lsn */ | 3478 | } /* xlog_verify_tail_lsn */ |
3536 | 3479 | ||
@@ -3570,22 +3513,23 @@ xlog_verify_iclog(xlog_t *log, | |||
3570 | icptr = log->l_iclog; | 3513 | icptr = log->l_iclog; |
3571 | for (i=0; i < log->l_iclog_bufs; i++) { | 3514 | for (i=0; i < log->l_iclog_bufs; i++) { |
3572 | if (icptr == NULL) | 3515 | if (icptr == NULL) |
3573 | xlog_panic("xlog_verify_iclog: invalid ptr"); | 3516 | xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); |
3574 | icptr = icptr->ic_next; | 3517 | icptr = icptr->ic_next; |
3575 | } | 3518 | } |
3576 | if (icptr != log->l_iclog) | 3519 | if (icptr != log->l_iclog) |
3577 | xlog_panic("xlog_verify_iclog: corrupt iclog ring"); | 3520 | xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__); |
3578 | spin_unlock(&log->l_icloglock); | 3521 | spin_unlock(&log->l_icloglock); |
3579 | 3522 | ||
3580 | /* check log magic numbers */ | 3523 | /* check log magic numbers */ |
3581 | if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) | 3524 | if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) |
3582 | xlog_panic("xlog_verify_iclog: invalid magic num"); | 3525 | xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); |
3583 | 3526 | ||
3584 | ptr = (xfs_caddr_t) &iclog->ic_header; | 3527 | ptr = (xfs_caddr_t) &iclog->ic_header; |
3585 | for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; | 3528 | for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; |
3586 | ptr += BBSIZE) { | 3529 | ptr += BBSIZE) { |
3587 | if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) | 3530 | if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) |
3588 | xlog_panic("xlog_verify_iclog: unexpected magic num"); | 3531 | xfs_emerg(log->l_mp, "%s: unexpected magic num", |
3532 | __func__); | ||
3589 | } | 3533 | } |
3590 | 3534 | ||
3591 | /* check fields */ | 3535 | /* check fields */ |
@@ -3615,9 +3559,10 @@ xlog_verify_iclog(xlog_t *log, | |||
3615 | } | 3559 | } |
3616 | } | 3560 | } |
3617 | if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) | 3561 | if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) |
3618 | cmn_err(CE_WARN, "xlog_verify_iclog: " | 3562 | xfs_warn(log->l_mp, |
3619 | "invalid clientid %d op 0x%p offset 0x%lx", | 3563 | "%s: invalid clientid %d op 0x%p offset 0x%lx", |
3620 | clientid, ophead, (unsigned long)field_offset); | 3564 | __func__, clientid, ophead, |
3565 | (unsigned long)field_offset); | ||
3621 | 3566 | ||
3622 | /* check length */ | 3567 | /* check length */ |
3623 | field_offset = (__psint_t) | 3568 | field_offset = (__psint_t) |
@@ -3728,12 +3673,10 @@ xfs_log_force_umount( | |||
3728 | xlog_cil_force(log); | 3673 | xlog_cil_force(log); |
3729 | 3674 | ||
3730 | /* | 3675 | /* |
3731 | * We must hold both the GRANT lock and the LOG lock, | 3676 | * mark the filesystem and the log as in a shutdown state and wake |
3732 | * before we mark the filesystem SHUTDOWN and wake | 3677 | * everybody up to tell them the bad news. |
3733 | * everybody up to tell the bad news. | ||
3734 | */ | 3678 | */ |
3735 | spin_lock(&log->l_icloglock); | 3679 | spin_lock(&log->l_icloglock); |
3736 | spin_lock(&log->l_grant_lock); | ||
3737 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; | 3680 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; |
3738 | if (mp->m_sb_bp) | 3681 | if (mp->m_sb_bp) |
3739 | XFS_BUF_DONE(mp->m_sb_bp); | 3682 | XFS_BUF_DONE(mp->m_sb_bp); |
@@ -3754,27 +3697,21 @@ xfs_log_force_umount( | |||
3754 | spin_unlock(&log->l_icloglock); | 3697 | spin_unlock(&log->l_icloglock); |
3755 | 3698 | ||
3756 | /* | 3699 | /* |
3757 | * We don't want anybody waiting for log reservations | 3700 | * We don't want anybody waiting for log reservations after this. That |
3758 | * after this. That means we have to wake up everybody | 3701 | * means we have to wake up everybody queued up on reserveq as well as |
3759 | * queued up on reserve_headq as well as write_headq. | 3702 | * writeq. In addition, we make sure in xlog_{re}grant_log_space that |
3760 | * In addition, we make sure in xlog_{re}grant_log_space | 3703 | * we don't enqueue anything once the SHUTDOWN flag is set, and this |
3761 | * that we don't enqueue anything once the SHUTDOWN flag | 3704 | * action is protected by the grant locks. |
3762 | * is set, and this action is protected by the GRANTLOCK. | ||
3763 | */ | 3705 | */ |
3764 | if ((tic = log->l_reserve_headq)) { | 3706 | spin_lock(&log->l_grant_reserve_lock); |
3765 | do { | 3707 | list_for_each_entry(tic, &log->l_reserveq, t_queue) |
3766 | sv_signal(&tic->t_wait); | 3708 | wake_up(&tic->t_wait); |
3767 | tic = tic->t_next; | 3709 | spin_unlock(&log->l_grant_reserve_lock); |
3768 | } while (tic != log->l_reserve_headq); | 3710 | |
3769 | } | 3711 | spin_lock(&log->l_grant_write_lock); |
3770 | 3712 | list_for_each_entry(tic, &log->l_writeq, t_queue) | |
3771 | if ((tic = log->l_write_headq)) { | 3713 | wake_up(&tic->t_wait); |
3772 | do { | 3714 | spin_unlock(&log->l_grant_write_lock); |
3773 | sv_signal(&tic->t_wait); | ||
3774 | tic = tic->t_next; | ||
3775 | } while (tic != log->l_write_headq); | ||
3776 | } | ||
3777 | spin_unlock(&log->l_grant_lock); | ||
3778 | 3715 | ||
3779 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { | 3716 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { |
3780 | ASSERT(!logerror); | 3717 | ASSERT(!logerror); |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 916eb7db14d9..78c9039994af 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -189,9 +189,7 @@ void xlog_iodone(struct xfs_buf *); | |||
189 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); | 189 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
190 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 190 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
191 | 191 | ||
192 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | 192 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, |
193 | |||
194 | int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | ||
195 | struct xfs_log_vec *log_vector, | 193 | struct xfs_log_vec *log_vector, |
196 | xfs_lsn_t *commit_lsn, int flags); | 194 | xfs_lsn_t *commit_lsn, int flags); |
197 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | 195 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 7e206fc1fa36..c7755d5a5fbe 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "xfs_mount.h" | 29 | #include "xfs_mount.h" |
30 | #include "xfs_error.h" | 30 | #include "xfs_error.h" |
31 | #include "xfs_alloc.h" | 31 | #include "xfs_alloc.h" |
32 | #include "xfs_discard.h" | ||
32 | 33 | ||
33 | /* | 34 | /* |
34 | * Perform initial CIL structure initialisation. If the CIL is not | 35 | * Perform initial CIL structure initialisation. If the CIL is not |
@@ -61,7 +62,7 @@ xlog_cil_init( | |||
61 | INIT_LIST_HEAD(&cil->xc_committing); | 62 | INIT_LIST_HEAD(&cil->xc_committing); |
62 | spin_lock_init(&cil->xc_cil_lock); | 63 | spin_lock_init(&cil->xc_cil_lock); |
63 | init_rwsem(&cil->xc_ctx_lock); | 64 | init_rwsem(&cil->xc_ctx_lock); |
64 | sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); | 65 | init_waitqueue_head(&cil->xc_commit_wait); |
65 | 66 | ||
66 | INIT_LIST_HEAD(&ctx->committing); | 67 | INIT_LIST_HEAD(&ctx->committing); |
67 | INIT_LIST_HEAD(&ctx->busy_extents); | 68 | INIT_LIST_HEAD(&ctx->busy_extents); |
@@ -146,102 +147,6 @@ xlog_cil_init_post_recovery( | |||
146 | } | 147 | } |
147 | 148 | ||
148 | /* | 149 | /* |
149 | * Insert the log item into the CIL and calculate the difference in space | ||
150 | * consumed by the item. Add the space to the checkpoint ticket and calculate | ||
151 | * if the change requires additional log metadata. If it does, take that space | ||
152 | * as well. Remove the amount of space we addded to the checkpoint ticket from | ||
153 | * as well. Remove the amount of space we added to the checkpoint ticket from | ||
154 | * | ||
155 | * If this is the first time the item is being placed into the CIL in this | ||
156 | * context, pin it so it can't be written to disk until the CIL is flushed to | ||
157 | * the iclog and the iclog written to disk. | ||
158 | */ | ||
159 | static void | ||
160 | xlog_cil_insert( | ||
161 | struct log *log, | ||
162 | struct xlog_ticket *ticket, | ||
163 | struct xfs_log_item *item, | ||
164 | struct xfs_log_vec *lv) | ||
165 | { | ||
166 | struct xfs_cil *cil = log->l_cilp; | ||
167 | struct xfs_log_vec *old = lv->lv_item->li_lv; | ||
168 | struct xfs_cil_ctx *ctx = cil->xc_ctx; | ||
169 | int len; | ||
170 | int diff_iovecs; | ||
171 | int iclog_space; | ||
172 | |||
173 | if (old) { | ||
174 | /* existing lv on log item, space used is a delta */ | ||
175 | ASSERT(!list_empty(&item->li_cil)); | ||
176 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | ||
177 | |||
178 | len = lv->lv_buf_len - old->lv_buf_len; | ||
179 | diff_iovecs = lv->lv_niovecs - old->lv_niovecs; | ||
180 | kmem_free(old->lv_buf); | ||
181 | kmem_free(old); | ||
182 | } else { | ||
183 | /* new lv, must pin the log item */ | ||
184 | ASSERT(!lv->lv_item->li_lv); | ||
185 | ASSERT(list_empty(&item->li_cil)); | ||
186 | |||
187 | len = lv->lv_buf_len; | ||
188 | diff_iovecs = lv->lv_niovecs; | ||
189 | IOP_PIN(lv->lv_item); | ||
190 | |||
191 | } | ||
192 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
193 | |||
194 | /* attach new log vector to log item */ | ||
195 | lv->lv_item->li_lv = lv; | ||
196 | |||
197 | spin_lock(&cil->xc_cil_lock); | ||
198 | list_move_tail(&item->li_cil, &cil->xc_cil); | ||
199 | ctx->nvecs += diff_iovecs; | ||
200 | |||
201 | /* | ||
202 | * If this is the first time the item is being committed to the CIL, | ||
203 | * store the sequence number on the log item so we can tell | ||
204 | * in future commits whether this is the first checkpoint the item is | ||
205 | * being committed into. | ||
206 | */ | ||
207 | if (!item->li_seq) | ||
208 | item->li_seq = ctx->sequence; | ||
209 | |||
210 | /* | ||
211 | * Now transfer enough transaction reservation to the context ticket | ||
212 | * for the checkpoint. The context ticket is special - the unit | ||
213 | * reservation has to grow as well as the current reservation as we | ||
214 | * steal from tickets so we can correctly determine the space used | ||
215 | * during the transaction commit. | ||
216 | */ | ||
217 | if (ctx->ticket->t_curr_res == 0) { | ||
218 | /* first commit in checkpoint, steal the header reservation */ | ||
219 | ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); | ||
220 | ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; | ||
221 | ticket->t_curr_res -= ctx->ticket->t_unit_res; | ||
222 | } | ||
223 | |||
224 | /* do we need space for more log record headers? */ | ||
225 | iclog_space = log->l_iclog_size - log->l_iclog_hsize; | ||
226 | if (len > 0 && (ctx->space_used / iclog_space != | ||
227 | (ctx->space_used + len) / iclog_space)) { | ||
228 | int hdrs; | ||
229 | |||
230 | hdrs = (len + iclog_space - 1) / iclog_space; | ||
231 | /* need to take into account split region headers, too */ | ||
232 | hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); | ||
233 | ctx->ticket->t_unit_res += hdrs; | ||
234 | ctx->ticket->t_curr_res += hdrs; | ||
235 | ticket->t_curr_res -= hdrs; | ||
236 | ASSERT(ticket->t_curr_res >= len); | ||
237 | } | ||
238 | ticket->t_curr_res -= len; | ||
239 | ctx->space_used += len; | ||
240 | |||
241 | spin_unlock(&cil->xc_cil_lock); | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * Format log item into flat buffers | 150 | * Format log item into flat buffers |
246 | * | 151 | * |
247 | * For delayed logging, we need to hold a formatted buffer containing all the | 152 | * For delayed logging, we need to hold a formatted buffer containing all the |
@@ -286,7 +191,7 @@ xlog_cil_format_items( | |||
286 | len += lv->lv_iovecp[index].i_len; | 191 | len += lv->lv_iovecp[index].i_len; |
287 | 192 | ||
288 | lv->lv_buf_len = len; | 193 | lv->lv_buf_len = len; |
289 | lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); | 194 | lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); |
290 | ptr = lv->lv_buf; | 195 | ptr = lv->lv_buf; |
291 | 196 | ||
292 | for (index = 0; index < lv->lv_niovecs; index++) { | 197 | for (index = 0; index < lv->lv_niovecs; index++) { |
@@ -300,21 +205,136 @@ xlog_cil_format_items( | |||
300 | } | 205 | } |
301 | } | 206 | } |
302 | 207 | ||
208 | /* | ||
209 | * Prepare the log item for insertion into the CIL. Calculate the difference in | ||
210 | * log space and vectors it will consume, and if it is a new item pin it as | ||
211 | * well. | ||
212 | */ | ||
213 | STATIC void | ||
214 | xfs_cil_prepare_item( | ||
215 | struct log *log, | ||
216 | struct xfs_log_vec *lv, | ||
217 | int *len, | ||
218 | int *diff_iovecs) | ||
219 | { | ||
220 | struct xfs_log_vec *old = lv->lv_item->li_lv; | ||
221 | |||
222 | if (old) { | ||
223 | /* existing lv on log item, space used is a delta */ | ||
224 | ASSERT(!list_empty(&lv->lv_item->li_cil)); | ||
225 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | ||
226 | |||
227 | *len += lv->lv_buf_len - old->lv_buf_len; | ||
228 | *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; | ||
229 | kmem_free(old->lv_buf); | ||
230 | kmem_free(old); | ||
231 | } else { | ||
232 | /* new lv, must pin the log item */ | ||
233 | ASSERT(!lv->lv_item->li_lv); | ||
234 | ASSERT(list_empty(&lv->lv_item->li_cil)); | ||
235 | |||
236 | *len += lv->lv_buf_len; | ||
237 | *diff_iovecs += lv->lv_niovecs; | ||
238 | IOP_PIN(lv->lv_item); | ||
239 | |||
240 | } | ||
241 | |||
242 | /* attach new log vector to log item */ | ||
243 | lv->lv_item->li_lv = lv; | ||
244 | |||
245 | /* | ||
246 | * If this is the first time the item is being committed to the | ||
247 | * CIL, store the sequence number on the log item so we can | ||
248 | * tell in future commits whether this is the first checkpoint | ||
249 | * the item is being committed into. | ||
250 | */ | ||
251 | if (!lv->lv_item->li_seq) | ||
252 | lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * Insert the log items into the CIL and calculate the difference in space | ||
257 | * consumed by the item. Add the space to the checkpoint ticket and calculate | ||
258 | * if the change requires additional log metadata. If it does, take that space | ||
259 | * as well. Remove the amount of space we added to the checkpoint ticket from | ||
260 | * the current transaction ticket so that the accounting works out correctly. | ||
261 | */ | ||
303 | static void | 262 | static void |
304 | xlog_cil_insert_items( | 263 | xlog_cil_insert_items( |
305 | struct log *log, | 264 | struct log *log, |
306 | struct xfs_log_vec *log_vector, | 265 | struct xfs_log_vec *log_vector, |
307 | struct xlog_ticket *ticket, | 266 | struct xlog_ticket *ticket) |
308 | xfs_lsn_t *start_lsn) | ||
309 | { | 267 | { |
310 | struct xfs_log_vec *lv; | 268 | struct xfs_cil *cil = log->l_cilp; |
311 | 269 | struct xfs_cil_ctx *ctx = cil->xc_ctx; | |
312 | if (start_lsn) | 270 | struct xfs_log_vec *lv; |
313 | *start_lsn = log->l_cilp->xc_ctx->sequence; | 271 | int len = 0; |
272 | int diff_iovecs = 0; | ||
273 | int iclog_space; | ||
314 | 274 | ||
315 | ASSERT(log_vector); | 275 | ASSERT(log_vector); |
276 | |||
277 | /* | ||
278 | * Do all the accounting aggregation and switching of log vectors | ||
279 | * around in a separate loop to the insertion of items into the CIL. | ||
280 | * Then we can do a separate loop to update the CIL within a single | ||
281 | * lock/unlock pair. This reduces the number of round trips on the CIL | ||
282 | * lock from O(nr_logvectors) to O(1) and greatly reduces the overall | ||
283 | * hold time for the transaction commit. | ||
284 | * | ||
285 | * If this is the first time the item is being placed into the CIL in | ||
286 | * this context, pin it so it can't be written to disk until the CIL is | ||
287 | * flushed to the iclog and the iclog written to disk. | ||
288 | * | ||
289 | * We can do this safely because the context can't checkpoint until we | ||
290 | * are done so it doesn't matter exactly how we update the CIL. | ||
291 | */ | ||
316 | for (lv = log_vector; lv; lv = lv->lv_next) | 292 | for (lv = log_vector; lv; lv = lv->lv_next) |
317 | xlog_cil_insert(log, ticket, lv->lv_item, lv); | 293 | xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); |
294 | |||
295 | /* account for space used by new iovec headers */ | ||
296 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
297 | |||
298 | spin_lock(&cil->xc_cil_lock); | ||
299 | |||
300 | /* move the items to the tail of the CIL */ | ||
301 | for (lv = log_vector; lv; lv = lv->lv_next) | ||
302 | list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); | ||
303 | |||
304 | ctx->nvecs += diff_iovecs; | ||
305 | |||
306 | /* | ||
307 | * Now transfer enough transaction reservation to the context ticket | ||
308 | * for the checkpoint. The context ticket is special - the unit | ||
309 | * reservation has to grow as well as the current reservation as we | ||
310 | * steal from tickets so we can correctly determine the space used | ||
311 | * during the transaction commit. | ||
312 | */ | ||
313 | if (ctx->ticket->t_curr_res == 0) { | ||
314 | /* first commit in checkpoint, steal the header reservation */ | ||
315 | ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); | ||
316 | ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; | ||
317 | ticket->t_curr_res -= ctx->ticket->t_unit_res; | ||
318 | } | ||
319 | |||
320 | /* do we need space for more log record headers? */ | ||
321 | iclog_space = log->l_iclog_size - log->l_iclog_hsize; | ||
322 | if (len > 0 && (ctx->space_used / iclog_space != | ||
323 | (ctx->space_used + len) / iclog_space)) { | ||
324 | int hdrs; | ||
325 | |||
326 | hdrs = (len + iclog_space - 1) / iclog_space; | ||
327 | /* need to take into account split region headers, too */ | ||
328 | hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); | ||
329 | ctx->ticket->t_unit_res += hdrs; | ||
330 | ctx->ticket->t_curr_res += hdrs; | ||
331 | ticket->t_curr_res -= hdrs; | ||
332 | ASSERT(ticket->t_curr_res >= len); | ||
333 | } | ||
334 | ticket->t_curr_res -= len; | ||
335 | ctx->space_used += len; | ||
336 | |||
337 | spin_unlock(&cil->xc_cil_lock); | ||
318 | } | 338 | } |
319 | 339 | ||
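The record-header top-up at the end of xlog_cil_insert_items() is easiest to follow with numbers. A worked example under assumed sizes:

	/*
	 * Assume l_iclog_size = 32768 and l_iclog_hsize = 512, so
	 * iclog_space = 32768 - 512 = 32256 usable bytes per iclog.
	 * With ctx->space_used = 30000 and len = 5000:
	 *
	 *	30000 / 32256 == 0    but    (30000 + 5000) / 32256 == 1
	 *
	 * so this insertion crosses into a new iclog, and
	 *
	 *	hdrs = (5000 + 32256 - 1) / 32256 = 1
	 *
	 * extra header is charged: hdrs * (512 + sizeof(struct
	 * xlog_op_header)) bytes move from the committing transaction's
	 * ticket onto the checkpoint context's ticket.
	 */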
320 | static void | 340 | static void |
@@ -342,24 +362,28 @@ xlog_cil_committed( | |||
342 | int abort) | 362 | int abort) |
343 | { | 363 | { |
344 | struct xfs_cil_ctx *ctx = args; | 364 | struct xfs_cil_ctx *ctx = args; |
345 | struct xfs_log_vec *lv; | 365 | struct xfs_mount *mp = ctx->cil->xc_log->l_mp; |
346 | int abortflag = abort ? XFS_LI_ABORTED : 0; | ||
347 | struct xfs_busy_extent *busyp, *n; | ||
348 | 366 | ||
349 | /* unpin all the log items */ | 367 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
350 | for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) { | 368 | ctx->start_lsn, abort); |
351 | xfs_trans_item_committed(lv->lv_item, ctx->start_lsn, | ||
352 | abortflag); | ||
353 | } | ||
354 | 369 | ||
355 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | 370 | xfs_alloc_busy_sort(&ctx->busy_extents); |
356 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | 371 | xfs_alloc_busy_clear(mp, &ctx->busy_extents, |
372 | (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); | ||
357 | 373 | ||
358 | spin_lock(&ctx->cil->xc_cil_lock); | 374 | spin_lock(&ctx->cil->xc_cil_lock); |
359 | list_del(&ctx->committing); | 375 | list_del(&ctx->committing); |
360 | spin_unlock(&ctx->cil->xc_cil_lock); | 376 | spin_unlock(&ctx->cil->xc_cil_lock); |
361 | 377 | ||
362 | xlog_cil_free_logvec(ctx->lv_chain); | 378 | xlog_cil_free_logvec(ctx->lv_chain); |
379 | |||
380 | if (!list_empty(&ctx->busy_extents)) { | ||
381 | ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); | ||
382 | |||
383 | xfs_discard_extents(mp, &ctx->busy_extents); | ||
384 | xfs_alloc_busy_clear(mp, &ctx->busy_extents, false); | ||
385 | } | ||
386 | |||
363 | kmem_free(ctx); | 387 | kmem_free(ctx); |
364 | } | 388 | } |
365 | 389 | ||
@@ -529,7 +553,7 @@ xlog_cil_push( | |||
529 | 553 | ||
530 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); | 554 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); |
531 | if (error) | 555 | if (error) |
532 | goto out_abort; | 556 | goto out_abort_free_ticket; |
533 | 557 | ||
534 | /* | 558 | /* |
535 | * now that we've written the checkpoint into the log, strictly | 559 | * now that we've written the checkpoint into the log, strictly |
@@ -549,14 +573,15 @@ restart: | |||
549 | * It is still being pushed! Wait for the push to | 573 | * It is still being pushed! Wait for the push to |
550 | * complete, then start again from the beginning. | 574 | * complete, then start again from the beginning. |
551 | */ | 575 | */ |
552 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 576 | xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); |
553 | goto restart; | 577 | goto restart; |
554 | } | 578 | } |
555 | } | 579 | } |
556 | spin_unlock(&cil->xc_cil_lock); | 580 | spin_unlock(&cil->xc_cil_lock); |
557 | 581 | ||
582 | /* xfs_log_done always frees the ticket on error. */ | ||
558 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); | 583 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); |
559 | if (error || commit_lsn == -1) | 584 | if (commit_lsn == -1) |
560 | goto out_abort; | 585 | goto out_abort; |
561 | 586 | ||
562 | /* attach all the transactions w/ busy extents to iclog */ | 587 | /* attach all the transactions w/ busy extents to iclog */ |
@@ -573,7 +598,7 @@ restart: | |||
573 | */ | 598 | */ |
574 | spin_lock(&cil->xc_cil_lock); | 599 | spin_lock(&cil->xc_cil_lock); |
575 | ctx->commit_lsn = commit_lsn; | 600 | ctx->commit_lsn = commit_lsn; |
576 | sv_broadcast(&cil->xc_commit_wait); | 601 | wake_up_all(&cil->xc_commit_wait); |
577 | spin_unlock(&cil->xc_cil_lock); | 602 | spin_unlock(&cil->xc_cil_lock); |
578 | 603 | ||
579 | /* release the hounds! */ | 604 | /* release the hounds! */ |
@@ -586,6 +611,8 @@ out_free_ticket: | |||
586 | kmem_free(new_ctx); | 611 | kmem_free(new_ctx); |
587 | return 0; | 612 | return 0; |
588 | 613 | ||
614 | out_abort_free_ticket: | ||
615 | xfs_log_ticket_put(tic); | ||
589 | out_abort: | 616 | out_abort: |
590 | xlog_cil_committed(ctx, XFS_LI_ABORTED); | 617 | xlog_cil_committed(ctx, XFS_LI_ABORTED); |
591 | return XFS_ERROR(EIO); | 618 | return XFS_ERROR(EIO); |
@@ -608,7 +635,7 @@ out_abort: | |||
608 | * background commit, returns without it held once background commits are | 635 | * background commit, returns without it held once background commits are |
609 | * allowed again. | 636 | * allowed again. |
610 | */ | 637 | */ |
611 | int | 638 | void |
612 | xfs_log_commit_cil( | 639 | xfs_log_commit_cil( |
613 | struct xfs_mount *mp, | 640 | struct xfs_mount *mp, |
614 | struct xfs_trans *tp, | 641 | struct xfs_trans *tp, |
@@ -623,11 +650,6 @@ xfs_log_commit_cil( | |||
623 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | 650 | if (flags & XFS_TRANS_RELEASE_LOG_RES) |
624 | log_flags = XFS_LOG_REL_PERM_RESERV; | 651 | log_flags = XFS_LOG_REL_PERM_RESERV; |
625 | 652 | ||
626 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
627 | xlog_cil_free_logvec(log_vector); | ||
628 | return XFS_ERROR(EIO); | ||
629 | } | ||
630 | |||
631 | /* | 653 | /* |
632 | * do all the hard work of formatting items (including memory | 654 | * do all the hard work of formatting items (including memory |
633 | * allocation) outside the CIL context lock. This prevents stalling CIL | 655 | * allocation) outside the CIL context lock. This prevents stalling CIL |
@@ -638,7 +660,10 @@ xfs_log_commit_cil( | |||
638 | 660 | ||
639 | /* lock out background commit */ | 661 | /* lock out background commit */ |
640 | down_read(&log->l_cilp->xc_ctx_lock); | 662 | down_read(&log->l_cilp->xc_ctx_lock); |
641 | xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); | 663 | if (commit_lsn) |
664 | *commit_lsn = log->l_cilp->xc_ctx->sequence; | ||
665 | |||
666 | xlog_cil_insert_items(log, log_vector, tp->t_ticket); | ||
642 | 667 | ||
643 | /* check we didn't blow the reservation */ | 668 | /* check we didn't blow the reservation */ |
644 | if (tp->t_ticket->t_curr_res < 0) | 669 | if (tp->t_ticket->t_curr_res < 0) |
@@ -684,7 +709,6 @@ xfs_log_commit_cil( | |||
684 | */ | 709 | */ |
685 | if (push) | 710 | if (push) |
686 | xlog_cil_push(log, 0); | 711 | xlog_cil_push(log, 0); |
687 | return 0; | ||
688 | } | 712 | } |
689 | 713 | ||
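xfs_log_commit_cil() changes from int to void: the early XLOG_FORCED_SHUTDOWN() check is gone, so inserting a transaction into the CIL can no longer fail at this point. A shutdown now surfaces later, when the checkpoint is pushed (xlog_write() fails, or xfs_log_done() returns -1 in xlog_cil_push() above). The effect on an assumed call site, for illustration only:

	/* before: the CIL commit itself could return EIO */
	error = xfs_log_commit_cil(mp, tp, log_vector, &commit_lsn, flags);
	if (error)
		return error;

	/* after: it cannot fail here; errors are caught on the push path */
	xfs_log_commit_cil(mp, tp, log_vector, &commit_lsn, flags);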
690 | /* | 714 | /* |
@@ -735,7 +759,7 @@ restart: | |||
735 | * It is still being pushed! Wait for the push to | 759 | * It is still being pushed! Wait for the push to |
736 | * complete, then start again from the beginning. | 760 | * complete, then start again from the beginning. |
737 | */ | 761 | */ |
738 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 762 | xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); |
739 | goto restart; | 763 | goto restart; |
740 | } | 764 | } |
741 | if (ctx->sequence != sequence) | 765 | if (ctx->sequence != sequence) |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index edcdfe01617f..2d3b6a498d63 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -21,7 +21,6 @@ | |||
21 | struct xfs_buf; | 21 | struct xfs_buf; |
22 | struct log; | 22 | struct log; |
23 | struct xlog_ticket; | 23 | struct xlog_ticket; |
24 | struct xfs_buf_cancel; | ||
25 | struct xfs_mount; | 24 | struct xfs_mount; |
26 | 25 | ||
27 | /* | 26 | /* |
@@ -54,7 +53,6 @@ struct xfs_mount; | |||
54 | BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ | 53 | BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ |
55 | XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) | 54 | XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) |
56 | 55 | ||
57 | |||
58 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) | 56 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) |
59 | { | 57 | { |
60 | return ((xfs_lsn_t)cycle << 32) | block; | 58 | return ((xfs_lsn_t)cycle << 32) | block; |
@@ -89,10 +87,6 @@ static inline uint xlog_get_client_id(__be32 i) | |||
89 | return be32_to_cpu(i) >> 24; | 87 | return be32_to_cpu(i) >> 24; |
90 | } | 88 | } |
91 | 89 | ||
92 | #define xlog_panic(args...) cmn_err(CE_PANIC, ## args) | ||
93 | #define xlog_exit(args...) cmn_err(CE_PANIC, ## args) | ||
94 | #define xlog_warn(args...) cmn_err(CE_WARN, ## args) | ||
95 | |||
96 | /* | 90 | /* |
97 | * In core log state | 91 | * In core log state |
98 | */ | 92 | */ |
@@ -133,12 +127,10 @@ static inline uint xlog_get_client_id(__be32 i) | |||
133 | */ | 127 | */ |
134 | #define XLOG_TIC_INITED 0x1 /* has been initialized */ | 128 | #define XLOG_TIC_INITED 0x1 /* has been initialized */ |
135 | #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ | 129 | #define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ |
136 | #define XLOG_TIC_IN_Q 0x4 | ||
137 | 130 | ||
138 | #define XLOG_TIC_FLAGS \ | 131 | #define XLOG_TIC_FLAGS \ |
139 | { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ | 132 | { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ |
140 | { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \ | 133 | { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" } |
141 | { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" } | ||
142 | 134 | ||
143 | #endif /* __KERNEL__ */ | 135 | #endif /* __KERNEL__ */ |
144 | 136 | ||
@@ -152,6 +144,9 @@ static inline uint xlog_get_client_id(__be32 i) | |||
152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ | 144 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ |
153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being | 145 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being |
154 | shutdown */ | 146 | shutdown */ |
147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ | ||
148 | |||
149 | typedef __uint32_t xlog_tid_t; | ||
155 | 150 | ||
156 | #ifdef __KERNEL__ | 151 | #ifdef __KERNEL__ |
157 | /* | 152 | /* |
@@ -244,9 +239,8 @@ typedef struct xlog_res { | |||
244 | } xlog_res_t; | 239 | } xlog_res_t; |
245 | 240 | ||
246 | typedef struct xlog_ticket { | 241 | typedef struct xlog_ticket { |
247 | sv_t t_wait; /* ticket wait queue : 20 */ | 242 | wait_queue_head_t t_wait; /* ticket wait queue */ |
248 | struct xlog_ticket *t_next; /* :4|8 */ | 243 | struct list_head t_queue; /* reserve/write queue */ |
249 | struct xlog_ticket *t_prev; /* :4|8 */ | ||
250 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 244 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
251 | atomic_t t_ref; /* ticket reference count : 4 */ | 245 | atomic_t t_ref; /* ticket reference count : 4 */ |
252 | int t_curr_res; /* current reservation in bytes : 4 */ | 246 | int t_curr_res; /* current reservation in bytes : 4 */ |
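The ticket drops its hand-rolled circular queue (t_next/t_prev plus the XLOG_TIC_IN_Q flag) for a standard list_head and wait queue. Because t_queue is initialised with INIT_LIST_HEAD() and always removed with list_del_init(), list_empty(&tic->t_queue) doubles as the "is this ticket queued" test, which is what made the flag redundant. The idiom, sketched:

	INIT_LIST_HEAD(&tic->t_queue);		/* at ticket allocation */

	if (!list_empty(&tic->t_queue)) {	/* was: XLOG_TIC_IN_Q set */
		spin_lock(&log->l_grant_reserve_lock);
		list_del_init(&tic->t_queue);	/* leaves it self-linked */
		spin_unlock(&log->l_grant_reserve_lock);
	}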
@@ -353,8 +347,8 @@ typedef union xlog_in_core2 { | |||
353 | * and move everything else out to subsequent cachelines. | 347 | * and move everything else out to subsequent cachelines. |
354 | */ | 348 | */ |
355 | typedef struct xlog_in_core { | 349 | typedef struct xlog_in_core { |
356 | sv_t ic_force_wait; | 350 | wait_queue_head_t ic_force_wait; |
357 | sv_t ic_write_wait; | 351 | wait_queue_head_t ic_write_wait; |
358 | struct xlog_in_core *ic_next; | 352 | struct xlog_in_core *ic_next; |
359 | struct xlog_in_core *ic_prev; | 353 | struct xlog_in_core *ic_prev; |
360 | struct xfs_buf *ic_bp; | 354 | struct xfs_buf *ic_bp; |
@@ -421,7 +415,7 @@ struct xfs_cil { | |||
421 | struct xfs_cil_ctx *xc_ctx; | 415 | struct xfs_cil_ctx *xc_ctx; |
422 | struct rw_semaphore xc_ctx_lock; | 416 | struct rw_semaphore xc_ctx_lock; |
423 | struct list_head xc_committing; | 417 | struct list_head xc_committing; |
424 | sv_t xc_commit_wait; | 418 | wait_queue_head_t xc_commit_wait; |
425 | xfs_lsn_t xc_current_sequence; | 419 | xfs_lsn_t xc_current_sequence; |
426 | }; | 420 | }; |
427 | 421 | ||
@@ -491,7 +485,7 @@ typedef struct log { | |||
491 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 485 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
492 | uint l_flags; | 486 | uint l_flags; |
493 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | 487 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ |
494 | struct xfs_buf_cancel **l_buf_cancel_table; | 488 | struct list_head *l_buf_cancel_table; |
495 | int l_iclog_hsize; /* size of iclog header */ | 489 | int l_iclog_hsize; /* size of iclog header */ |
496 | int l_iclog_heads; /* # of iclog header sectors */ | 490 | int l_iclog_heads; /* # of iclog header sectors */ |
497 | uint l_sectBBsize; /* sector size in BBs (2^n) */ | 491 | uint l_sectBBsize; /* sector size in BBs (2^n) */ |
@@ -503,29 +497,40 @@ typedef struct log { | |||
503 | int l_logBBsize; /* size of log in BB chunks */ | 497 | int l_logBBsize; /* size of log in BB chunks */ |
504 | 498 | ||
505 | /* The following block of fields are changed while holding icloglock */ | 499 | /* The following block of fields are changed while holding icloglock */ |
506 | sv_t l_flush_wait ____cacheline_aligned_in_smp; | 500 | wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp; |
507 | /* waiting for iclog flush */ | 501 | /* waiting for iclog flush */ |
508 | int l_covered_state;/* state of "covering disk | 502 | int l_covered_state;/* state of "covering disk |
509 | * log entries" */ | 503 | * log entries" */ |
510 | xlog_in_core_t *l_iclog; /* head log queue */ | 504 | xlog_in_core_t *l_iclog; /* head log queue */ |
511 | spinlock_t l_icloglock; /* grab to change iclog state */ | 505 | spinlock_t l_icloglock; /* grab to change iclog state */ |
512 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed | ||
513 | * buffers */ | ||
514 | xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ | ||
515 | int l_curr_cycle; /* Cycle number of log writes */ | 506 | int l_curr_cycle; /* Cycle number of log writes */ |
516 | int l_prev_cycle; /* Cycle number before last | 507 | int l_prev_cycle; /* Cycle number before last |
517 | * block increment */ | 508 | * block increment */ |
518 | int l_curr_block; /* current logical log block */ | 509 | int l_curr_block; /* current logical log block */ |
519 | int l_prev_block; /* previous logical log block */ | 510 | int l_prev_block; /* previous logical log block */ |
520 | 511 | ||
521 | /* The following block of fields are changed while holding grant_lock */ | 512 | /* |
522 | spinlock_t l_grant_lock ____cacheline_aligned_in_smp; | 513 | * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and |
523 | xlog_ticket_t *l_reserve_headq; | 514 | * read without needing to hold specific locks. To avoid operations |
524 | xlog_ticket_t *l_write_headq; | 515 | * contending with other hot objects, place each of them on a separate |
525 | int l_grant_reserve_cycle; | 516 | * cacheline. |
526 | int l_grant_reserve_bytes; | 517 | */ |
527 | int l_grant_write_cycle; | 518 | /* lsn of last LR on disk */ |
528 | int l_grant_write_bytes; | 519 | atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp; |
520 | /* lsn of 1st LR with unflushed buffers */ | ||
521 | atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; | ||
522 | |||
523 | /* | ||
524 | * ticket grant locks, queues and accounting have their own cachelines | ||
525 | * as these are quite hot and can be operated on concurrently. | ||
526 | */ | ||
527 | spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp; | ||
528 | struct list_head l_reserveq; | ||
529 | atomic64_t l_grant_reserve_head; | ||
530 | |||
531 | spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp; | ||
532 | struct list_head l_writeq; | ||
533 | atomic64_t l_grant_write_head; | ||
529 | 534 | ||
530 | /* The following field are used for debugging; need to hold icloglock */ | 535 | /* The following field are used for debugging; need to hold icloglock */ |
531 | #ifdef DEBUG | 536 | #ifdef DEBUG |
@@ -534,6 +539,9 @@ typedef struct log { | |||
534 | 539 | ||
535 | } xlog_t; | 540 | } xlog_t; |
536 | 541 | ||
542 | #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ | ||
543 | ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE)) | ||
544 | |||
537 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 545 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
538 | 546 | ||
539 | /* common routines */ | 547 | /* common routines */ |
@@ -562,6 +570,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | |||
562 | xlog_in_core_t **commit_iclog, uint flags); | 570 | xlog_in_core_t **commit_iclog, uint flags); |
563 | 571 | ||
564 | /* | 572 | /* |
573 | * When we crack an atomic LSN, we sample it first so that the value will not | ||
574 | * change while we are cracking it into the component values. This means we | ||
575 | * will always get consistent component values to work from. This should always | ||
576 | * be used to sample and crack LSNs that are stored and updated in atomic | ||
577 | * variables. | ||
578 | */ | ||
579 | static inline void | ||
580 | xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block) | ||
581 | { | ||
582 | xfs_lsn_t val = atomic64_read(lsn); | ||
583 | |||
584 | *cycle = CYCLE_LSN(val); | ||
585 | *block = BLOCK_LSN(val); | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Calculate and assign a value to an atomic LSN variable from component pieces. | ||
590 | */ | ||
591 | static inline void | ||
592 | xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block) | ||
593 | { | ||
594 | atomic64_set(lsn, xlog_assign_lsn(cycle, block)); | ||
595 | } | ||
596 | |||
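The intended calling pattern is to sample once through the helper and then work only on the cracked components; a minimal sketch (the tail-LSN caller context is illustrative, not part of this patch):

	uint cycle, block;

	/* the single atomic64_read() inside the helper yields a consistent pair */
	xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
	/* cycle/block are now a coherent snapshot of the tail LSN */
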
597 | /* | ||
598 | * When we crack the grant head, we sample it first so that the value will not | ||
599 | * change while we are cracking it into the component values. This means we | ||
600 | * will always get consistent component values to work from. | ||
601 | */ | ||
602 | static inline void | ||
603 | xlog_crack_grant_head_val(int64_t val, int *cycle, int *space) | ||
604 | { | ||
605 | *cycle = val >> 32; | ||
606 | *space = val & 0xffffffff; | ||
607 | } | ||
608 | |||
609 | static inline void | ||
610 | xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space) | ||
611 | { | ||
612 | xlog_crack_grant_head_val(atomic64_read(head), cycle, space); | ||
613 | } | ||
614 | |||
615 | static inline int64_t | ||
616 | xlog_assign_grant_head_val(int cycle, int space) | ||
617 | { | ||
618 | return ((int64_t)cycle << 32) | space; | ||
619 | } | ||
620 | |||
621 | static inline void | ||
622 | xlog_assign_grant_head(atomic64_t *head, int cycle, int space) | ||
623 | { | ||
624 | atomic64_set(head, xlog_assign_grant_head_val(cycle, space)); | ||
625 | } | ||
626 | |||
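The grant head packs the cycle count into the high 32 bits and the byte count into the low 32 bits, so a single atomic64 read observes both consistently. A standalone illustration of the encoding with hypothetical values (plain userspace C, not kernel code):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int cycle = 5, space = 4096;	/* hypothetical values */
		int64_t head = ((int64_t)cycle << 32) | space;

		/* cracking reverses the packing */
		printf("cycle=%d space=%d\n",
		       (int)(head >> 32), (int)(head & 0xffffffff));
		return 0;	/* prints: cycle=5 space=4096 */
	}
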
627 | /* | ||
565 | * Committed Item List interfaces | 628 | * Committed Item List interfaces |
566 | */ | 629 | */ |
567 | int xlog_cil_init(struct log *log); | 630 | int xlog_cil_init(struct log *log); |
@@ -585,6 +648,21 @@ xlog_cil_force(struct log *log) | |||
585 | */ | 648 | */ |
586 | #define XLOG_UNMOUNT_REC_TYPE (-1U) | 649 | #define XLOG_UNMOUNT_REC_TYPE (-1U) |
587 | 650 | ||
651 | /* | ||
652 | * Wrapper function for waiting on a wait queue serialised against wakeups | ||
653 | * by a spinlock. This matches the semantics of all the wait queues used in the | ||
654 | * log code. | ||
655 | */ | ||
656 | static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) | ||
657 | { | ||
658 | DECLARE_WAITQUEUE(wait, current); | ||
659 | |||
660 | add_wait_queue_exclusive(wq, &wait); | ||
661 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
662 | spin_unlock(lock); | ||
663 | schedule(); | ||
664 | remove_wait_queue(wq, &wait); | ||
665 | } | ||
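Because the task is queued and marked uninterruptible before the lock is dropped, a waker holding the same spinlock cannot lose a wakeup between the caller's condition check and the sleep. A sketch of the expected pairing, where iclog_still_syncing() is a hypothetical helper standing in for the real state checks:

	/* waiter */
	spin_lock(&log->l_icloglock);
	if (iclog_still_syncing(iclog))		/* hypothetical condition */
		xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
	/* xlog_wait() returns with the lock already dropped */

	/* waker */
	spin_lock(&log->l_icloglock);
	iclog->ic_state = XLOG_STATE_DONE_SYNC;	/* illustrative state change */
	wake_up(&iclog->ic_force_wait);
	spin_unlock(&log->l_icloglock);
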
588 | #endif /* __KERNEL__ */ | 666 | #endif /* __KERNEL__ */ |
589 | 667 | ||
590 | #endif /* __XFS_LOG_PRIV_H__ */ | 668 | #endif /* __XFS_LOG_PRIV_H__ */ |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 6f3f5fa37acf..04142caedb2b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -53,6 +53,17 @@ STATIC void xlog_recover_check_summary(xlog_t *); | |||
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * This structure is used during recovery to record the buf log items which | ||
57 | * have been cancelled and should not be replayed. | ||
58 | */ | ||
59 | struct xfs_buf_cancel { | ||
60 | xfs_daddr_t bc_blkno; | ||
61 | uint bc_len; | ||
62 | int bc_refcount; | ||
63 | struct list_head bc_list; | ||
64 | }; | ||
65 | |||
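The two recovery passes drive this structure in tandem: pass 1 counts each XFS_BLF_CANCEL item into the hash table, and pass 2 suppresses replay on a match, dropping a reference per cancel item and freeing the entry on the last one. Condensed from the rewritten functions below:

	/* pass 1: bump (or create) the refcount for a cancel item */
	bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
	list_for_each_entry(bcp, bucket, bc_list)
		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
			bcp->bc_refcount++;

	/* pass 2: a match suppresses replay; cancel items drop a reference */
	if (flags & XFS_BLF_CANCEL) {
		if (--bcp->bc_refcount == 0) {
			list_del(&bcp->bc_list);
			kmem_free(bcp);
		}
	}
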
66 | /* | ||
56 | * Sector aligned buffer routines for buffer create/read/write/access | 67 | * Sector aligned buffer routines for buffer create/read/write/access |
57 | */ | 68 | */ |
58 | 69 | ||
@@ -81,7 +92,7 @@ xlog_get_bp( | |||
81 | int nbblks) | 92 | int nbblks) |
82 | { | 93 | { |
83 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 94 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
84 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", | 95 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
85 | nbblks); | 96 | nbblks); |
86 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 97 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
87 | return NULL; | 98 | return NULL; |
@@ -90,7 +101,7 @@ xlog_get_bp( | |||
90 | /* | 101 | /* |
91 | * We do log I/O in units of log sectors (a power-of-2 | 102 | * We do log I/O in units of log sectors (a power-of-2 |
92 | * multiple of the basic block size), so we round up the | 103 | * multiple of the basic block size), so we round up the |
93 | * requested size to acommodate the basic blocks required | 104 | * requested size to accommodate the basic blocks required |
94 | * for complete log sectors. | 105 | * for complete log sectors. |
95 | * | 106 | * |
96 | * In addition, the buffer may be used for a non-sector- | 107 | * In addition, the buffer may be used for a non-sector- |
@@ -101,13 +112,14 @@ xlog_get_bp( | |||
101 | * an issue. Nor will this be a problem if the log I/O is | 112 | * an issue. Nor will this be a problem if the log I/O is |
102 | * done in basic blocks (sector size 1). But otherwise we | 113 | * done in basic blocks (sector size 1). But otherwise we |
103 | * extend the buffer by one extra log sector to ensure | 114 | * extend the buffer by one extra log sector to ensure |
104 | * there's space to accomodate this possiblility. | 115 | * there's space to accommodate this possibility. |
105 | */ | 116 | */ |
106 | if (nbblks > 1 && log->l_sectBBsize > 1) | 117 | if (nbblks > 1 && log->l_sectBBsize > 1) |
107 | nbblks += log->l_sectBBsize; | 118 | nbblks += log->l_sectBBsize; |
108 | nbblks = round_up(nbblks, log->l_sectBBsize); | 119 | nbblks = round_up(nbblks, log->l_sectBBsize); |
109 | 120 | ||
110 | return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); | 121 | return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, |
122 | BBTOB(nbblks), 0); | ||
111 | } | 123 | } |
112 | 124 | ||
113 | STATIC void | 125 | STATIC void |
@@ -148,7 +160,7 @@ xlog_bread_noalign( | |||
148 | int error; | 160 | int error; |
149 | 161 | ||
150 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 162 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
151 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", | 163 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
152 | nbblks); | 164 | nbblks); |
153 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 165 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
154 | return EFSCORRUPTED; | 166 | return EFSCORRUPTED; |
@@ -167,7 +179,7 @@ xlog_bread_noalign( | |||
167 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | 179 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); |
168 | 180 | ||
169 | xfsbdstrat(log->l_mp, bp); | 181 | xfsbdstrat(log->l_mp, bp); |
170 | error = xfs_iowait(bp); | 182 | error = xfs_buf_iowait(bp); |
171 | if (error) | 183 | if (error) |
172 | xfs_ioerror_alert("xlog_bread", log->l_mp, | 184 | xfs_ioerror_alert("xlog_bread", log->l_mp, |
173 | bp, XFS_BUF_ADDR(bp)); | 185 | bp, XFS_BUF_ADDR(bp)); |
@@ -193,6 +205,35 @@ xlog_bread( | |||
193 | } | 205 | } |
194 | 206 | ||
195 | /* | 207 | /* |
208 | * Read at an offset into the buffer. Returns with the buffer in its original | ||
209 | * state regardless of the result of the read. | ||
210 | */ | ||
211 | STATIC int | ||
212 | xlog_bread_offset( | ||
213 | xlog_t *log, | ||
214 | xfs_daddr_t blk_no, /* block to read from */ | ||
215 | int nbblks, /* blocks to read */ | ||
216 | xfs_buf_t *bp, | ||
217 | xfs_caddr_t offset) | ||
218 | { | ||
219 | xfs_caddr_t orig_offset = XFS_BUF_PTR(bp); | ||
220 | int orig_len = bp->b_buffer_length; | ||
221 | int error, error2; | ||
222 | |||
223 | error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks)); | ||
224 | if (error) | ||
225 | return error; | ||
226 | |||
227 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); | ||
228 | |||
229 | /* must reset buffer pointer even on error */ | ||
230 | error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len); | ||
231 | if (error) | ||
232 | return error; | ||
233 | return error2; | ||
234 | } | ||
235 | |||
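The helper restores the original buffer pointer unconditionally, with a read failure taking precedence over a failed restore, so callers can carve split reads out of one large buffer. The rewritten hunk in xlog_write_log_records() further down uses it exactly that way:

	/* re-read one sector's worth into the tail of the existing mapping */
	offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
	error = xlog_bread_offset(log, ealign, sectbb, bp, offset);
	if (error)
		break;
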
236 | /* | ||
196 | * Write out the buffer at the given block for the given number of blocks. | 237 | * Write out the buffer at the given block for the given number of blocks. |
197 | * The buffer is kept locked across the write and is returned locked. | 238 | * The buffer is kept locked across the write and is returned locked. |
198 | * This can only be used for synchronous log writes. | 239 | * This can only be used for synchronous log writes. |
@@ -207,7 +248,7 @@ xlog_bwrite( | |||
207 | int error; | 248 | int error; |
208 | 249 | ||
209 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 250 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
210 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", | 251 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
211 | nbblks); | 252 | nbblks); |
212 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 253 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
213 | return EFSCORRUPTED; | 254 | return EFSCORRUPTED; |
@@ -242,9 +283,9 @@ xlog_header_check_dump( | |||
242 | xfs_mount_t *mp, | 283 | xfs_mount_t *mp, |
243 | xlog_rec_header_t *head) | 284 | xlog_rec_header_t *head) |
244 | { | 285 | { |
245 | cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n", | 286 | xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n", |
246 | __func__, &mp->m_sb.sb_uuid, XLOG_FMT); | 287 | __func__, &mp->m_sb.sb_uuid, XLOG_FMT); |
247 | cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n", | 288 | xfs_debug(mp, " log : uuid = %pU, fmt = %d\n", |
248 | &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); | 289 | &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); |
249 | } | 290 | } |
250 | #else | 291 | #else |
@@ -267,15 +308,15 @@ xlog_header_check_recover( | |||
267 | * a dirty log created in IRIX. | 308 | * a dirty log created in IRIX. |
268 | */ | 309 | */ |
269 | if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { | 310 | if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { |
270 | xlog_warn( | 311 | xfs_warn(mp, |
271 | "XFS: dirty log written in incompatible format - can't recover"); | 312 | "dirty log written in incompatible format - can't recover"); |
272 | xlog_header_check_dump(mp, head); | 313 | xlog_header_check_dump(mp, head); |
273 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", | 314 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", |
274 | XFS_ERRLEVEL_HIGH, mp); | 315 | XFS_ERRLEVEL_HIGH, mp); |
275 | return XFS_ERROR(EFSCORRUPTED); | 316 | return XFS_ERROR(EFSCORRUPTED); |
276 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 317 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
277 | xlog_warn( | 318 | xfs_warn(mp, |
278 | "XFS: dirty log entry has mismatched uuid - can't recover"); | 319 | "dirty log entry has mismatched uuid - can't recover"); |
279 | xlog_header_check_dump(mp, head); | 320 | xlog_header_check_dump(mp, head); |
280 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", | 321 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", |
281 | XFS_ERRLEVEL_HIGH, mp); | 322 | XFS_ERRLEVEL_HIGH, mp); |
@@ -300,9 +341,9 @@ xlog_header_check_mount( | |||
300 | * h_fs_uuid is nil, we assume this log was last mounted | 341 | * h_fs_uuid is nil, we assume this log was last mounted |
301 | * by IRIX and continue. | 342 | * by IRIX and continue. |
302 | */ | 343 | */ |
303 | xlog_warn("XFS: nil uuid in log - IRIX style log"); | 344 | xfs_warn(mp, "nil uuid in log - IRIX style log"); |
304 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 345 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
305 | xlog_warn("XFS: log has mismatched uuid - can't recover"); | 346 | xfs_warn(mp, "log has mismatched uuid - can't recover"); |
306 | xlog_header_check_dump(mp, head); | 347 | xlog_header_check_dump(mp, head); |
307 | XFS_ERROR_REPORT("xlog_header_check_mount", | 348 | XFS_ERROR_REPORT("xlog_header_check_mount", |
308 | XFS_ERRLEVEL_HIGH, mp); | 349 | XFS_ERRLEVEL_HIGH, mp); |
@@ -321,12 +362,13 @@ xlog_recover_iodone( | |||
321 | * this during recovery. One strike! | 362 | * this during recovery. One strike! |
322 | */ | 363 | */ |
323 | xfs_ioerror_alert("xlog_recover_iodone", | 364 | xfs_ioerror_alert("xlog_recover_iodone", |
324 | bp->b_mount, bp, XFS_BUF_ADDR(bp)); | 365 | bp->b_target->bt_mount, bp, |
325 | xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); | 366 | XFS_BUF_ADDR(bp)); |
367 | xfs_force_shutdown(bp->b_target->bt_mount, | ||
368 | SHUTDOWN_META_IO_ERROR); | ||
326 | } | 369 | } |
327 | bp->b_mount = NULL; | ||
328 | XFS_BUF_CLR_IODONE_FUNC(bp); | 370 | XFS_BUF_CLR_IODONE_FUNC(bp); |
329 | xfs_biodone(bp); | 371 | xfs_buf_ioend(bp, 0); |
330 | } | 372 | } |
331 | 373 | ||
332 | /* | 374 | /* |
@@ -477,8 +519,8 @@ xlog_find_verify_log_record( | |||
477 | for (i = (*last_blk) - 1; i >= 0; i--) { | 519 | for (i = (*last_blk) - 1; i >= 0; i--) { |
478 | if (i < start_blk) { | 520 | if (i < start_blk) { |
479 | /* valid log record not found */ | 521 | /* valid log record not found */ |
480 | xlog_warn( | 522 | xfs_warn(log->l_mp, |
481 | "XFS: Log inconsistent (didn't find previous header)"); | 523 | "Log inconsistent (didn't find previous header)"); |
482 | ASSERT(0); | 524 | ASSERT(0); |
483 | error = XFS_ERROR(EIO); | 525 | error = XFS_ERROR(EIO); |
484 | goto out; | 526 | goto out; |
@@ -578,12 +620,12 @@ xlog_find_head( | |||
578 | * mkfs etc write a dummy unmount record to a fresh | 620 | * mkfs etc write a dummy unmount record to a fresh |
579 | * log so we can store the uuid in there | 621 | * log so we can store the uuid in there |
580 | */ | 622 | */ |
581 | xlog_warn("XFS: totally zeroed log"); | 623 | xfs_warn(log->l_mp, "totally zeroed log"); |
582 | } | 624 | } |
583 | 625 | ||
584 | return 0; | 626 | return 0; |
585 | } else if (error) { | 627 | } else if (error) { |
586 | xlog_warn("XFS: empty log check failed"); | 628 | xfs_warn(log->l_mp, "empty log check failed"); |
587 | return error; | 629 | return error; |
588 | } | 630 | } |
589 | 631 | ||
@@ -806,7 +848,7 @@ validate_head: | |||
806 | xlog_put_bp(bp); | 848 | xlog_put_bp(bp); |
807 | 849 | ||
808 | if (error) | 850 | if (error) |
809 | xlog_warn("XFS: failed to find log head"); | 851 | xfs_warn(log->l_mp, "failed to find log head"); |
810 | return error; | 852 | return error; |
811 | } | 853 | } |
812 | 854 | ||
@@ -899,7 +941,7 @@ xlog_find_tail( | |||
899 | } | 941 | } |
900 | } | 942 | } |
901 | if (!found) { | 943 | if (!found) { |
902 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); | 944 | xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); |
903 | ASSERT(0); | 945 | ASSERT(0); |
904 | return XFS_ERROR(EIO); | 946 | return XFS_ERROR(EIO); |
905 | } | 947 | } |
@@ -923,12 +965,12 @@ xlog_find_tail( | |||
923 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); | 965 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); |
924 | if (found == 2) | 966 | if (found == 2) |
925 | log->l_curr_cycle++; | 967 | log->l_curr_cycle++; |
926 | log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); | 968 | atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); |
927 | log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); | 969 | atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); |
928 | log->l_grant_reserve_cycle = log->l_curr_cycle; | 970 | xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, |
929 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | 971 | BBTOB(log->l_curr_block)); |
930 | log->l_grant_write_cycle = log->l_curr_cycle; | 972 | xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, |
931 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | 973 | BBTOB(log->l_curr_block)); |
932 | 974 | ||
933 | /* | 975 | /* |
934 | * Look for unmount record. If we find it, then we know there | 976 | * Look for unmount record. If we find it, then we know there |
@@ -958,7 +1000,7 @@ xlog_find_tail( | |||
958 | } | 1000 | } |
959 | after_umount_blk = (i + hblks + (int) | 1001 | after_umount_blk = (i + hblks + (int) |
960 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; | 1002 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; |
961 | tail_lsn = log->l_tail_lsn; | 1003 | tail_lsn = atomic64_read(&log->l_tail_lsn); |
962 | if (*head_blk == after_umount_blk && | 1004 | if (*head_blk == after_umount_blk && |
963 | be32_to_cpu(rhead->h_num_logops) == 1) { | 1005 | be32_to_cpu(rhead->h_num_logops) == 1) { |
964 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 1006 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
@@ -973,12 +1015,10 @@ xlog_find_tail( | |||
973 | * log records will point recovery to after the | 1015 | * log records will point recovery to after the |
974 | * current unmount record. | 1016 | * current unmount record. |
975 | */ | 1017 | */ |
976 | log->l_tail_lsn = | 1018 | xlog_assign_atomic_lsn(&log->l_tail_lsn, |
977 | xlog_assign_lsn(log->l_curr_cycle, | 1019 | log->l_curr_cycle, after_umount_blk); |
978 | after_umount_blk); | 1020 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, |
979 | log->l_last_sync_lsn = | 1021 | log->l_curr_cycle, after_umount_blk); |
980 | xlog_assign_lsn(log->l_curr_cycle, | ||
981 | after_umount_blk); | ||
982 | *tail_blk = after_umount_blk; | 1022 | *tail_blk = after_umount_blk; |
983 | 1023 | ||
984 | /* | 1024 | /* |
@@ -1017,7 +1057,7 @@ done: | |||
1017 | xlog_put_bp(bp); | 1057 | xlog_put_bp(bp); |
1018 | 1058 | ||
1019 | if (error) | 1059 | if (error) |
1020 | xlog_warn("XFS: failed to locate log tail"); | 1060 | xfs_warn(log->l_mp, "failed to locate log tail"); |
1021 | return error; | 1061 | return error; |
1022 | } | 1062 | } |
1023 | 1063 | ||
@@ -1081,7 +1121,8 @@ xlog_find_zeroed( | |||
1081 | * the first block must be 1. If it's not, maybe we're | 1121 | * the first block must be 1. If it's not, maybe we're |
1082 | * not looking at a log... Bail out. | 1122 | * not looking at a log... Bail out. |
1083 | */ | 1123 | */ |
1084 | xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); | 1124 | xfs_warn(log->l_mp, |
1125 | "Log inconsistent or not a log (last==0, first!=1)"); | ||
1085 | return XFS_ERROR(EINVAL); | 1126 | return XFS_ERROR(EINVAL); |
1086 | } | 1127 | } |
1087 | 1128 | ||
@@ -1217,20 +1258,12 @@ xlog_write_log_records( | |||
1217 | */ | 1258 | */ |
1218 | ealign = round_down(end_block, sectbb); | 1259 | ealign = round_down(end_block, sectbb); |
1219 | if (j == 0 && (start_block + endcount > ealign)) { | 1260 | if (j == 0 && (start_block + endcount > ealign)) { |
1220 | offset = XFS_BUF_PTR(bp); | 1261 | offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block); |
1221 | balign = BBTOB(ealign - start_block); | 1262 | error = xlog_bread_offset(log, ealign, sectbb, |
1222 | error = XFS_BUF_SET_PTR(bp, offset + balign, | 1263 | bp, offset); |
1223 | BBTOB(sectbb)); | ||
1224 | if (error) | ||
1225 | break; | ||
1226 | |||
1227 | error = xlog_bread_noalign(log, ealign, sectbb, bp); | ||
1228 | if (error) | 1264 | if (error) |
1229 | break; | 1265 | break; |
1230 | 1266 | ||
1231 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); | ||
1232 | if (error) | ||
1233 | break; | ||
1234 | } | 1267 | } |
1235 | 1268 | ||
1236 | offset = xlog_align(log, start_block, endcount, bp); | 1269 | offset = xlog_align(log, start_block, endcount, bp); |
@@ -1495,8 +1528,8 @@ xlog_recover_add_to_trans( | |||
1495 | if (list_empty(&trans->r_itemq)) { | 1528 | if (list_empty(&trans->r_itemq)) { |
1496 | /* we need to catch log corruptions here */ | 1529 | /* we need to catch log corruptions here */ |
1497 | if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { | 1530 | if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { |
1498 | xlog_warn("XFS: xlog_recover_add_to_trans: " | 1531 | xfs_warn(log->l_mp, "%s: bad header magic number", |
1499 | "bad header magic number"); | 1532 | __func__); |
1500 | ASSERT(0); | 1533 | ASSERT(0); |
1501 | return XFS_ERROR(EIO); | 1534 | return XFS_ERROR(EIO); |
1502 | } | 1535 | } |
@@ -1523,8 +1556,8 @@ xlog_recover_add_to_trans( | |||
1523 | if (item->ri_total == 0) { /* first region to be added */ | 1556 | if (item->ri_total == 0) { /* first region to be added */ |
1524 | if (in_f->ilf_size == 0 || | 1557 | if (in_f->ilf_size == 0 || |
1525 | in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { | 1558 | in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { |
1526 | xlog_warn( | 1559 | xfs_warn(log->l_mp, |
1527 | "XFS: bad number of regions (%d) in inode log format", | 1560 | "bad number of regions (%d) in inode log format", |
1528 | in_f->ilf_size); | 1561 | in_f->ilf_size); |
1529 | ASSERT(0); | 1562 | ASSERT(0); |
1530 | return XFS_ERROR(EIO); | 1563 | return XFS_ERROR(EIO); |
@@ -1581,8 +1614,9 @@ xlog_recover_reorder_trans( | |||
1581 | list_move_tail(&item->ri_list, &trans->r_itemq); | 1614 | list_move_tail(&item->ri_list, &trans->r_itemq); |
1582 | break; | 1615 | break; |
1583 | default: | 1616 | default: |
1584 | xlog_warn( | 1617 | xfs_warn(log->l_mp, |
1585 | "XFS: xlog_recover_reorder_trans: unrecognized type of log operation"); | 1618 | "%s: unrecognized type of log operation", |
1619 | __func__); | ||
1586 | ASSERT(0); | 1620 | ASSERT(0); |
1587 | return XFS_ERROR(EIO); | 1621 | return XFS_ERROR(EIO); |
1588 | } | 1622 | } |
@@ -1603,82 +1637,45 @@ xlog_recover_reorder_trans( | |||
1603 | * record in the table to tell us how many times we expect to see this | 1637 | * record in the table to tell us how many times we expect to see this |
1604 | * record during the second pass. | 1638 | * record during the second pass. |
1605 | */ | 1639 | */ |
1606 | STATIC void | 1640 | STATIC int |
1607 | xlog_recover_do_buffer_pass1( | 1641 | xlog_recover_buffer_pass1( |
1608 | xlog_t *log, | 1642 | struct log *log, |
1609 | xfs_buf_log_format_t *buf_f) | 1643 | xlog_recover_item_t *item) |
1610 | { | 1644 | { |
1611 | xfs_buf_cancel_t *bcp; | 1645 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
1612 | xfs_buf_cancel_t *nextp; | 1646 | struct list_head *bucket; |
1613 | xfs_buf_cancel_t *prevp; | 1647 | struct xfs_buf_cancel *bcp; |
1614 | xfs_buf_cancel_t **bucket; | ||
1615 | xfs_daddr_t blkno = 0; | ||
1616 | uint len = 0; | ||
1617 | ushort flags = 0; | ||
1618 | |||
1619 | switch (buf_f->blf_type) { | ||
1620 | case XFS_LI_BUF: | ||
1621 | blkno = buf_f->blf_blkno; | ||
1622 | len = buf_f->blf_len; | ||
1623 | flags = buf_f->blf_flags; | ||
1624 | break; | ||
1625 | } | ||
1626 | 1648 | ||
1627 | /* | 1649 | /* |
1628 | * If this isn't a cancel buffer item, then just return. | 1650 | * If this isn't a cancel buffer item, then just return. |
1629 | */ | 1651 | */ |
1630 | if (!(flags & XFS_BLF_CANCEL)) { | 1652 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
1631 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); | 1653 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
1632 | return; | 1654 | return 0; |
1633 | } | ||
1634 | |||
1635 | /* | ||
1636 | * Insert an xfs_buf_cancel record into the hash table of | ||
1637 | * them. If there is already an identical record, bump | ||
1638 | * its reference count. | ||
1639 | */ | ||
1640 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | ||
1641 | XLOG_BC_TABLE_SIZE]; | ||
1642 | /* | ||
1643 | * If the hash bucket is empty then just insert a new record into | ||
1644 | * the bucket. | ||
1645 | */ | ||
1646 | if (*bucket == NULL) { | ||
1647 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | ||
1648 | KM_SLEEP); | ||
1649 | bcp->bc_blkno = blkno; | ||
1650 | bcp->bc_len = len; | ||
1651 | bcp->bc_refcount = 1; | ||
1652 | bcp->bc_next = NULL; | ||
1653 | *bucket = bcp; | ||
1654 | return; | ||
1655 | } | 1655 | } |
1656 | 1656 | ||
1657 | /* | 1657 | /* |
1658 | * The hash bucket is not empty, so search for duplicates of our | 1658 | * Insert an xfs_buf_cancel record into the hash table of cancelled buffers. |
1659 | * record. If we find one them just bump its refcount. If not | 1659 | * If there is already an identical record, bump its reference count. |
1660 | * then add us at the end of the list. | ||
1661 | */ | 1660 | */ |
1662 | prevp = NULL; | 1661 | bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); |
1663 | nextp = *bucket; | 1662 | list_for_each_entry(bcp, bucket, bc_list) { |
1664 | while (nextp != NULL) { | 1663 | if (bcp->bc_blkno == buf_f->blf_blkno && |
1665 | if (nextp->bc_blkno == blkno && nextp->bc_len == len) { | 1664 | bcp->bc_len == buf_f->blf_len) { |
1666 | nextp->bc_refcount++; | 1665 | bcp->bc_refcount++; |
1667 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); | 1666 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); |
1668 | return; | 1667 | return 0; |
1669 | } | 1668 | } |
1670 | prevp = nextp; | 1669 | } |
1671 | nextp = nextp->bc_next; | 1670 | |
1672 | } | 1671 | bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); |
1673 | ASSERT(prevp != NULL); | 1672 | bcp->bc_blkno = buf_f->blf_blkno; |
1674 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | 1673 | bcp->bc_len = buf_f->blf_len; |
1675 | KM_SLEEP); | ||
1676 | bcp->bc_blkno = blkno; | ||
1677 | bcp->bc_len = len; | ||
1678 | bcp->bc_refcount = 1; | 1674 | bcp->bc_refcount = 1; |
1679 | bcp->bc_next = NULL; | 1675 | list_add_tail(&bcp->bc_list, bucket); |
1680 | prevp->bc_next = bcp; | 1676 | |
1681 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); | 1677 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); |
1678 | return 0; | ||
1682 | } | 1679 | } |
1683 | 1680 | ||
1684 | /* | 1681 | /* |
@@ -1696,14 +1693,13 @@ xlog_recover_do_buffer_pass1( | |||
1696 | */ | 1693 | */ |
1697 | STATIC int | 1694 | STATIC int |
1698 | xlog_check_buffer_cancelled( | 1695 | xlog_check_buffer_cancelled( |
1699 | xlog_t *log, | 1696 | struct log *log, |
1700 | xfs_daddr_t blkno, | 1697 | xfs_daddr_t blkno, |
1701 | uint len, | 1698 | uint len, |
1702 | ushort flags) | 1699 | ushort flags) |
1703 | { | 1700 | { |
1704 | xfs_buf_cancel_t *bcp; | 1701 | struct list_head *bucket; |
1705 | xfs_buf_cancel_t *prevp; | 1702 | struct xfs_buf_cancel *bcp; |
1706 | xfs_buf_cancel_t **bucket; | ||
1707 | 1703 | ||
1708 | if (log->l_buf_cancel_table == NULL) { | 1704 | if (log->l_buf_cancel_table == NULL) { |
1709 | /* | 1705 | /* |
@@ -1714,128 +1710,70 @@ xlog_check_buffer_cancelled( | |||
1714 | return 0; | 1710 | return 0; |
1715 | } | 1711 | } |
1716 | 1712 | ||
1717 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | ||
1718 | XLOG_BC_TABLE_SIZE]; | ||
1719 | bcp = *bucket; | ||
1720 | if (bcp == NULL) { | ||
1721 | /* | ||
1722 | * There is no corresponding entry in the table built | ||
1723 | * in pass one, so this buffer has not been cancelled. | ||
1724 | */ | ||
1725 | ASSERT(!(flags & XFS_BLF_CANCEL)); | ||
1726 | return 0; | ||
1727 | } | ||
1728 | |||
1729 | /* | 1713 | /* |
1730 | * Search for an entry in the buffer cancel table that | 1714 | * Search for an entry in the cancel table that matches our buffer. |
1731 | * matches our buffer. | ||
1732 | */ | 1715 | */ |
1733 | prevp = NULL; | 1716 | bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); |
1734 | while (bcp != NULL) { | 1717 | list_for_each_entry(bcp, bucket, bc_list) { |
1735 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) { | 1718 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) |
1736 | /* | 1719 | goto found; |
1737 | * We've go a match, so return 1 so that the | ||
1738 | * recovery of this buffer is cancelled. | ||
1739 | * If this buffer is actually a buffer cancel | ||
1740 | * log item, then decrement the refcount on the | ||
1741 | * one in the table and remove it if this is the | ||
1742 | * last reference. | ||
1743 | */ | ||
1744 | if (flags & XFS_BLF_CANCEL) { | ||
1745 | bcp->bc_refcount--; | ||
1746 | if (bcp->bc_refcount == 0) { | ||
1747 | if (prevp == NULL) { | ||
1748 | *bucket = bcp->bc_next; | ||
1749 | } else { | ||
1750 | prevp->bc_next = bcp->bc_next; | ||
1751 | } | ||
1752 | kmem_free(bcp); | ||
1753 | } | ||
1754 | } | ||
1755 | return 1; | ||
1756 | } | ||
1757 | prevp = bcp; | ||
1758 | bcp = bcp->bc_next; | ||
1759 | } | 1720 | } |
1721 | |||
1760 | /* | 1722 | /* |
1761 | * We didn't find a corresponding entry in the table, so | 1723 | * We didn't find a corresponding entry in the table, so return 0 so |
1762 | * return 0 so that the buffer is NOT cancelled. | 1724 | * that the buffer is NOT cancelled. |
1763 | */ | 1725 | */ |
1764 | ASSERT(!(flags & XFS_BLF_CANCEL)); | 1726 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1765 | return 0; | 1727 | return 0; |
1766 | } | ||
1767 | 1728 | ||
1768 | STATIC int | 1729 | found: |
1769 | xlog_recover_do_buffer_pass2( | 1730 | /* |
1770 | xlog_t *log, | 1731 | * We've go a match, so return 1 so that the recovery of this buffer |
1771 | xfs_buf_log_format_t *buf_f) | 1732 | * is cancelled. If this buffer is actually a buffer cancel log |
1772 | { | 1733 | * item, then decrement the refcount on the one in the table and |
1773 | xfs_daddr_t blkno = 0; | 1734 | * remove it if this is the last reference. |
1774 | ushort flags = 0; | 1735 | */ |
1775 | uint len = 0; | 1736 | if (flags & XFS_BLF_CANCEL) { |
1776 | 1737 | if (--bcp->bc_refcount == 0) { | |
1777 | switch (buf_f->blf_type) { | 1738 | list_del(&bcp->bc_list); |
1778 | case XFS_LI_BUF: | 1739 | kmem_free(bcp); |
1779 | blkno = buf_f->blf_blkno; | 1740 | } |
1780 | flags = buf_f->blf_flags; | ||
1781 | len = buf_f->blf_len; | ||
1782 | break; | ||
1783 | } | 1741 | } |
1784 | 1742 | return 1; | |
1785 | return xlog_check_buffer_cancelled(log, blkno, len, flags); | ||
1786 | } | 1743 | } |
1787 | 1744 | ||
1788 | /* | 1745 | /* |
1789 | * Perform recovery for a buffer full of inodes. In these buffers, | 1746 | * Perform recovery for a buffer full of inodes. In these buffers, the only |
1790 | * the only data which should be recovered is that which corresponds | 1747 | * data which should be recovered is that which corresponds to the |
1791 | * to the di_next_unlinked pointers in the on disk inode structures. | 1748 | * di_next_unlinked pointers in the on disk inode structures. The rest of the |
1792 | * The rest of the data for the inodes is always logged through the | 1749 | * data for the inodes is always logged through the inodes themselves rather |
1793 | * inodes themselves rather than the inode buffer and is recovered | 1750 | * than the inode buffer and is recovered in xlog_recover_inode_pass2(). |
1794 | * in xlog_recover_do_inode_trans(). | ||
1795 | * | 1751 | * |
1796 | * The only time when buffers full of inodes are fully recovered is | 1752 | * The only time when buffers full of inodes are fully recovered is when the |
1797 | * when the buffer is full of newly allocated inodes. In this case | 1753 | * buffer is full of newly allocated inodes. In this case the buffer will |
1798 | * the buffer will not be marked as an inode buffer and so will be | 1754 | * not be marked as an inode buffer and so will be sent to |
1799 | * sent to xlog_recover_do_reg_buffer() below during recovery. | 1755 | * xlog_recover_do_reg_buffer() below during recovery. |
1800 | */ | 1756 | */ |
1801 | STATIC int | 1757 | STATIC int |
1802 | xlog_recover_do_inode_buffer( | 1758 | xlog_recover_do_inode_buffer( |
1803 | xfs_mount_t *mp, | 1759 | struct xfs_mount *mp, |
1804 | xlog_recover_item_t *item, | 1760 | xlog_recover_item_t *item, |
1805 | xfs_buf_t *bp, | 1761 | struct xfs_buf *bp, |
1806 | xfs_buf_log_format_t *buf_f) | 1762 | xfs_buf_log_format_t *buf_f) |
1807 | { | 1763 | { |
1808 | int i; | 1764 | int i; |
1809 | int item_index; | 1765 | int item_index = 0; |
1810 | int bit; | 1766 | int bit = 0; |
1811 | int nbits; | 1767 | int nbits = 0; |
1812 | int reg_buf_offset; | 1768 | int reg_buf_offset = 0; |
1813 | int reg_buf_bytes; | 1769 | int reg_buf_bytes = 0; |
1814 | int next_unlinked_offset; | 1770 | int next_unlinked_offset; |
1815 | int inodes_per_buf; | 1771 | int inodes_per_buf; |
1816 | xfs_agino_t *logged_nextp; | 1772 | xfs_agino_t *logged_nextp; |
1817 | xfs_agino_t *buffer_nextp; | 1773 | xfs_agino_t *buffer_nextp; |
1818 | unsigned int *data_map = NULL; | ||
1819 | unsigned int map_size = 0; | ||
1820 | 1774 | ||
1821 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | 1775 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
1822 | 1776 | ||
1823 | switch (buf_f->blf_type) { | ||
1824 | case XFS_LI_BUF: | ||
1825 | data_map = buf_f->blf_data_map; | ||
1826 | map_size = buf_f->blf_map_size; | ||
1827 | break; | ||
1828 | } | ||
1829 | /* | ||
1830 | * Set the variables corresponding to the current region to | ||
1831 | * 0 so that we'll initialize them on the first pass through | ||
1832 | * the loop. | ||
1833 | */ | ||
1834 | reg_buf_offset = 0; | ||
1835 | reg_buf_bytes = 0; | ||
1836 | bit = 0; | ||
1837 | nbits = 0; | ||
1838 | item_index = 0; | ||
1839 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; | 1777 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; |
1840 | for (i = 0; i < inodes_per_buf; i++) { | 1778 | for (i = 0; i < inodes_per_buf; i++) { |
1841 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + | 1779 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
@@ -1850,18 +1788,18 @@ xlog_recover_do_inode_buffer( | |||
1850 | * the current di_next_unlinked field. | 1788 | * the current di_next_unlinked field. |
1851 | */ | 1789 | */ |
1852 | bit += nbits; | 1790 | bit += nbits; |
1853 | bit = xfs_next_bit(data_map, map_size, bit); | 1791 | bit = xfs_next_bit(buf_f->blf_data_map, |
1792 | buf_f->blf_map_size, bit); | ||
1854 | 1793 | ||
1855 | /* | 1794 | /* |
1856 | * If there are no more logged regions in the | 1795 | * If there are no more logged regions in the |
1857 | * buffer, then we're done. | 1796 | * buffer, then we're done. |
1858 | */ | 1797 | */ |
1859 | if (bit == -1) { | 1798 | if (bit == -1) |
1860 | return 0; | 1799 | return 0; |
1861 | } | ||
1862 | 1800 | ||
1863 | nbits = xfs_contig_bits(data_map, map_size, | 1801 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
1864 | bit); | 1802 | buf_f->blf_map_size, bit); |
1865 | ASSERT(nbits > 0); | 1803 | ASSERT(nbits > 0); |
1866 | reg_buf_offset = bit << XFS_BLF_SHIFT; | 1804 | reg_buf_offset = bit << XFS_BLF_SHIFT; |
1867 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; | 1805 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
@@ -1873,9 +1811,8 @@ xlog_recover_do_inode_buffer( | |||
1873 | * di_next_unlinked field, then move on to the next | 1811 | * di_next_unlinked field, then move on to the next |
1874 | * di_next_unlinked field. | 1812 | * di_next_unlinked field. |
1875 | */ | 1813 | */ |
1876 | if (next_unlinked_offset < reg_buf_offset) { | 1814 | if (next_unlinked_offset < reg_buf_offset) |
1877 | continue; | 1815 | continue; |
1878 | } | ||
1879 | 1816 | ||
1880 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1817 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1881 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); | 1818 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
@@ -1889,8 +1826,9 @@ xlog_recover_do_inode_buffer( | |||
1889 | logged_nextp = item->ri_buf[item_index].i_addr + | 1826 | logged_nextp = item->ri_buf[item_index].i_addr + |
1890 | next_unlinked_offset - reg_buf_offset; | 1827 | next_unlinked_offset - reg_buf_offset; |
1891 | if (unlikely(*logged_nextp == 0)) { | 1828 | if (unlikely(*logged_nextp == 0)) { |
1892 | xfs_fs_cmn_err(CE_ALERT, mp, | 1829 | xfs_alert(mp, |
1893 | "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", | 1830 | "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " |
1831 | "Trying to replay bad (0) inode di_next_unlinked field.", | ||
1894 | item, bp); | 1832 | item, bp); |
1895 | XFS_ERROR_REPORT("xlog_recover_do_inode_buf", | 1833 | XFS_ERROR_REPORT("xlog_recover_do_inode_buf", |
1896 | XFS_ERRLEVEL_LOW, mp); | 1834 | XFS_ERRLEVEL_LOW, mp); |
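Each iteration locates the on-disk di_next_unlinked field for inode slot i and copies in only the logged value; nothing else in the buffer is touched. A condensed sketch of the replay step, reconstructed from the surrounding context (the offsetof form and the xfs_buf_offset() call are assumptions about the elided lines):

	next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
			       offsetof(xfs_dinode_t, di_next_unlinked);
	buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, next_unlinked_offset);
	*buffer_nextp = *logged_nextp;	/* replay just the unlinked pointer */
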
@@ -1911,36 +1849,29 @@ xlog_recover_do_inode_buffer( | |||
1911 | * given buffer. The bitmap in the buf log format structure indicates | 1849 | * given buffer. The bitmap in the buf log format structure indicates |
1912 | * where to place the logged data. | 1850 | * where to place the logged data. |
1913 | */ | 1851 | */ |
1914 | /*ARGSUSED*/ | ||
1915 | STATIC void | 1852 | STATIC void |
1916 | xlog_recover_do_reg_buffer( | 1853 | xlog_recover_do_reg_buffer( |
1917 | struct xfs_mount *mp, | 1854 | struct xfs_mount *mp, |
1918 | xlog_recover_item_t *item, | 1855 | xlog_recover_item_t *item, |
1919 | xfs_buf_t *bp, | 1856 | struct xfs_buf *bp, |
1920 | xfs_buf_log_format_t *buf_f) | 1857 | xfs_buf_log_format_t *buf_f) |
1921 | { | 1858 | { |
1922 | int i; | 1859 | int i; |
1923 | int bit; | 1860 | int bit; |
1924 | int nbits; | 1861 | int nbits; |
1925 | unsigned int *data_map = NULL; | ||
1926 | unsigned int map_size = 0; | ||
1927 | int error; | 1862 | int error; |
1928 | 1863 | ||
1929 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); | 1864 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); |
1930 | 1865 | ||
1931 | switch (buf_f->blf_type) { | ||
1932 | case XFS_LI_BUF: | ||
1933 | data_map = buf_f->blf_data_map; | ||
1934 | map_size = buf_f->blf_map_size; | ||
1935 | break; | ||
1936 | } | ||
1937 | bit = 0; | 1866 | bit = 0; |
1938 | i = 1; /* 0 is the buf format structure */ | 1867 | i = 1; /* 0 is the buf format structure */ |
1939 | while (1) { | 1868 | while (1) { |
1940 | bit = xfs_next_bit(data_map, map_size, bit); | 1869 | bit = xfs_next_bit(buf_f->blf_data_map, |
1870 | buf_f->blf_map_size, bit); | ||
1941 | if (bit == -1) | 1871 | if (bit == -1) |
1942 | break; | 1872 | break; |
1943 | nbits = xfs_contig_bits(data_map, map_size, bit); | 1873 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
1874 | buf_f->blf_map_size, bit); | ||
1944 | ASSERT(nbits > 0); | 1875 | ASSERT(nbits > 0); |
1945 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1876 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1946 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); | 1877 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
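Both recovery paths now read the bitmap straight from buf_f using the same walk: xfs_next_bit() finds the next logged chunk and xfs_contig_bits() measures the run, with each bit covering one XFS_BLF_CHUNK of the buffer (so shifting by XFS_BLF_SHIFT converts a bit index into a byte offset). The shared pattern, roughly:

	bit = 0;
	while ((bit = xfs_next_bit(buf_f->blf_data_map,
				   buf_f->blf_map_size, bit)) != -1) {
		nbits = xfs_contig_bits(buf_f->blf_data_map,
					buf_f->blf_map_size, bit);
		/* logged bytes span [bit << XFS_BLF_SHIFT,
		   (bit + nbits) << XFS_BLF_SHIFT) */
		bit += nbits;
	}
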
@@ -1956,17 +1887,17 @@ xlog_recover_do_reg_buffer( | |||
1956 | if (buf_f->blf_flags & | 1887 | if (buf_f->blf_flags & |
1957 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { | 1888 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
1958 | if (item->ri_buf[i].i_addr == NULL) { | 1889 | if (item->ri_buf[i].i_addr == NULL) { |
1959 | cmn_err(CE_ALERT, | 1890 | xfs_alert(mp, |
1960 | "XFS: NULL dquot in %s.", __func__); | 1891 | "XFS: NULL dquot in %s.", __func__); |
1961 | goto next; | 1892 | goto next; |
1962 | } | 1893 | } |
1963 | if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { | 1894 | if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { |
1964 | cmn_err(CE_ALERT, | 1895 | xfs_alert(mp, |
1965 | "XFS: dquot too small (%d) in %s.", | 1896 | "XFS: dquot too small (%d) in %s.", |
1966 | item->ri_buf[i].i_len, __func__); | 1897 | item->ri_buf[i].i_len, __func__); |
1967 | goto next; | 1898 | goto next; |
1968 | } | 1899 | } |
1969 | error = xfs_qm_dqcheck(item->ri_buf[i].i_addr, | 1900 | error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, |
1970 | -1, 0, XFS_QMOPT_DOWARN, | 1901 | -1, 0, XFS_QMOPT_DOWARN, |
1971 | "dquot_buf_recover"); | 1902 | "dquot_buf_recover"); |
1972 | if (error) | 1903 | if (error) |
@@ -1991,6 +1922,7 @@ xlog_recover_do_reg_buffer( | |||
1991 | */ | 1922 | */ |
1992 | int | 1923 | int |
1993 | xfs_qm_dqcheck( | 1924 | xfs_qm_dqcheck( |
1925 | struct xfs_mount *mp, | ||
1994 | xfs_disk_dquot_t *ddq, | 1926 | xfs_disk_dquot_t *ddq, |
1995 | xfs_dqid_t id, | 1927 | xfs_dqid_t id, |
1996 | uint type, /* used only when IO_dorepair is true */ | 1928 | uint type, /* used only when IO_dorepair is true */ |
@@ -2017,14 +1949,14 @@ xfs_qm_dqcheck( | |||
2017 | */ | 1949 | */ |
2018 | if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { | 1950 | if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { |
2019 | if (flags & XFS_QMOPT_DOWARN) | 1951 | if (flags & XFS_QMOPT_DOWARN) |
2020 | cmn_err(CE_ALERT, | 1952 | xfs_alert(mp, |
2021 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", | 1953 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", |
2022 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); | 1954 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); |
2023 | errs++; | 1955 | errs++; |
2024 | } | 1956 | } |
2025 | if (ddq->d_version != XFS_DQUOT_VERSION) { | 1957 | if (ddq->d_version != XFS_DQUOT_VERSION) { |
2026 | if (flags & XFS_QMOPT_DOWARN) | 1958 | if (flags & XFS_QMOPT_DOWARN) |
2027 | cmn_err(CE_ALERT, | 1959 | xfs_alert(mp, |
2028 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", | 1960 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", |
2029 | str, id, ddq->d_version, XFS_DQUOT_VERSION); | 1961 | str, id, ddq->d_version, XFS_DQUOT_VERSION); |
2030 | errs++; | 1962 | errs++; |
@@ -2034,7 +1966,7 @@ xfs_qm_dqcheck( | |||
2034 | ddq->d_flags != XFS_DQ_PROJ && | 1966 | ddq->d_flags != XFS_DQ_PROJ && |
2035 | ddq->d_flags != XFS_DQ_GROUP) { | 1967 | ddq->d_flags != XFS_DQ_GROUP) { |
2036 | if (flags & XFS_QMOPT_DOWARN) | 1968 | if (flags & XFS_QMOPT_DOWARN) |
2037 | cmn_err(CE_ALERT, | 1969 | xfs_alert(mp, |
2038 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", | 1970 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", |
2039 | str, id, ddq->d_flags); | 1971 | str, id, ddq->d_flags); |
2040 | errs++; | 1972 | errs++; |
@@ -2042,7 +1974,7 @@ xfs_qm_dqcheck( | |||
2042 | 1974 | ||
2043 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { | 1975 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { |
2044 | if (flags & XFS_QMOPT_DOWARN) | 1976 | if (flags & XFS_QMOPT_DOWARN) |
2045 | cmn_err(CE_ALERT, | 1977 | xfs_alert(mp, |
2046 | "%s : ondisk-dquot 0x%p, ID mismatch: " | 1978 | "%s : ondisk-dquot 0x%p, ID mismatch: " |
2047 | "0x%x expected, found id 0x%x", | 1979 | "0x%x expected, found id 0x%x", |
2048 | str, ddq, id, be32_to_cpu(ddq->d_id)); | 1980 | str, ddq, id, be32_to_cpu(ddq->d_id)); |
@@ -2055,9 +1987,8 @@ xfs_qm_dqcheck( | |||
2055 | be64_to_cpu(ddq->d_blk_softlimit)) { | 1987 | be64_to_cpu(ddq->d_blk_softlimit)) { |
2056 | if (!ddq->d_btimer) { | 1988 | if (!ddq->d_btimer) { |
2057 | if (flags & XFS_QMOPT_DOWARN) | 1989 | if (flags & XFS_QMOPT_DOWARN) |
2058 | cmn_err(CE_ALERT, | 1990 | xfs_alert(mp, |
2059 | "%s : Dquot ID 0x%x (0x%p) " | 1991 | "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", |
2060 | "BLK TIMER NOT STARTED", | ||
2061 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 1992 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2062 | errs++; | 1993 | errs++; |
2063 | } | 1994 | } |
@@ -2067,9 +1998,8 @@ xfs_qm_dqcheck( | |||
2067 | be64_to_cpu(ddq->d_ino_softlimit)) { | 1998 | be64_to_cpu(ddq->d_ino_softlimit)) { |
2068 | if (!ddq->d_itimer) { | 1999 | if (!ddq->d_itimer) { |
2069 | if (flags & XFS_QMOPT_DOWARN) | 2000 | if (flags & XFS_QMOPT_DOWARN) |
2070 | cmn_err(CE_ALERT, | 2001 | xfs_alert(mp, |
2071 | "%s : Dquot ID 0x%x (0x%p) " | 2002 | "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", |
2072 | "INODE TIMER NOT STARTED", | ||
2073 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 2003 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2074 | errs++; | 2004 | errs++; |
2075 | } | 2005 | } |
@@ -2079,9 +2009,8 @@ xfs_qm_dqcheck( | |||
2079 | be64_to_cpu(ddq->d_rtb_softlimit)) { | 2009 | be64_to_cpu(ddq->d_rtb_softlimit)) { |
2080 | if (!ddq->d_rtbtimer) { | 2010 | if (!ddq->d_rtbtimer) { |
2081 | if (flags & XFS_QMOPT_DOWARN) | 2011 | if (flags & XFS_QMOPT_DOWARN) |
2082 | cmn_err(CE_ALERT, | 2012 | xfs_alert(mp, |
2083 | "%s : Dquot ID 0x%x (0x%p) " | 2013 | "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", |
2084 | "RTBLK TIMER NOT STARTED", | ||
2085 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 2014 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2086 | errs++; | 2015 | errs++; |
2087 | } | 2016 | } |
@@ -2092,7 +2021,7 @@ xfs_qm_dqcheck( | |||
2092 | return errs; | 2021 | return errs; |
2093 | 2022 | ||
2094 | if (flags & XFS_QMOPT_DOWARN) | 2023 | if (flags & XFS_QMOPT_DOWARN) |
2095 | cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); | 2024 | xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); |
2096 | 2025 | ||
2097 | /* | 2026 | /* |
2098 | * Typically, a repair is only requested by quotacheck. | 2027 | * Typically, a repair is only requested by quotacheck. |
@@ -2174,77 +2103,46 @@ xlog_recover_do_dquot_buffer( | |||
2174 | * for more details on the implementation of the table of cancel records. | 2103 | * for more details on the implementation of the table of cancel records. |
2175 | */ | 2104 | */ |
2176 | STATIC int | 2105 | STATIC int |
2177 | xlog_recover_do_buffer_trans( | 2106 | xlog_recover_buffer_pass2( |
2178 | xlog_t *log, | 2107 | xlog_t *log, |
2179 | xlog_recover_item_t *item, | 2108 | xlog_recover_item_t *item) |
2180 | int pass) | ||
2181 | { | 2109 | { |
2182 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2110 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
2183 | xfs_mount_t *mp; | 2111 | xfs_mount_t *mp = log->l_mp; |
2184 | xfs_buf_t *bp; | 2112 | xfs_buf_t *bp; |
2185 | int error; | 2113 | int error; |
2186 | int cancel; | ||
2187 | xfs_daddr_t blkno; | ||
2188 | int len; | ||
2189 | ushort flags; | ||
2190 | uint buf_flags; | 2114 | uint buf_flags; |
2191 | 2115 | ||
2192 | if (pass == XLOG_RECOVER_PASS1) { | 2116 | /* |
2193 | /* | 2117 | * In this pass we only want to recover all the buffers which have |
2194 | * In this pass we're only looking for buf items | 2118 | * not been cancelled and are not cancellation buffers themselves. |
2195 | * with the XFS_BLF_CANCEL bit set. | 2119 | */ |
2196 | */ | 2120 | if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, |
2197 | xlog_recover_do_buffer_pass1(log, buf_f); | 2121 | buf_f->blf_len, buf_f->blf_flags)) { |
2122 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
2198 | return 0; | 2123 | return 0; |
2199 | } else { | ||
2200 | /* | ||
2201 | * In this pass we want to recover all the buffers | ||
2202 | * which have not been cancelled and are not | ||
2203 | * cancellation buffers themselves. The routine | ||
2204 | * we call here will tell us whether or not to | ||
2205 | * continue with the replay of this buffer. | ||
2206 | */ | ||
2207 | cancel = xlog_recover_do_buffer_pass2(log, buf_f); | ||
2208 | if (cancel) { | ||
2209 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
2210 | return 0; | ||
2211 | } | ||
2212 | } | 2124 | } |
2125 | |||
2213 | trace_xfs_log_recover_buf_recover(log, buf_f); | 2126 | trace_xfs_log_recover_buf_recover(log, buf_f); |
2214 | switch (buf_f->blf_type) { | ||
2215 | case XFS_LI_BUF: | ||
2216 | blkno = buf_f->blf_blkno; | ||
2217 | len = buf_f->blf_len; | ||
2218 | flags = buf_f->blf_flags; | ||
2219 | break; | ||
2220 | default: | ||
2221 | xfs_fs_cmn_err(CE_ALERT, log->l_mp, | ||
2222 | "xfs_log_recover: unknown buffer type 0x%x, logdev %s", | ||
2223 | buf_f->blf_type, log->l_mp->m_logname ? | ||
2224 | log->l_mp->m_logname : "internal"); | ||
2225 | XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", | ||
2226 | XFS_ERRLEVEL_LOW, log->l_mp); | ||
2227 | return XFS_ERROR(EFSCORRUPTED); | ||
2228 | } | ||
2229 | 2127 | ||
2230 | mp = log->l_mp; | ||
2231 | buf_flags = XBF_LOCK; | 2128 | buf_flags = XBF_LOCK; |
2232 | if (!(flags & XFS_BLF_INODE_BUF)) | 2129 | if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF)) |
2233 | buf_flags |= XBF_MAPPED; | 2130 | buf_flags |= XBF_MAPPED; |
2234 | 2131 | ||
2235 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); | 2132 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
2133 | buf_flags); | ||
2236 | if (XFS_BUF_ISERROR(bp)) { | 2134 | if (XFS_BUF_ISERROR(bp)) { |
2237 | xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, | 2135 | xfs_ioerror_alert("xlog_recover_do..(read#1)", mp, |
2238 | bp, blkno); | 2136 | bp, buf_f->blf_blkno); |
2239 | error = XFS_BUF_GETERROR(bp); | 2137 | error = XFS_BUF_GETERROR(bp); |
2240 | xfs_buf_relse(bp); | 2138 | xfs_buf_relse(bp); |
2241 | return error; | 2139 | return error; |
2242 | } | 2140 | } |
2243 | 2141 | ||
2244 | error = 0; | 2142 | error = 0; |
2245 | if (flags & XFS_BLF_INODE_BUF) { | 2143 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
2246 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2144 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2247 | } else if (flags & | 2145 | } else if (buf_f->blf_flags & |
2248 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { | 2146 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
2249 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2147 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
2250 | } else { | 2148 | } else { |
@@ -2275,8 +2173,7 @@ xlog_recover_do_buffer_trans( | |||
2275 | XFS_BUF_STALE(bp); | 2173 | XFS_BUF_STALE(bp); |
2276 | error = xfs_bwrite(mp, bp); | 2174 | error = xfs_bwrite(mp, bp); |
2277 | } else { | 2175 | } else { |
2278 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); | 2176 | ASSERT(bp->b_target->bt_mount == mp); |
2279 | bp->b_mount = mp; | ||
2280 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2177 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2281 | xfs_bdwrite(mp, bp); | 2178 | xfs_bdwrite(mp, bp); |
2282 | } | 2179 | } |
@@ -2285,16 +2182,14 @@ xlog_recover_do_buffer_trans( | |||
2285 | } | 2182 | } |
2286 | 2183 | ||
2287 | STATIC int | 2184 | STATIC int |
2288 | xlog_recover_do_inode_trans( | 2185 | xlog_recover_inode_pass2( |
2289 | xlog_t *log, | 2186 | xlog_t *log, |
2290 | xlog_recover_item_t *item, | 2187 | xlog_recover_item_t *item) |
2291 | int pass) | ||
2292 | { | 2188 | { |
2293 | xfs_inode_log_format_t *in_f; | 2189 | xfs_inode_log_format_t *in_f; |
2294 | xfs_mount_t *mp; | 2190 | xfs_mount_t *mp = log->l_mp; |
2295 | xfs_buf_t *bp; | 2191 | xfs_buf_t *bp; |
2296 | xfs_dinode_t *dip; | 2192 | xfs_dinode_t *dip; |
2297 | xfs_ino_t ino; | ||
2298 | int len; | 2193 | int len; |
2299 | xfs_caddr_t src; | 2194 | xfs_caddr_t src; |
2300 | xfs_caddr_t dest; | 2195 | xfs_caddr_t dest; |
@@ -2304,10 +2199,6 @@ xlog_recover_do_inode_trans( | |||
2304 | xfs_icdinode_t *dicp; | 2199 | xfs_icdinode_t *dicp; |
2305 | int need_free = 0; | 2200 | int need_free = 0; |
2306 | 2201 | ||
2307 | if (pass == XLOG_RECOVER_PASS1) { | ||
2308 | return 0; | ||
2309 | } | ||
2310 | |||
2311 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { | 2202 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { |
2312 | in_f = item->ri_buf[0].i_addr; | 2203 | in_f = item->ri_buf[0].i_addr; |
2313 | } else { | 2204 | } else { |
@@ -2317,8 +2208,6 @@ xlog_recover_do_inode_trans( | |||
2317 | if (error) | 2208 | if (error) |
2318 | goto error; | 2209 | goto error; |
2319 | } | 2210 | } |
2320 | ino = in_f->ilf_ino; | ||
2321 | mp = log->l_mp; | ||
2322 | 2211 | ||
2323 | /* | 2212 | /* |
2324 | * Inode buffers can be freed, look out for it, | 2213 | * Inode buffers can be freed, look out for it, |
@@ -2351,10 +2240,10 @@ xlog_recover_do_inode_trans( | |||
2351 | */ | 2240 | */ |
2352 | if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { | 2241 | if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { |
2353 | xfs_buf_relse(bp); | 2242 | xfs_buf_relse(bp); |
2354 | xfs_fs_cmn_err(CE_ALERT, mp, | 2243 | xfs_alert(mp, |
2355 | "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", | 2244 | "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", |
2356 | dip, bp, ino); | 2245 | __func__, dip, bp, in_f->ilf_ino); |
2357 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", | 2246 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
2358 | XFS_ERRLEVEL_LOW, mp); | 2247 | XFS_ERRLEVEL_LOW, mp); |
2359 | error = EFSCORRUPTED; | 2248 | error = EFSCORRUPTED; |
2360 | goto error; | 2249 | goto error; |
@@ -2362,10 +2251,10 @@ xlog_recover_do_inode_trans( | |||
2362 | dicp = item->ri_buf[1].i_addr; | 2251 | dicp = item->ri_buf[1].i_addr; |
2363 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { | 2252 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { |
2364 | xfs_buf_relse(bp); | 2253 | xfs_buf_relse(bp); |
2365 | xfs_fs_cmn_err(CE_ALERT, mp, | 2254 | xfs_alert(mp, |
2366 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", | 2255 | "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", |
2367 | item, ino); | 2256 | __func__, item, in_f->ilf_ino); |
2368 | XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", | 2257 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
2369 | XFS_ERRLEVEL_LOW, mp); | 2258 | XFS_ERRLEVEL_LOW, mp); |
2370 | error = EFSCORRUPTED; | 2259 | error = EFSCORRUPTED; |
2371 | goto error; | 2260 | goto error; |
@@ -2393,12 +2282,13 @@ xlog_recover_do_inode_trans( | |||
2393 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { | 2282 | if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { |
2394 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2283 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2395 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { | 2284 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { |
2396 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", | 2285 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
2397 | XFS_ERRLEVEL_LOW, mp, dicp); | 2286 | XFS_ERRLEVEL_LOW, mp, dicp); |
2398 | xfs_buf_relse(bp); | 2287 | xfs_buf_relse(bp); |
2399 | xfs_fs_cmn_err(CE_ALERT, mp, | 2288 | xfs_alert(mp, |
2400 | "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2289 | "%s: Bad regular inode log record, rec ptr 0x%p, " |
2401 | item, dip, bp, ino); | 2290 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2291 | __func__, item, dip, bp, in_f->ilf_ino); | ||
2402 | error = EFSCORRUPTED; | 2292 | error = EFSCORRUPTED; |
2403 | goto error; | 2293 | goto error; |
2404 | } | 2294 | } |
@@ -2406,45 +2296,48 @@ xlog_recover_do_inode_trans( | |||
2406 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2296 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2407 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && | 2297 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && |
2408 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { | 2298 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { |
2409 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", | 2299 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
2410 | XFS_ERRLEVEL_LOW, mp, dicp); | 2300 | XFS_ERRLEVEL_LOW, mp, dicp); |
2411 | xfs_buf_relse(bp); | 2301 | xfs_buf_relse(bp); |
2412 | xfs_fs_cmn_err(CE_ALERT, mp, | 2302 | xfs_alert(mp, |
2413 | "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2303 | "%s: Bad dir inode log record, rec ptr 0x%p, " |
2414 | item, dip, bp, ino); | 2304 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2305 | __func__, item, dip, bp, in_f->ilf_ino); | ||
2415 | error = EFSCORRUPTED; | 2306 | error = EFSCORRUPTED; |
2416 | goto error; | 2307 | goto error; |
2417 | } | 2308 | } |
2418 | } | 2309 | } |
2419 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ | 2310 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ |
2420 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", | 2311 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
2421 | XFS_ERRLEVEL_LOW, mp, dicp); | 2312 | XFS_ERRLEVEL_LOW, mp, dicp); |
2422 | xfs_buf_relse(bp); | 2313 | xfs_buf_relse(bp); |
2423 | xfs_fs_cmn_err(CE_ALERT, mp, | 2314 | xfs_alert(mp, |
2424 | "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", | 2315 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2425 | item, dip, bp, ino, | 2316 | "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", |
2317 | __func__, item, dip, bp, in_f->ilf_ino, | ||
2426 | dicp->di_nextents + dicp->di_anextents, | 2318 | dicp->di_nextents + dicp->di_anextents, |
2427 | dicp->di_nblocks); | 2319 | dicp->di_nblocks); |
2428 | error = EFSCORRUPTED; | 2320 | error = EFSCORRUPTED; |
2429 | goto error; | 2321 | goto error; |
2430 | } | 2322 | } |
2431 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { | 2323 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { |
2432 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", | 2324 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
2433 | XFS_ERRLEVEL_LOW, mp, dicp); | 2325 | XFS_ERRLEVEL_LOW, mp, dicp); |
2434 | xfs_buf_relse(bp); | 2326 | xfs_buf_relse(bp); |
2435 | xfs_fs_cmn_err(CE_ALERT, mp, | 2327 | xfs_alert(mp, |
2436 | "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", | 2328 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2437 | item, dip, bp, ino, dicp->di_forkoff); | 2329 | "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, |
2330 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); | ||
2438 | error = EFSCORRUPTED; | 2331 | error = EFSCORRUPTED; |
2439 | goto error; | 2332 | goto error; |
2440 | } | 2333 | } |
2441 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { | 2334 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { |
2442 | XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", | 2335 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
2443 | XFS_ERRLEVEL_LOW, mp, dicp); | 2336 | XFS_ERRLEVEL_LOW, mp, dicp); |
2444 | xfs_buf_relse(bp); | 2337 | xfs_buf_relse(bp); |
2445 | xfs_fs_cmn_err(CE_ALERT, mp, | 2338 | xfs_alert(mp, |
2446 | "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", | 2339 | "%s: Bad inode log record length %d, rec ptr 0x%p", |
2447 | item->ri_buf[1].i_len, item); | 2340 | __func__, item->ri_buf[1].i_len, item); |
2448 | error = EFSCORRUPTED; | 2341 | error = EFSCORRUPTED; |
2449 | goto error; | 2342 | goto error; |
2450 | } | 2343 | } |
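The validation hunks above carry the logging rework that runs through the whole commit: xfs_fs_cmn_err(CE_ALERT, mp, ...) becomes xfs_alert(mp, ...), the function name moves out of the format string into a __func__ argument, and the cached ino local is dropped in favour of reading in_f->ilf_ino at each report site. A minimal user-space sketch of that helper shape, with illustrative names (demo_mount and demo_alert are not XFS symbols; the real helpers are added in xfs_message.c):

	#include <stdarg.h>
	#include <stdio.h>

	struct demo_mount { const char *fsname; };

	/* prefix every message with the filesystem identity, like xfs_alert() */
	static void demo_alert(const struct demo_mount *mp, const char *fmt, ...)
	{
		va_list ap;

		fprintf(stderr, "XFS (%s): ", mp->fsname);
		va_start(ap, fmt);
		vfprintf(stderr, fmt, ap);
		va_end(ap);
		fputc('\n', stderr);
	}

	int main(void)
	{
		struct demo_mount m = { .fsname = "sda1" };

		demo_alert(&m, "%s: Bad inode magic number, ino %lld",
			   __func__, 128LL);
		return 0;
	}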
@@ -2531,7 +2424,7 @@ xlog_recover_do_inode_trans( | |||
2531 | break; | 2424 | break; |
2532 | 2425 | ||
2533 | default: | 2426 | default: |
2534 | xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); | 2427 | xfs_warn(log->l_mp, "%s: Invalid flag", __func__); |
2535 | ASSERT(0); | 2428 | ASSERT(0); |
2536 | xfs_buf_relse(bp); | 2429 | xfs_buf_relse(bp); |
2537 | error = EIO; | 2430 | error = EIO; |
@@ -2540,8 +2433,7 @@ xlog_recover_do_inode_trans( | |||
2540 | } | 2433 | } |
2541 | 2434 | ||
2542 | write_inode_buffer: | 2435 | write_inode_buffer: |
2543 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); | 2436 | ASSERT(bp->b_target->bt_mount == mp); |
2544 | bp->b_mount = mp; | ||
2545 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2437 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2546 | xfs_bdwrite(mp, bp); | 2438 | xfs_bdwrite(mp, bp); |
2547 | error: | 2439 | error: |
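The write_inode_buffer hunk is part of the same simplification that reappears below in the dquot path: buffers stop carrying their own b_mount pointer, since every buffer already reaches its mount through the shared buffer target, so recovery merely asserts the invariant instead of re-establishing it per buffer. A compilable sketch with stand-in types (all demo_* names are illustrative):

	#include <assert.h>

	struct demo_mount { int dummy; };
	struct demo_buftarg { struct demo_mount *bt_mount; };
	struct demo_buf { struct demo_buftarg *b_target; };

	int main(void)
	{
		struct demo_mount m;
		struct demo_buftarg t = { .bt_mount = &m };
		struct demo_buf bp = { .b_target = &t };

		/* was: ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
		 *      bp->b_mount = mp;  -- now just check the invariant */
		assert(bp.b_target->bt_mount == &m);
		return 0;
	}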
@@ -2556,18 +2448,11 @@ error: | |||
2556 | * of that type. | 2448 | * of that type. |
2557 | */ | 2449 | */ |
2558 | STATIC int | 2450 | STATIC int |
2559 | xlog_recover_do_quotaoff_trans( | 2451 | xlog_recover_quotaoff_pass1( |
2560 | xlog_t *log, | 2452 | xlog_t *log, |
2561 | xlog_recover_item_t *item, | 2453 | xlog_recover_item_t *item) |
2562 | int pass) | ||
2563 | { | 2454 | { |
2564 | xfs_qoff_logformat_t *qoff_f; | 2455 | xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; |
2565 | |||
2566 | if (pass == XLOG_RECOVER_PASS2) { | ||
2567 | return (0); | ||
2568 | } | ||
2569 | |||
2570 | qoff_f = item->ri_buf[0].i_addr; | ||
2571 | ASSERT(qoff_f); | 2456 | ASSERT(qoff_f); |
2572 | 2457 | ||
2573 | /* | 2458 | /* |
@@ -2588,22 +2473,17 @@ xlog_recover_do_quotaoff_trans( | |||
2588 | * Recover a dquot record | 2473 | * Recover a dquot record |
2589 | */ | 2474 | */ |
2590 | STATIC int | 2475 | STATIC int |
2591 | xlog_recover_do_dquot_trans( | 2476 | xlog_recover_dquot_pass2( |
2592 | xlog_t *log, | 2477 | xlog_t *log, |
2593 | xlog_recover_item_t *item, | 2478 | xlog_recover_item_t *item) |
2594 | int pass) | ||
2595 | { | 2479 | { |
2596 | xfs_mount_t *mp; | 2480 | xfs_mount_t *mp = log->l_mp; |
2597 | xfs_buf_t *bp; | 2481 | xfs_buf_t *bp; |
2598 | struct xfs_disk_dquot *ddq, *recddq; | 2482 | struct xfs_disk_dquot *ddq, *recddq; |
2599 | int error; | 2483 | int error; |
2600 | xfs_dq_logformat_t *dq_f; | 2484 | xfs_dq_logformat_t *dq_f; |
2601 | uint type; | 2485 | uint type; |
2602 | 2486 | ||
2603 | if (pass == XLOG_RECOVER_PASS1) { | ||
2604 | return 0; | ||
2605 | } | ||
2606 | mp = log->l_mp; | ||
2607 | 2487 | ||
2608 | /* | 2488 | /* |
2609 | * Filesystems are required to send in quota flags at mount time. | 2489 | * Filesystems are required to send in quota flags at mount time. |
@@ -2613,13 +2493,11 @@ xlog_recover_do_dquot_trans( | |||
2613 | 2493 | ||
2614 | recddq = item->ri_buf[1].i_addr; | 2494 | recddq = item->ri_buf[1].i_addr; |
2615 | if (recddq == NULL) { | 2495 | if (recddq == NULL) { |
2616 | cmn_err(CE_ALERT, | 2496 | xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); |
2617 | "XFS: NULL dquot in %s.", __func__); | ||
2618 | return XFS_ERROR(EIO); | 2497 | return XFS_ERROR(EIO); |
2619 | } | 2498 | } |
2620 | if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { | 2499 | if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { |
2621 | cmn_err(CE_ALERT, | 2500 | xfs_alert(log->l_mp, "dquot too small (%d) in %s.", |
2622 | "XFS: dquot too small (%d) in %s.", | ||
2623 | item->ri_buf[1].i_len, __func__); | 2501 | item->ri_buf[1].i_len, __func__); |
2624 | return XFS_ERROR(EIO); | 2502 | return XFS_ERROR(EIO); |
2625 | } | 2503 | } |
@@ -2644,12 +2522,10 @@ xlog_recover_do_dquot_trans( | |||
2644 | */ | 2522 | */ |
2645 | dq_f = item->ri_buf[0].i_addr; | 2523 | dq_f = item->ri_buf[0].i_addr; |
2646 | ASSERT(dq_f); | 2524 | ASSERT(dq_f); |
2647 | if ((error = xfs_qm_dqcheck(recddq, | 2525 | error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
2648 | dq_f->qlf_id, | 2526 | "xlog_recover_dquot_pass2 (log copy)"); |
2649 | 0, XFS_QMOPT_DOWARN, | 2527 | if (error) |
2650 | "xlog_recover_do_dquot_trans (log copy)"))) { | ||
2651 | return XFS_ERROR(EIO); | 2528 | return XFS_ERROR(EIO); |
2652 | } | ||
2653 | ASSERT(dq_f->qlf_len == 1); | 2529 | ASSERT(dq_f->qlf_len == 1); |
2654 | 2530 | ||
2655 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 2531 | error = xfs_read_buf(mp, mp->m_ddev_targp, |
@@ -2669,8 +2545,9 @@ xlog_recover_do_dquot_trans( | |||
2669 | * was among a chunk of dquots created earlier, and we did some | 2545 | * was among a chunk of dquots created earlier, and we did some |
2670 | * minimal initialization then. | 2546 | * minimal initialization then. |
2671 | */ | 2547 | */ |
2672 | if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, | 2548 | error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
2673 | "xlog_recover_do_dquot_trans")) { | 2549 | "xlog_recover_dquot_pass2"); |
2550 | if (error) { | ||
2674 | xfs_buf_relse(bp); | 2551 | xfs_buf_relse(bp); |
2675 | return XFS_ERROR(EIO); | 2552 | return XFS_ERROR(EIO); |
2676 | } | 2553 | } |
@@ -2678,8 +2555,7 @@ xlog_recover_do_dquot_trans( | |||
2678 | memcpy(ddq, recddq, item->ri_buf[1].i_len); | 2555 | memcpy(ddq, recddq, item->ri_buf[1].i_len); |
2679 | 2556 | ||
2680 | ASSERT(dq_f->qlf_size == 2); | 2557 | ASSERT(dq_f->qlf_size == 2); |
2681 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); | 2558 | ASSERT(bp->b_target->bt_mount == mp); |
2682 | bp->b_mount = mp; | ||
2683 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2559 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2684 | xfs_bdwrite(mp, bp); | 2560 | xfs_bdwrite(mp, bp); |
2685 | 2561 | ||
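Besides the renaming, the dquot hunks show xfs_qm_dqcheck() gaining the mount as its first argument and having its result captured in error rather than tested inline, matching the kernel's usual error-flow style. The idiom, reduced to a trivial compilable stand-in (check_thing and replay_thing are not XFS functions):

	#include <errno.h>
	#include <stdio.h>

	static int check_thing(int id)		/* stands in for xfs_qm_dqcheck() */
	{
		return id < 0 ? -EINVAL : 0;
	}

	static int replay_thing(int id)
	{
		int error;

		error = check_thing(id);	/* was: if (check_thing(id)) ... */
		if (error)
			return error;
		printf("replaying dquot %d\n", id);
		return 0;
	}

	int main(void)
	{
		return replay_thing(7);
	}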
@@ -2694,38 +2570,31 @@ xlog_recover_do_dquot_trans( | |||
2694 | * LSN. | 2570 | * LSN. |
2695 | */ | 2571 | */ |
2696 | STATIC int | 2572 | STATIC int |
2697 | xlog_recover_do_efi_trans( | 2573 | xlog_recover_efi_pass2( |
2698 | xlog_t *log, | 2574 | xlog_t *log, |
2699 | xlog_recover_item_t *item, | 2575 | xlog_recover_item_t *item, |
2700 | xfs_lsn_t lsn, | 2576 | xfs_lsn_t lsn) |
2701 | int pass) | ||
2702 | { | 2577 | { |
2703 | int error; | 2578 | int error; |
2704 | xfs_mount_t *mp; | 2579 | xfs_mount_t *mp = log->l_mp; |
2705 | xfs_efi_log_item_t *efip; | 2580 | xfs_efi_log_item_t *efip; |
2706 | xfs_efi_log_format_t *efi_formatp; | 2581 | xfs_efi_log_format_t *efi_formatp; |
2707 | 2582 | ||
2708 | if (pass == XLOG_RECOVER_PASS1) { | ||
2709 | return 0; | ||
2710 | } | ||
2711 | |||
2712 | efi_formatp = item->ri_buf[0].i_addr; | 2583 | efi_formatp = item->ri_buf[0].i_addr; |
2713 | 2584 | ||
2714 | mp = log->l_mp; | ||
2715 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); | 2585 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
2716 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), | 2586 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), |
2717 | &(efip->efi_format)))) { | 2587 | &(efip->efi_format)))) { |
2718 | xfs_efi_item_free(efip); | 2588 | xfs_efi_item_free(efip); |
2719 | return error; | 2589 | return error; |
2720 | } | 2590 | } |
2721 | efip->efi_next_extent = efi_formatp->efi_nextents; | 2591 | atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); |
2722 | efip->efi_flags |= XFS_EFI_COMMITTED; | ||
2723 | 2592 | ||
2724 | spin_lock(&log->l_ailp->xa_lock); | 2593 | spin_lock(&log->l_ailp->xa_lock); |
2725 | /* | 2594 | /* |
2726 | * xfs_trans_ail_update() drops the AIL lock. | 2595 | * xfs_trans_ail_update() drops the AIL lock. |
2727 | */ | 2596 | */ |
2728 | xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); | 2597 | xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); |
2729 | return 0; | 2598 | return 0; |
2730 | } | 2599 | } |
2731 | 2600 | ||
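For EFIs, efi_next_extent becomes an atomic_t and the XFS_EFI_COMMITTED flag disappears, so recovery seeds the counter with atomic_set() instead of a plain store plus a flag update. A user-space analogue with C11 atomics standing in for the kernel's atomic_t (demo_efi is illustrative):

	#include <stdatomic.h>
	#include <stdio.h>

	struct demo_efi {
		atomic_int next_extent;	/* was: plain int guarded by a flag */
	};

	int main(void)
	{
		struct demo_efi efi;

		atomic_store(&efi.next_extent, 4);	/* atomic_set() in the diff */
		printf("%d extents to replay\n", atomic_load(&efi.next_extent));
		return 0;
	}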
@@ -2738,11 +2607,10 @@ xlog_recover_do_efi_trans( | |||
2738 | * efd format structure. If we find it, we remove the efi from the | 2607 | * efd format structure. If we find it, we remove the efi from the |
2739 | * AIL and free it. | 2608 | * AIL and free it. |
2740 | */ | 2609 | */ |
2741 | STATIC void | 2610 | STATIC int |
2742 | xlog_recover_do_efd_trans( | 2611 | xlog_recover_efd_pass2( |
2743 | xlog_t *log, | 2612 | xlog_t *log, |
2744 | xlog_recover_item_t *item, | 2613 | xlog_recover_item_t *item) |
2745 | int pass) | ||
2746 | { | 2614 | { |
2747 | xfs_efd_log_format_t *efd_formatp; | 2615 | xfs_efd_log_format_t *efd_formatp; |
2748 | xfs_efi_log_item_t *efip = NULL; | 2616 | xfs_efi_log_item_t *efip = NULL; |
@@ -2751,10 +2619,6 @@ xlog_recover_do_efd_trans( | |||
2751 | struct xfs_ail_cursor cur; | 2619 | struct xfs_ail_cursor cur; |
2752 | struct xfs_ail *ailp = log->l_ailp; | 2620 | struct xfs_ail *ailp = log->l_ailp; |
2753 | 2621 | ||
2754 | if (pass == XLOG_RECOVER_PASS1) { | ||
2755 | return; | ||
2756 | } | ||
2757 | |||
2758 | efd_formatp = item->ri_buf[0].i_addr; | 2622 | efd_formatp = item->ri_buf[0].i_addr; |
2759 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + | 2623 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + |
2760 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || | 2624 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || |
@@ -2786,62 +2650,6 @@ xlog_recover_do_efd_trans( | |||
2786 | } | 2650 | } |
2787 | xfs_trans_ail_cursor_done(ailp, &cur); | 2651 | xfs_trans_ail_cursor_done(ailp, &cur); |
2788 | spin_unlock(&ailp->xa_lock); | 2652 | spin_unlock(&ailp->xa_lock); |
2789 | } | ||
2790 | |||
2791 | /* | ||
2792 | * Perform the transaction | ||
2793 | * | ||
2794 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | ||
2795 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | ||
2796 | */ | ||
2797 | STATIC int | ||
2798 | xlog_recover_do_trans( | ||
2799 | xlog_t *log, | ||
2800 | xlog_recover_t *trans, | ||
2801 | int pass) | ||
2802 | { | ||
2803 | int error = 0; | ||
2804 | xlog_recover_item_t *item; | ||
2805 | |||
2806 | error = xlog_recover_reorder_trans(log, trans, pass); | ||
2807 | if (error) | ||
2808 | return error; | ||
2809 | |||
2810 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | ||
2811 | trace_xfs_log_recover_item_recover(log, trans, item, pass); | ||
2812 | switch (ITEM_TYPE(item)) { | ||
2813 | case XFS_LI_BUF: | ||
2814 | error = xlog_recover_do_buffer_trans(log, item, pass); | ||
2815 | break; | ||
2816 | case XFS_LI_INODE: | ||
2817 | error = xlog_recover_do_inode_trans(log, item, pass); | ||
2818 | break; | ||
2819 | case XFS_LI_EFI: | ||
2820 | error = xlog_recover_do_efi_trans(log, item, | ||
2821 | trans->r_lsn, pass); | ||
2822 | break; | ||
2823 | case XFS_LI_EFD: | ||
2824 | xlog_recover_do_efd_trans(log, item, pass); | ||
2825 | error = 0; | ||
2826 | break; | ||
2827 | case XFS_LI_DQUOT: | ||
2828 | error = xlog_recover_do_dquot_trans(log, item, pass); | ||
2829 | break; | ||
2830 | case XFS_LI_QUOTAOFF: | ||
2831 | error = xlog_recover_do_quotaoff_trans(log, item, | ||
2832 | pass); | ||
2833 | break; | ||
2834 | default: | ||
2835 | xlog_warn( | ||
2836 | "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item)); | ||
2837 | ASSERT(0); | ||
2838 | error = XFS_ERROR(EIO); | ||
2839 | break; | ||
2840 | } | ||
2841 | |||
2842 | if (error) | ||
2843 | return error; | ||
2844 | } | ||
2845 | 2653 | ||
2846 | return 0; | 2654 | return 0; |
2847 | } | 2655 | } |
@@ -2853,7 +2661,7 @@ xlog_recover_do_trans( | |||
2853 | */ | 2661 | */ |
2854 | STATIC void | 2662 | STATIC void |
2855 | xlog_recover_free_trans( | 2663 | xlog_recover_free_trans( |
2856 | xlog_recover_t *trans) | 2664 | struct xlog_recover *trans) |
2857 | { | 2665 | { |
2858 | xlog_recover_item_t *item, *n; | 2666 | xlog_recover_item_t *item, *n; |
2859 | int i; | 2667 | int i; |
@@ -2872,26 +2680,103 @@ xlog_recover_free_trans( | |||
2872 | } | 2680 | } |
2873 | 2681 | ||
2874 | STATIC int | 2682 | STATIC int |
2683 | xlog_recover_commit_pass1( | ||
2684 | struct log *log, | ||
2685 | struct xlog_recover *trans, | ||
2686 | xlog_recover_item_t *item) | ||
2687 | { | ||
2688 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); | ||
2689 | |||
2690 | switch (ITEM_TYPE(item)) { | ||
2691 | case XFS_LI_BUF: | ||
2692 | return xlog_recover_buffer_pass1(log, item); | ||
2693 | case XFS_LI_QUOTAOFF: | ||
2694 | return xlog_recover_quotaoff_pass1(log, item); | ||
2695 | case XFS_LI_INODE: | ||
2696 | case XFS_LI_EFI: | ||
2697 | case XFS_LI_EFD: | ||
2698 | case XFS_LI_DQUOT: | ||
2699 | /* nothing to do in pass 1 */ | ||
2700 | return 0; | ||
2701 | default: | ||
2702 | xfs_warn(log->l_mp, "%s: invalid item type (%d)", | ||
2703 | __func__, ITEM_TYPE(item)); | ||
2704 | ASSERT(0); | ||
2705 | return XFS_ERROR(EIO); | ||
2706 | } | ||
2707 | } | ||
2708 | |||
2709 | STATIC int | ||
2710 | xlog_recover_commit_pass2( | ||
2711 | struct log *log, | ||
2712 | struct xlog_recover *trans, | ||
2713 | xlog_recover_item_t *item) | ||
2714 | { | ||
2715 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); | ||
2716 | |||
2717 | switch (ITEM_TYPE(item)) { | ||
2718 | case XFS_LI_BUF: | ||
2719 | return xlog_recover_buffer_pass2(log, item); | ||
2720 | case XFS_LI_INODE: | ||
2721 | return xlog_recover_inode_pass2(log, item); | ||
2722 | case XFS_LI_EFI: | ||
2723 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | ||
2724 | case XFS_LI_EFD: | ||
2725 | return xlog_recover_efd_pass2(log, item); | ||
2726 | case XFS_LI_DQUOT: | ||
2727 | return xlog_recover_dquot_pass2(log, item); | ||
2728 | case XFS_LI_QUOTAOFF: | ||
2729 | /* nothing to do in pass2 */ | ||
2730 | return 0; | ||
2731 | default: | ||
2732 | xfs_warn(log->l_mp, "%s: invalid item type (%d)", | ||
2733 | __func__, ITEM_TYPE(item)); | ||
2734 | ASSERT(0); | ||
2735 | return XFS_ERROR(EIO); | ||
2736 | } | ||
2737 | } | ||
2738 | |||
2739 | /* | ||
2740 | * Perform the transaction. | ||
2741 | * | ||
2742 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | ||
2743 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | ||
2744 | */ | ||
2745 | STATIC int | ||
2875 | xlog_recover_commit_trans( | 2746 | xlog_recover_commit_trans( |
2876 | xlog_t *log, | 2747 | struct log *log, |
2877 | xlog_recover_t *trans, | 2748 | struct xlog_recover *trans, |
2878 | int pass) | 2749 | int pass) |
2879 | { | 2750 | { |
2880 | int error; | 2751 | int error = 0; |
2752 | xlog_recover_item_t *item; | ||
2881 | 2753 | ||
2882 | hlist_del(&trans->r_list); | 2754 | hlist_del(&trans->r_list); |
2883 | if ((error = xlog_recover_do_trans(log, trans, pass))) | 2755 | |
2756 | error = xlog_recover_reorder_trans(log, trans, pass); | ||
2757 | if (error) | ||
2884 | return error; | 2758 | return error; |
2885 | xlog_recover_free_trans(trans); /* no error */ | 2759 | |
2760 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | ||
2761 | if (pass == XLOG_RECOVER_PASS1) | ||
2762 | error = xlog_recover_commit_pass1(log, trans, item); | ||
2763 | else | ||
2764 | error = xlog_recover_commit_pass2(log, trans, item); | ||
2765 | if (error) | ||
2766 | return error; | ||
2767 | } | ||
2768 | |||
2769 | xlog_recover_free_trans(trans); | ||
2886 | return 0; | 2770 | return 0; |
2887 | } | 2771 | } |
2888 | 2772 | ||
2889 | STATIC int | 2773 | STATIC int |
2890 | xlog_recover_unmount_trans( | 2774 | xlog_recover_unmount_trans( |
2775 | struct log *log, | ||
2891 | xlog_recover_t *trans) | 2776 | xlog_recover_t *trans) |
2892 | { | 2777 | { |
2893 | /* Do nothing now */ | 2778 | /* Do nothing now */ |
2894 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); | 2779 | xfs_warn(log->l_mp, "%s: Unmount LR", __func__); |
2895 | return 0; | 2780 | return 0; |
2896 | } | 2781 | } |
2897 | 2782 | ||
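This is the heart of the refactor: the removed xlog_recover_do_trans(), whose handlers each tested the pass argument and returned early, is split into xlog_recover_commit_pass1() and xlog_recover_commit_pass2(), and xlog_recover_commit_trans() picks the dispatcher once per item. The shape of that change as a minimal compilable sketch (the item types and handlers are stand-ins, not the XFS ones):

	#include <stdio.h>

	enum item_type { IT_BUF, IT_INODE };
	enum pass { PASS1, PASS2 };

	static int commit_pass1(enum item_type t)
	{
		switch (t) {
		case IT_BUF:	puts("pass1: record cancelled buffer"); return 0;
		case IT_INODE:	return 0;	/* nothing to do in pass 1 */
		}
		return -1;
	}

	static int commit_pass2(enum item_type t)
	{
		switch (t) {
		case IT_BUF:	puts("pass2: replay buffer"); return 0;
		case IT_INODE:	puts("pass2: replay inode"); return 0;
		}
		return -1;
	}

	/* one test of `pass` here replaces a test in every handler */
	static int commit_item(enum item_type t, enum pass pass)
	{
		return pass == PASS1 ? commit_pass1(t) : commit_pass2(t);
	}

	int main(void)
	{
		return commit_item(IT_INODE, PASS2);
	}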
@@ -2934,8 +2819,8 @@ xlog_recover_process_data( | |||
2934 | dp += sizeof(xlog_op_header_t); | 2819 | dp += sizeof(xlog_op_header_t); |
2935 | if (ohead->oh_clientid != XFS_TRANSACTION && | 2820 | if (ohead->oh_clientid != XFS_TRANSACTION && |
2936 | ohead->oh_clientid != XFS_LOG) { | 2821 | ohead->oh_clientid != XFS_LOG) { |
2937 | xlog_warn( | 2822 | xfs_warn(log->l_mp, "%s: bad clientid 0x%x", |
2938 | "XFS: xlog_recover_process_data: bad clientid"); | 2823 | __func__, ohead->oh_clientid); |
2939 | ASSERT(0); | 2824 | ASSERT(0); |
2940 | return (XFS_ERROR(EIO)); | 2825 | return (XFS_ERROR(EIO)); |
2941 | } | 2826 | } |
@@ -2948,8 +2833,8 @@ xlog_recover_process_data( | |||
2948 | be64_to_cpu(rhead->h_lsn)); | 2833 | be64_to_cpu(rhead->h_lsn)); |
2949 | } else { | 2834 | } else { |
2950 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { | 2835 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { |
2951 | xlog_warn( | 2836 | xfs_warn(log->l_mp, "%s: bad length 0x%x", |
2952 | "XFS: xlog_recover_process_data: bad length"); | 2837 | __func__, be32_to_cpu(ohead->oh_len)); |
2953 | WARN_ON(1); | 2838 | WARN_ON(1); |
2954 | return (XFS_ERROR(EIO)); | 2839 | return (XFS_ERROR(EIO)); |
2955 | } | 2840 | } |
@@ -2962,7 +2847,7 @@ xlog_recover_process_data( | |||
2962 | trans, pass); | 2847 | trans, pass); |
2963 | break; | 2848 | break; |
2964 | case XLOG_UNMOUNT_TRANS: | 2849 | case XLOG_UNMOUNT_TRANS: |
2965 | error = xlog_recover_unmount_trans(trans); | 2850 | error = xlog_recover_unmount_trans(log, trans); |
2966 | break; | 2851 | break; |
2967 | case XLOG_WAS_CONT_TRANS: | 2852 | case XLOG_WAS_CONT_TRANS: |
2968 | error = xlog_recover_add_to_cont_trans(log, | 2853 | error = xlog_recover_add_to_cont_trans(log, |
@@ -2970,8 +2855,8 @@ xlog_recover_process_data( | |||
2970 | be32_to_cpu(ohead->oh_len)); | 2855 | be32_to_cpu(ohead->oh_len)); |
2971 | break; | 2856 | break; |
2972 | case XLOG_START_TRANS: | 2857 | case XLOG_START_TRANS: |
2973 | xlog_warn( | 2858 | xfs_warn(log->l_mp, "%s: bad transaction", |
2974 | "XFS: xlog_recover_process_data: bad transaction"); | 2859 | __func__); |
2975 | ASSERT(0); | 2860 | ASSERT(0); |
2976 | error = XFS_ERROR(EIO); | 2861 | error = XFS_ERROR(EIO); |
2977 | break; | 2862 | break; |
@@ -2981,8 +2866,8 @@ xlog_recover_process_data( | |||
2981 | dp, be32_to_cpu(ohead->oh_len)); | 2866 | dp, be32_to_cpu(ohead->oh_len)); |
2982 | break; | 2867 | break; |
2983 | default: | 2868 | default: |
2984 | xlog_warn( | 2869 | xfs_warn(log->l_mp, "%s: bad flag 0x%x", |
2985 | "XFS: xlog_recover_process_data: bad flag"); | 2870 | __func__, flags); |
2986 | ASSERT(0); | 2871 | ASSERT(0); |
2987 | error = XFS_ERROR(EIO); | 2872 | error = XFS_ERROR(EIO); |
2988 | break; | 2873 | break; |
@@ -3012,7 +2897,7 @@ xlog_recover_process_efi( | |||
3012 | xfs_extent_t *extp; | 2897 | xfs_extent_t *extp; |
3013 | xfs_fsblock_t startblock_fsb; | 2898 | xfs_fsblock_t startblock_fsb; |
3014 | 2899 | ||
3015 | ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); | 2900 | ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); |
3016 | 2901 | ||
3017 | /* | 2902 | /* |
3018 | * First check the validity of the extents described by the | 2903 | * First check the validity of the extents described by the |
@@ -3051,7 +2936,7 @@ xlog_recover_process_efi( | |||
3051 | extp->ext_len); | 2936 | extp->ext_len); |
3052 | } | 2937 | } |
3053 | 2938 | ||
3054 | efip->efi_flags |= XFS_EFI_RECOVERED; | 2939 | set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); |
3055 | error = xfs_trans_commit(tp, 0); | 2940 | error = xfs_trans_commit(tp, 0); |
3056 | return error; | 2941 | return error; |
3057 | 2942 | ||
@@ -3108,7 +2993,7 @@ xlog_recover_process_efis( | |||
3108 | * Skip EFIs that we've already processed. | 2993 | * Skip EFIs that we've already processed. |
3109 | */ | 2994 | */ |
3110 | efip = (xfs_efi_log_item_t *)lip; | 2995 | efip = (xfs_efi_log_item_t *)lip; |
3111 | if (efip->efi_flags & XFS_EFI_RECOVERED) { | 2996 | if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { |
3112 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 2997 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
3113 | continue; | 2998 | continue; |
3114 | } | 2999 | } |
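The efi_flags accesses above move from plain read-modify-write ("efi_flags |= XFS_EFI_RECOVERED") to set_bit()/test_bit(), which operate atomically on an unsigned long and need no external lock. A rough user-space analogue, with a compiler builtin standing in for set_bit() (DEMO_RECOVERED is illustrative):

	#include <stdio.h>

	#define DEMO_RECOVERED	0	/* a bit number, like XFS_EFI_RECOVERED */

	static unsigned long demo_flags;

	int main(void)
	{
		__atomic_fetch_or(&demo_flags, 1UL << DEMO_RECOVERED,
				  __ATOMIC_RELAXED);		/* ~ set_bit() */
		if (demo_flags & (1UL << DEMO_RECOVERED))	/* ~ test_bit() */
			puts("EFI already recovered, skip it");
		return 0;
	}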
@@ -3167,8 +3052,7 @@ xlog_recover_clear_agi_bucket( | |||
3167 | out_abort: | 3052 | out_abort: |
3168 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3053 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); |
3169 | out_error: | 3054 | out_error: |
3170 | xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " | 3055 | xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); |
3171 | "failed to clear agi %d. Continuing.", agno); | ||
3172 | return; | 3056 | return; |
3173 | } | 3057 | } |
3174 | 3058 | ||
@@ -3419,7 +3303,7 @@ xlog_valid_rec_header( | |||
3419 | if (unlikely( | 3303 | if (unlikely( |
3420 | (!rhead->h_version || | 3304 | (!rhead->h_version || |
3421 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { | 3305 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
3422 | xlog_warn("XFS: %s: unrecognised log version (%d).", | 3306 | xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", |
3423 | __func__, be32_to_cpu(rhead->h_version)); | 3307 | __func__, be32_to_cpu(rhead->h_version)); |
3424 | return XFS_ERROR(EIO); | 3308 | return XFS_ERROR(EIO); |
3425 | } | 3309 | } |
@@ -3585,19 +3469,9 @@ xlog_do_recovery_pass( | |||
3585 | * - order is important. | 3469 | * - order is important. |
3586 | */ | 3470 | */ |
3587 | wrapped_hblks = hblks - split_hblks; | 3471 | wrapped_hblks = hblks - split_hblks; |
3588 | error = XFS_BUF_SET_PTR(hbp, | 3472 | error = xlog_bread_offset(log, 0, |
3589 | offset + BBTOB(split_hblks), | 3473 | wrapped_hblks, hbp, |
3590 | BBTOB(hblks - split_hblks)); | 3474 | offset + BBTOB(split_hblks)); |
3591 | if (error) | ||
3592 | goto bread_err2; | ||
3593 | |||
3594 | error = xlog_bread_noalign(log, 0, | ||
3595 | wrapped_hblks, hbp); | ||
3596 | if (error) | ||
3597 | goto bread_err2; | ||
3598 | |||
3599 | error = XFS_BUF_SET_PTR(hbp, offset, | ||
3600 | BBTOB(hblks)); | ||
3601 | if (error) | 3475 | if (error) |
3602 | goto bread_err2; | 3476 | goto bread_err2; |
3603 | } | 3477 | } |
@@ -3648,19 +3522,9 @@ xlog_do_recovery_pass( | |||
3648 | * _first_, then the log start (LR header end) | 3522 | * _first_, then the log start (LR header end) |
3649 | * - order is important. | 3523 | * - order is important. |
3650 | */ | 3524 | */ |
3651 | error = XFS_BUF_SET_PTR(dbp, | 3525 | error = xlog_bread_offset(log, 0, |
3652 | offset + BBTOB(split_bblks), | 3526 | bblks - split_bblks, dbp, |
3653 | BBTOB(bblks - split_bblks)); | 3527 | offset + BBTOB(split_bblks)); |
3654 | if (error) | ||
3655 | goto bread_err2; | ||
3656 | |||
3657 | error = xlog_bread_noalign(log, wrapped_hblks, | ||
3658 | bblks - split_bblks, | ||
3659 | dbp); | ||
3660 | if (error) | ||
3661 | goto bread_err2; | ||
3662 | |||
3663 | error = XFS_BUF_SET_PTR(dbp, offset, h_size); | ||
3664 | if (error) | 3528 | if (error) |
3665 | goto bread_err2; | 3529 | goto bread_err2; |
3666 | } | 3530 | } |
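Both wrap-around reads collapse the old three-step dance -- re-point the buffer, read, restore the pointer -- into a single xlog_bread_offset() call; note the wrapped data blocks land in dbp, the data buffer the old code used, not in the header buffer hbp. The helper itself is introduced elsewhere in the commit and is not shown in this section; a sketch consistent with the deleted open-coded sequence (not necessarily the exact upstream body) would be:

	STATIC int
	xlog_bread_offset(
		xlog_t		*log,
		xfs_daddr_t	blk_no,		/* block to read from */
		int		nbblks,		/* blocks to read */
		xfs_buf_t	*bp,
		xfs_caddr_t	offset)		/* where in bp to land the data */
	{
		xfs_caddr_t	orig_offset = XFS_BUF_PTR(bp);
		int		orig_len = bp->b_buffer_length;
		int		error, error2;

		error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
		if (error)
			return error;

		error = xlog_bread_noalign(log, blk_no, nbblks, bp);

		/* must reset the buffer pointer even on error */
		error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
		if (error)
			return error;
		return error2;
	}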
@@ -3725,7 +3589,7 @@ xlog_do_log_recovery( | |||
3725 | xfs_daddr_t head_blk, | 3589 | xfs_daddr_t head_blk, |
3726 | xfs_daddr_t tail_blk) | 3590 | xfs_daddr_t tail_blk) |
3727 | { | 3591 | { |
3728 | int error; | 3592 | int error, i; |
3729 | 3593 | ||
3730 | ASSERT(head_blk != tail_blk); | 3594 | ASSERT(head_blk != tail_blk); |
3731 | 3595 | ||
@@ -3733,10 +3597,12 @@ xlog_do_log_recovery( | |||
3733 | * First do a pass to find all of the cancelled buf log items. | 3597 | * First do a pass to find all of the cancelled buf log items. |
3734 | * Store them in the buf_cancel_table for use in the second pass. | 3598 | * Store them in the buf_cancel_table for use in the second pass. |
3735 | */ | 3599 | */ |
3736 | log->l_buf_cancel_table = | 3600 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * |
3737 | (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * | 3601 | sizeof(struct list_head), |
3738 | sizeof(xfs_buf_cancel_t*), | ||
3739 | KM_SLEEP); | 3602 | KM_SLEEP); |
3603 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | ||
3604 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); | ||
3605 | |||
3740 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3606 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3741 | XLOG_RECOVER_PASS1); | 3607 | XLOG_RECOVER_PASS1); |
3742 | if (error != 0) { | 3608 | if (error != 0) { |
@@ -3755,7 +3621,7 @@ xlog_do_log_recovery( | |||
3755 | int i; | 3621 | int i; |
3756 | 3622 | ||
3757 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 3623 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
3758 | ASSERT(log->l_buf_cancel_table[i] == NULL); | 3624 | ASSERT(list_empty(&log->l_buf_cancel_table[i])); |
3759 | } | 3625 | } |
3760 | #endif /* DEBUG */ | 3626 | #endif /* DEBUG */ |
3761 | 3627 | ||
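The buffer-cancellation table changes representation here: an array of xfs_buf_cancel_t pointers, where empty meant NULL, becomes an array of list heads, where empty means list_empty() -- hence the allocation sized by struct list_head and the INIT_LIST_HEAD() loop. A self-contained user-space analogue with a minimal circular list like the kernel's <linux/list.h>:

	#include <stdio.h>
	#include <stdlib.h>

	struct list_head { struct list_head *next, *prev; };

	static void INIT_LIST_HEAD(struct list_head *h)
	{
		h->next = h->prev = h;	/* an empty list points at itself */
	}

	static int list_empty(const struct list_head *h)
	{
		return h->next == h;
	}

	#define TABLE_SIZE 64		/* stands in for XLOG_BC_TABLE_SIZE */

	int main(void)
	{
		struct list_head *table = calloc(TABLE_SIZE, sizeof(*table));
		int i;

		for (i = 0; i < TABLE_SIZE; i++)
			INIT_LIST_HEAD(&table[i]);
		printf("bucket 0 empty: %d\n", list_empty(&table[0]));
		free(table);
		return 0;
	}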
@@ -3817,7 +3683,7 @@ xlog_do_recover( | |||
3817 | XFS_BUF_READ(bp); | 3683 | XFS_BUF_READ(bp); |
3818 | XFS_BUF_UNASYNC(bp); | 3684 | XFS_BUF_UNASYNC(bp); |
3819 | xfsbdstrat(log->l_mp, bp); | 3685 | xfsbdstrat(log->l_mp, bp); |
3820 | error = xfs_iowait(bp); | 3686 | error = xfs_buf_iowait(bp); |
3821 | if (error) { | 3687 | if (error) { |
3822 | xfs_ioerror_alert("xlog_do_recover", | 3688 | xfs_ioerror_alert("xlog_do_recover", |
3823 | log->l_mp, bp, XFS_BUF_ADDR(bp)); | 3689 | log->l_mp, bp, XFS_BUF_ADDR(bp)); |
@@ -3875,10 +3741,9 @@ xlog_recover( | |||
3875 | return error; | 3741 | return error; |
3876 | } | 3742 | } |
3877 | 3743 | ||
3878 | cmn_err(CE_NOTE, | 3744 | xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", |
3879 | "Starting XFS recovery on filesystem: %s (logdev: %s)", | 3745 | log->l_mp->m_logname ? log->l_mp->m_logname |
3880 | log->l_mp->m_fsname, log->l_mp->m_logname ? | 3746 | : "internal"); |
3881 | log->l_mp->m_logname : "internal"); | ||
3882 | 3747 | ||
3883 | error = xlog_do_recover(log, head_blk, tail_blk); | 3748 | error = xlog_do_recover(log, head_blk, tail_blk); |
3884 | log->l_flags |= XLOG_RECOVERY_NEEDED; | 3749 | log->l_flags |= XLOG_RECOVERY_NEEDED; |
@@ -3911,9 +3776,7 @@ xlog_recover_finish( | |||
3911 | int error; | 3776 | int error; |
3912 | error = xlog_recover_process_efis(log); | 3777 | error = xlog_recover_process_efis(log); |
3913 | if (error) { | 3778 | if (error) { |
3914 | cmn_err(CE_ALERT, | 3779 | xfs_alert(log->l_mp, "Failed to recover EFIs"); |
3915 | "Failed to recover EFIs on filesystem: %s", | ||
3916 | log->l_mp->m_fsname); | ||
3917 | return error; | 3780 | return error; |
3918 | } | 3781 | } |
3919 | /* | 3782 | /* |
@@ -3928,15 +3791,12 @@ xlog_recover_finish( | |||
3928 | 3791 | ||
3929 | xlog_recover_check_summary(log); | 3792 | xlog_recover_check_summary(log); |
3930 | 3793 | ||
3931 | cmn_err(CE_NOTE, | 3794 | xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", |
3932 | "Ending XFS recovery on filesystem: %s (logdev: %s)", | 3795 | log->l_mp->m_logname ? log->l_mp->m_logname |
3933 | log->l_mp->m_fsname, log->l_mp->m_logname ? | 3796 | : "internal"); |
3934 | log->l_mp->m_logname : "internal"); | ||
3935 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; | 3797 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; |
3936 | } else { | 3798 | } else { |
3937 | cmn_err(CE_DEBUG, | 3799 | xfs_info(log->l_mp, "Ending clean mount"); |
3938 | "!Ending clean XFS mount for filesystem: %s\n", | ||
3939 | log->l_mp->m_fsname); | ||
3940 | } | 3800 | } |
3941 | return 0; | 3801 | return 0; |
3942 | } | 3802 | } |
@@ -3969,10 +3829,8 @@ xlog_recover_check_summary( | |||
3969 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | 3829 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
3970 | error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); | 3830 | error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); |
3971 | if (error) { | 3831 | if (error) { |
3972 | xfs_fs_cmn_err(CE_ALERT, mp, | 3832 | xfs_alert(mp, "%s agf read failed agno %d error %d", |
3973 | "xlog_recover_check_summary(agf)" | 3833 | __func__, agno, error); |
3974 | "agf read failed agno %d error %d", | ||
3975 | agno, error); | ||
3976 | } else { | 3834 | } else { |
3977 | agfp = XFS_BUF_TO_AGF(agfbp); | 3835 | agfp = XFS_BUF_TO_AGF(agfbp); |
3978 | freeblks += be32_to_cpu(agfp->agf_freeblks) + | 3836 | freeblks += be32_to_cpu(agfp->agf_freeblks) + |
@@ -3981,7 +3839,10 @@ xlog_recover_check_summary( | |||
3981 | } | 3839 | } |
3982 | 3840 | ||
3983 | error = xfs_read_agi(mp, NULL, agno, &agibp); | 3841 | error = xfs_read_agi(mp, NULL, agno, &agibp); |
3984 | if (!error) { | 3842 | if (error) { |
3843 | xfs_alert(mp, "%s agi read failed agno %d error %d", | ||
3844 | __func__, agno, error); | ||
3845 | } else { | ||
3985 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); | 3846 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); |
3986 | 3847 | ||
3987 | itotal += be32_to_cpu(agi->agi_count); | 3848 | itotal += be32_to_cpu(agi->agi_count); |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index aeb9d72ebf6e..b49b82363d20 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -52,16 +52,11 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, | |||
52 | int); | 52 | int); |
53 | STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, | 53 | STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, |
54 | int); | 54 | int); |
55 | STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, | ||
56 | int64_t, int); | ||
57 | STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); | 55 | STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); |
58 | |||
59 | #else | 56 | #else |
60 | 57 | ||
61 | #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) | 58 | #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) |
62 | #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) | 59 | #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) |
63 | #define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) | ||
64 | |||
65 | #endif | 60 | #endif |
66 | 61 | ||
67 | static const struct { | 62 | static const struct { |
@@ -138,9 +133,7 @@ xfs_uuid_mount( | |||
138 | return 0; | 133 | return 0; |
139 | 134 | ||
140 | if (uuid_is_nil(uuid)) { | 135 | if (uuid_is_nil(uuid)) { |
141 | cmn_err(CE_WARN, | 136 | xfs_warn(mp, "Filesystem has nil UUID - can't mount"); |
142 | "XFS: Filesystem %s has nil UUID - can't mount", | ||
143 | mp->m_fsname); | ||
144 | return XFS_ERROR(EINVAL); | 137 | return XFS_ERROR(EINVAL); |
145 | } | 138 | } |
146 | 139 | ||
@@ -168,8 +161,7 @@ xfs_uuid_mount( | |||
168 | 161 | ||
169 | out_duplicate: | 162 | out_duplicate: |
170 | mutex_unlock(&xfs_uuid_table_mutex); | 163 | mutex_unlock(&xfs_uuid_table_mutex); |
171 | cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount", | 164 | xfs_warn(mp, "Filesystem has duplicate UUID - can't mount"); |
172 | mp->m_fsname); | ||
173 | return XFS_ERROR(EINVAL); | 165 | return XFS_ERROR(EINVAL); |
174 | } | 166 | } |
175 | 167 | ||
@@ -199,6 +191,8 @@ xfs_uuid_unmount( | |||
199 | 191 | ||
200 | /* | 192 | /* |
201 | * Reference counting access wrappers to the perag structures. | 193 | * Reference counting access wrappers to the perag structures. |
194 | * Because we never free per-ag structures, the only thing we | ||
195 | * have to protect against changes is the tree structure itself. | ||
202 | */ | 196 | */ |
203 | struct xfs_perag * | 197 | struct xfs_perag * |
204 | xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) | 198 | xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) |
@@ -206,19 +200,43 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) | |||
206 | struct xfs_perag *pag; | 200 | struct xfs_perag *pag; |
207 | int ref = 0; | 201 | int ref = 0; |
208 | 202 | ||
209 | spin_lock(&mp->m_perag_lock); | 203 | rcu_read_lock(); |
210 | pag = radix_tree_lookup(&mp->m_perag_tree, agno); | 204 | pag = radix_tree_lookup(&mp->m_perag_tree, agno); |
211 | if (pag) { | 205 | if (pag) { |
212 | ASSERT(atomic_read(&pag->pag_ref) >= 0); | 206 | ASSERT(atomic_read(&pag->pag_ref) >= 0); |
213 | /* catch leaks in the positive direction during testing */ | ||
214 | ASSERT(atomic_read(&pag->pag_ref) < 1000); | ||
215 | ref = atomic_inc_return(&pag->pag_ref); | 207 | ref = atomic_inc_return(&pag->pag_ref); |
216 | } | 208 | } |
217 | spin_unlock(&mp->m_perag_lock); | 209 | rcu_read_unlock(); |
218 | trace_xfs_perag_get(mp, agno, ref, _RET_IP_); | 210 | trace_xfs_perag_get(mp, agno, ref, _RET_IP_); |
219 | return pag; | 211 | return pag; |
220 | } | 212 | } |
221 | 213 | ||
214 | /* | ||
215 | * search from @first to find the next perag with the given tag set. | ||
216 | */ | ||
217 | struct xfs_perag * | ||
218 | xfs_perag_get_tag( | ||
219 | struct xfs_mount *mp, | ||
220 | xfs_agnumber_t first, | ||
221 | int tag) | ||
222 | { | ||
223 | struct xfs_perag *pag; | ||
224 | int found; | ||
225 | int ref; | ||
226 | |||
227 | rcu_read_lock(); | ||
228 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
229 | (void **)&pag, first, 1, tag); | ||
230 | if (found <= 0) { | ||
231 | rcu_read_unlock(); | ||
232 | return NULL; | ||
233 | } | ||
234 | ref = atomic_inc_return(&pag->pag_ref); | ||
235 | rcu_read_unlock(); | ||
236 | trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); | ||
237 | return pag; | ||
238 | } | ||
239 | |||
222 | void | 240 | void |
223 | xfs_perag_put(struct xfs_perag *pag) | 241 | xfs_perag_put(struct xfs_perag *pag) |
224 | { | 242 | { |
@@ -229,10 +247,18 @@ xfs_perag_put(struct xfs_perag *pag) | |||
229 | trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); | 247 | trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); |
230 | } | 248 | } |
231 | 249 | ||
250 | STATIC void | ||
251 | __xfs_free_perag( | ||
252 | struct rcu_head *head) | ||
253 | { | ||
254 | struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); | ||
255 | |||
256 | ASSERT(atomic_read(&pag->pag_ref) == 0); | ||
257 | kmem_free(pag); | ||
258 | } | ||
259 | |||
232 | /* | 260 | /* |
233 | * Free up the resources associated with a mount structure. Assume that | 261 | * Free up the per-ag resources associated with the mount structure. |
234 | * the structure was initially zeroed, so we can tell which fields got | ||
235 | * initialized. | ||
236 | */ | 262 | */ |
237 | STATIC void | 263 | STATIC void |
238 | xfs_free_perag( | 264 | xfs_free_perag( |
@@ -244,10 +270,10 @@ xfs_free_perag( | |||
244 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | 270 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
245 | spin_lock(&mp->m_perag_lock); | 271 | spin_lock(&mp->m_perag_lock); |
246 | pag = radix_tree_delete(&mp->m_perag_tree, agno); | 272 | pag = radix_tree_delete(&mp->m_perag_tree, agno); |
273 | spin_unlock(&mp->m_perag_lock); | ||
247 | ASSERT(pag); | 274 | ASSERT(pag); |
248 | ASSERT(atomic_read(&pag->pag_ref) == 0); | 275 | ASSERT(atomic_read(&pag->pag_ref) == 0); |
249 | spin_unlock(&mp->m_perag_lock); | 276 | call_rcu(&pag->rcu_head, __xfs_free_perag); |
250 | kmem_free(pag); | ||
251 | } | 277 | } |
252 | } | 278 | } |
253 | 279 | ||
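The per-ag lookup and free above form the classic RCU pairing: xfs_perag_get() now walks the radix tree under rcu_read_lock() and takes a reference, while xfs_free_perag() deletes under the spinlock and defers the kmem_free() through call_rcu(), so lockless readers drain before the memory disappears. A user-space analogue assuming liburcu (link with -lurcu; its API intentionally mirrors the kernel's, and demo_perag is a stand-in):

	#include <stddef.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <urcu.h>	/* rcu_register_thread(), call_rcu(), ... */

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct demo_perag {	/* stand-in for struct xfs_perag */
		int		agno;
		struct rcu_head	rcu_head;
	};

	static void demo_free_perag(struct rcu_head *head)
	{
		struct demo_perag *pag =
			container_of(head, struct demo_perag, rcu_head);

		printf("freeing ag %d after the grace period\n", pag->agno);
		free(pag);
	}

	int main(void)
	{
		struct demo_perag *pag = malloc(sizeof(*pag));

		pag->agno = 0;
		rcu_register_thread();
		/* ...radix_tree_delete() would happen here, under the lock... */
		call_rcu(&pag->rcu_head, demo_free_perag);
		rcu_barrier();	/* wait for pending callbacks to finish */
		rcu_unregister_thread();
		return 0;
	}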
@@ -282,6 +308,8 @@ xfs_mount_validate_sb( | |||
282 | xfs_sb_t *sbp, | 308 | xfs_sb_t *sbp, |
283 | int flags) | 309 | int flags) |
284 | { | 310 | { |
311 | int loud = !(flags & XFS_MFSI_QUIET); | ||
312 | |||
285 | /* | 313 | /* |
286 | * If the log device and data device have the | 314 | * If the log device and data device have the |
287 | * same device number, the log is internal. | 315 | * same device number, the log is internal. |
@@ -290,28 +318,32 @@ xfs_mount_validate_sb( | |||
290 | * a volume filesystem in a non-volume manner. | 318 | * a volume filesystem in a non-volume manner. |
291 | */ | 319 | */ |
292 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { | 320 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { |
293 | xfs_fs_mount_cmn_err(flags, "bad magic number"); | 321 | if (loud) |
322 | xfs_warn(mp, "bad magic number"); | ||
294 | return XFS_ERROR(EWRONGFS); | 323 | return XFS_ERROR(EWRONGFS); |
295 | } | 324 | } |
296 | 325 | ||
297 | if (!xfs_sb_good_version(sbp)) { | 326 | if (!xfs_sb_good_version(sbp)) { |
298 | xfs_fs_mount_cmn_err(flags, "bad version"); | 327 | if (loud) |
328 | xfs_warn(mp, "bad version"); | ||
299 | return XFS_ERROR(EWRONGFS); | 329 | return XFS_ERROR(EWRONGFS); |
300 | } | 330 | } |
301 | 331 | ||
302 | if (unlikely( | 332 | if (unlikely( |
303 | sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { | 333 | sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { |
304 | xfs_fs_mount_cmn_err(flags, | 334 | if (loud) |
305 | "filesystem is marked as having an external log; " | 335 | xfs_warn(mp, |
306 | "specify logdev on the\nmount command line."); | 336 | "filesystem is marked as having an external log; " |
337 | "specify logdev on the mount command line."); | ||
307 | return XFS_ERROR(EINVAL); | 338 | return XFS_ERROR(EINVAL); |
308 | } | 339 | } |
309 | 340 | ||
310 | if (unlikely( | 341 | if (unlikely( |
311 | sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { | 342 | sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { |
312 | xfs_fs_mount_cmn_err(flags, | 343 | if (loud) |
313 | "filesystem is marked as having an internal log; " | 344 | xfs_warn(mp, |
314 | "do not specify logdev on\nthe mount command line."); | 345 | "filesystem is marked as having an internal log; " |
346 | "do not specify logdev on the mount command line."); | ||
315 | return XFS_ERROR(EINVAL); | 347 | return XFS_ERROR(EINVAL); |
316 | } | 348 | } |
317 | 349 | ||
@@ -340,7 +372,8 @@ xfs_mount_validate_sb( | |||
340 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || | 372 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || |
341 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || | 373 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || |
342 | (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { | 374 | (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { |
343 | xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); | 375 | if (loud) |
376 | xfs_warn(mp, "SB sanity check 1 failed"); | ||
344 | return XFS_ERROR(EFSCORRUPTED); | 377 | return XFS_ERROR(EFSCORRUPTED); |
345 | } | 378 | } |
346 | 379 | ||
@@ -353,7 +386,8 @@ xfs_mount_validate_sb( | |||
353 | (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || | 386 | (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || |
354 | sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * | 387 | sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * |
355 | sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { | 388 | sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { |
356 | xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); | 389 | if (loud) |
390 | xfs_warn(mp, "SB sanity check 2 failed"); | ||
357 | return XFS_ERROR(EFSCORRUPTED); | 391 | return XFS_ERROR(EFSCORRUPTED); |
358 | } | 392 | } |
359 | 393 | ||
@@ -361,12 +395,12 @@ xfs_mount_validate_sb( | |||
361 | * Until this is fixed only page-sized or smaller data blocks work. | 395 | * Until this is fixed only page-sized or smaller data blocks work. |
362 | */ | 396 | */ |
363 | if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { | 397 | if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { |
364 | xfs_fs_mount_cmn_err(flags, | 398 | if (loud) { |
365 | "file system with blocksize %d bytes", | 399 | xfs_warn(mp, |
366 | sbp->sb_blocksize); | 400 | "File system with blocksize %d bytes. " |
367 | xfs_fs_mount_cmn_err(flags, | 401 | "Only pagesize (%ld) or less will currently work.", |
368 | "only pagesize (%ld) or less will currently work.", | 402 | sbp->sb_blocksize, PAGE_SIZE); |
369 | PAGE_SIZE); | 403 | } |
370 | return XFS_ERROR(ENOSYS); | 404 | return XFS_ERROR(ENOSYS); |
371 | } | 405 | } |
372 | 406 | ||
@@ -380,21 +414,23 @@ xfs_mount_validate_sb( | |||
380 | case 2048: | 414 | case 2048: |
381 | break; | 415 | break; |
382 | default: | 416 | default: |
383 | xfs_fs_mount_cmn_err(flags, | 417 | if (loud) |
384 | "inode size of %d bytes not supported", | 418 | xfs_warn(mp, "inode size of %d bytes not supported", |
385 | sbp->sb_inodesize); | 419 | sbp->sb_inodesize); |
386 | return XFS_ERROR(ENOSYS); | 420 | return XFS_ERROR(ENOSYS); |
387 | } | 421 | } |
388 | 422 | ||
389 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || | 423 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || |
390 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { | 424 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { |
391 | xfs_fs_mount_cmn_err(flags, | 425 | if (loud) |
392 | "file system too large to be mounted on this system."); | 426 | xfs_warn(mp, |
427 | "file system too large to be mounted on this system."); | ||
393 | return XFS_ERROR(EFBIG); | 428 | return XFS_ERROR(EFBIG); |
394 | } | 429 | } |
395 | 430 | ||
396 | if (unlikely(sbp->sb_inprogress)) { | 431 | if (unlikely(sbp->sb_inprogress)) { |
397 | xfs_fs_mount_cmn_err(flags, "file system busy"); | 432 | if (loud) |
433 | xfs_warn(mp, "file system busy"); | ||
398 | return XFS_ERROR(EFSCORRUPTED); | 434 | return XFS_ERROR(EFSCORRUPTED); |
399 | } | 435 | } |
400 | 436 | ||
@@ -402,8 +438,9 @@ xfs_mount_validate_sb( | |||
402 | * Version 1 directory format has never worked on Linux. | 438 | * Version 1 directory format has never worked on Linux. |
403 | */ | 439 | */ |
404 | if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { | 440 | if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { |
405 | xfs_fs_mount_cmn_err(flags, | 441 | if (loud) |
406 | "file system using version 1 directory format"); | 442 | xfs_warn(mp, |
443 | "file system using version 1 directory format"); | ||
407 | return XFS_ERROR(ENOSYS); | 444 | return XFS_ERROR(ENOSYS); |
408 | } | 445 | } |
409 | 446 | ||
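Every check in xfs_mount_validate_sb() gets the same conversion: instead of the self-filtering xfs_fs_mount_cmn_err(flags, ...), the quiet flag is evaluated once into "int loud = !(flags & XFS_MFSI_QUIET)" and ordinary xfs_warn() calls are guarded with it, so probing mounts stay silent. A compilable miniature of the pattern (DEMO_QUIET and demo_validate are stand-ins; 0x58465342 is the real "XFSB" magic):

	#include <stdio.h>

	#define DEMO_QUIET	(1 << 0)	/* stands in for XFS_MFSI_QUIET */

	static int demo_validate(unsigned int magic, int flags)
	{
		int loud = !(flags & DEMO_QUIET);

		if (magic != 0x58465342) {	/* XFS_SB_MAGIC, "XFSB" */
			if (loud)
				fprintf(stderr, "bad magic number\n");
			return -1;
		}
		return 0;
	}

	int main(void)
	{
		return demo_validate(0x58465342, DEMO_QUIET) ? 1 : 0;
	}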
@@ -443,8 +480,11 @@ xfs_initialize_perag( | |||
443 | goto out_unwind; | 480 | goto out_unwind; |
444 | pag->pag_agno = index; | 481 | pag->pag_agno = index; |
445 | pag->pag_mount = mp; | 482 | pag->pag_mount = mp; |
446 | rwlock_init(&pag->pag_ici_lock); | 483 | spin_lock_init(&pag->pag_ici_lock); |
484 | mutex_init(&pag->pag_ici_reclaim_lock); | ||
447 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | 485 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
486 | spin_lock_init(&pag->pag_buf_lock); | ||
487 | pag->pag_buf_tree = RB_ROOT; | ||
448 | 488 | ||
449 | if (radix_tree_preload(GFP_NOFS)) | 489 | if (radix_tree_preload(GFP_NOFS)) |
450 | goto out_unwind; | 490 | goto out_unwind; |
@@ -639,9 +679,9 @@ int | |||
639 | xfs_readsb(xfs_mount_t *mp, int flags) | 679 | xfs_readsb(xfs_mount_t *mp, int flags) |
640 | { | 680 | { |
641 | unsigned int sector_size; | 681 | unsigned int sector_size; |
642 | unsigned int extra_flags; | ||
643 | xfs_buf_t *bp; | 682 | xfs_buf_t *bp; |
644 | int error; | 683 | int error; |
684 | int loud = !(flags & XFS_MFSI_QUIET); | ||
645 | 685 | ||
646 | ASSERT(mp->m_sb_bp == NULL); | 686 | ASSERT(mp->m_sb_bp == NULL); |
647 | ASSERT(mp->m_ddev_targp != NULL); | 687 | ASSERT(mp->m_ddev_targp != NULL); |
@@ -652,39 +692,37 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
652 | * access to the superblock. | 692 | * access to the superblock. |
653 | */ | 693 | */ |
654 | sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); | 694 | sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); |
655 | extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED; | ||
656 | 695 | ||
657 | bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), | 696 | reread: |
658 | extra_flags); | 697 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, |
659 | if (!bp || XFS_BUF_ISERROR(bp)) { | 698 | XFS_SB_DADDR, sector_size, 0); |
660 | xfs_fs_mount_cmn_err(flags, "SB read failed"); | 699 | if (!bp) { |
661 | error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; | 700 | if (loud) |
662 | goto fail; | 701 | xfs_warn(mp, "SB buffer read failed"); |
702 | return EIO; | ||
663 | } | 703 | } |
664 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
665 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | ||
666 | 704 | ||
667 | /* | 705 | /* |
668 | * Initialize the mount structure from the superblock. | 706 | * Initialize the mount structure from the superblock. |
669 | * But first do some basic consistency checking. | 707 | * But first do some basic consistency checking. |
670 | */ | 708 | */ |
671 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); | 709 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); |
672 | |||
673 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); | 710 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); |
674 | if (error) { | 711 | if (error) { |
675 | xfs_fs_mount_cmn_err(flags, "SB validate failed"); | 712 | if (loud) |
676 | goto fail; | 713 | xfs_warn(mp, "SB validate failed"); |
714 | goto release_buf; | ||
677 | } | 715 | } |
678 | 716 | ||
679 | /* | 717 | /* |
680 | * We must be able to do sector-sized and sector-aligned IO. | 718 | * We must be able to do sector-sized and sector-aligned IO. |
681 | */ | 719 | */ |
682 | if (sector_size > mp->m_sb.sb_sectsize) { | 720 | if (sector_size > mp->m_sb.sb_sectsize) { |
683 | xfs_fs_mount_cmn_err(flags, | 721 | if (loud) |
684 | "device supports only %u byte sectors (not %u)", | 722 | xfs_warn(mp, "device supports %u byte sectors (not %u)", |
685 | sector_size, mp->m_sb.sb_sectsize); | 723 | sector_size, mp->m_sb.sb_sectsize); |
686 | error = ENOSYS; | 724 | error = ENOSYS; |
687 | goto fail; | 725 | goto release_buf; |
688 | } | 726 | } |
689 | 727 | ||
690 | /* | 728 | /* |
@@ -692,33 +730,20 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
692 | * re-read the superblock so the buffer is correctly sized. | 730 | * re-read the superblock so the buffer is correctly sized. |
693 | */ | 731 | */ |
694 | if (sector_size < mp->m_sb.sb_sectsize) { | 732 | if (sector_size < mp->m_sb.sb_sectsize) { |
695 | XFS_BUF_UNMANAGE(bp); | ||
696 | xfs_buf_relse(bp); | 733 | xfs_buf_relse(bp); |
697 | sector_size = mp->m_sb.sb_sectsize; | 734 | sector_size = mp->m_sb.sb_sectsize; |
698 | bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, | 735 | goto reread; |
699 | BTOBB(sector_size), extra_flags); | ||
700 | if (!bp || XFS_BUF_ISERROR(bp)) { | ||
701 | xfs_fs_mount_cmn_err(flags, "SB re-read failed"); | ||
702 | error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; | ||
703 | goto fail; | ||
704 | } | ||
705 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
706 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | ||
707 | } | 736 | } |
708 | 737 | ||
709 | /* Initialize per-cpu counters */ | 738 | /* Initialize per-cpu counters */ |
710 | xfs_icsb_reinit_counters(mp); | 739 | xfs_icsb_reinit_counters(mp); |
711 | 740 | ||
712 | mp->m_sb_bp = bp; | 741 | mp->m_sb_bp = bp; |
713 | xfs_buf_relse(bp); | 742 | xfs_buf_unlock(bp); |
714 | ASSERT(XFS_BUF_VALUSEMA(bp) > 0); | ||
715 | return 0; | 743 | return 0; |
716 | 744 | ||
717 | fail: | 745 | release_buf: |
718 | if (bp) { | 746 | xfs_buf_relse(bp); |
719 | XFS_BUF_UNMANAGE(bp); | ||
720 | xfs_buf_relse(bp); | ||
721 | } | ||
722 | return error; | 747 | return error; |
723 | } | 748 | } |
724 | 749 | ||
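The rewritten xfs_readsb() replaces the duplicated second read with a "reread" loop: read the superblock uncached at the device's minimum sector size, validate it, and if the superblock declares a larger sector size, release the buffer and jump back to read it again at the correct size. The control flow as a compilable toy (read_sb() stands in for xfs_buf_read_uncached() plus xfs_sb_from_disk()):

	#include <stdio.h>

	static int read_sb(int sector_size, int *sb_sectsize)
	{
		*sb_sectsize = 4096;	/* pretend the SB declares 4k sectors */
		printf("read %d-byte superblock\n", sector_size);
		return 0;
	}

	int main(void)
	{
		int sector_size = 512;	/* device minimum, the first guess */
		int sb_sectsize;

	reread:
		if (read_sb(sector_size, &sb_sectsize))
			return 1;
		if (sector_size < sb_sectsize) {
			sector_size = sb_sectsize;	/* loop exactly once */
			goto reread;
		}
		return 0;
	}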
@@ -839,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
839 | if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || | 864 | if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || |
840 | (BBTOB(mp->m_swidth) & mp->m_blockmask)) { | 865 | (BBTOB(mp->m_swidth) & mp->m_blockmask)) { |
841 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 866 | if (mp->m_flags & XFS_MOUNT_RETERR) { |
842 | cmn_err(CE_WARN, | 867 | xfs_warn(mp, "alignment check 1 failed"); |
843 | "XFS: alignment check 1 failed"); | ||
844 | return XFS_ERROR(EINVAL); | 868 | return XFS_ERROR(EINVAL); |
845 | } | 869 | } |
846 | mp->m_dalign = mp->m_swidth = 0; | 870 | mp->m_dalign = mp->m_swidth = 0; |
@@ -853,8 +877,9 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
853 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 877 | if (mp->m_flags & XFS_MOUNT_RETERR) { |
854 | return XFS_ERROR(EINVAL); | 878 | return XFS_ERROR(EINVAL); |
855 | } | 879 | } |
856 | xfs_fs_cmn_err(CE_WARN, mp, | 880 | xfs_warn(mp, |
857 | "stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", | 881 | "stripe alignment turned off: sunit(%d)/swidth(%d) " |
882 | "incompatible with agsize(%d)", | ||
858 | mp->m_dalign, mp->m_swidth, | 883 | mp->m_dalign, mp->m_swidth, |
859 | sbp->sb_agblocks); | 884 | sbp->sb_agblocks); |
860 | 885 | ||
@@ -864,9 +889,9 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
864 | mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); | 889 | mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); |
865 | } else { | 890 | } else { |
866 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 891 | if (mp->m_flags & XFS_MOUNT_RETERR) { |
867 | xfs_fs_cmn_err(CE_WARN, mp, | 892 | xfs_warn(mp, |
868 | "stripe alignment turned off: sunit(%d) less than bsize(%d)", | 893 | "stripe alignment turned off: sunit(%d) less than bsize(%d)", |
869 | mp->m_dalign, | 894 | mp->m_dalign, |
870 | mp->m_blockmask +1); | 895 | mp->m_blockmask +1); |
871 | return XFS_ERROR(EINVAL); | 896 | return XFS_ERROR(EINVAL); |
872 | } | 897 | } |
@@ -961,6 +986,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp) | |||
961 | } | 986 | } |
962 | 987 | ||
963 | /* | 988 | /* |
989 | * precalculate the low space thresholds for dynamic speculative preallocation. | ||
990 | */ | ||
991 | void | ||
992 | xfs_set_low_space_thresholds( | ||
993 | struct xfs_mount *mp) | ||
994 | { | ||
995 | int i; | ||
996 | |||
997 | for (i = 0; i < XFS_LOWSP_MAX; i++) { | ||
998 | __uint64_t space = mp->m_sb.sb_dblocks; | ||
999 | |||
1000 | do_div(space, 100); | ||
1001 | mp->m_low_space[i] = space * (i + 1); | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | |||
1006 | /* | ||
964 | * Set whether we're using inode alignment. | 1007 | * Set whether we're using inode alignment. |
965 | */ | 1008 | */ |
966 | STATIC void | 1009 | STATIC void |
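The new xfs_set_low_space_thresholds() precomputes percentages of the data-block count for the speculative-preallocation throttle: slot i ends up holding (i + 1) percent of sb_dblocks, with do_div() handling the 64-bit division. The same arithmetic in plain C (LOWSP_MAX and the block count are illustrative values):

	#include <inttypes.h>
	#include <stdio.h>

	#define LOWSP_MAX 5	/* stands in for XFS_LOWSP_MAX */

	int main(void)
	{
		uint64_t dblocks = 26214400;	/* e.g. 100 GiB of 4k blocks */
		uint64_t low_space[LOWSP_MAX];
		int i;

		for (i = 0; i < LOWSP_MAX; i++) {
			uint64_t space = dblocks / 100;	/* do_div(space, 100) */

			low_space[i] = space * (i + 1);
			printf("threshold %d: %" PRIu64 " blocks (%d%%)\n",
			       i, low_space[i], i + 1);
		}
		return 0;
	}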
@@ -991,42 +1034,35 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
991 | { | 1034 | { |
992 | xfs_buf_t *bp; | 1035 | xfs_buf_t *bp; |
993 | xfs_daddr_t d; | 1036 | xfs_daddr_t d; |
994 | int error; | ||
995 | 1037 | ||
996 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); | 1038 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); |
997 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { | 1039 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { |
998 | cmn_err(CE_WARN, "XFS: size check 1 failed"); | 1040 | xfs_warn(mp, "filesystem size mismatch detected"); |
999 | return XFS_ERROR(EFBIG); | 1041 | return XFS_ERROR(EFBIG); |
1000 | } | 1042 | } |
1001 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 1043 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, |
1002 | d - XFS_FSS_TO_BB(mp, 1), | 1044 | d - XFS_FSS_TO_BB(mp, 1), |
1003 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 1045 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); |
1004 | if (!error) { | 1046 | if (!bp) { |
1005 | xfs_buf_relse(bp); | 1047 | xfs_warn(mp, "last sector read failed"); |
1006 | } else { | 1048 | return EIO; |
1007 | cmn_err(CE_WARN, "XFS: size check 2 failed"); | ||
1008 | if (error == ENOSPC) | ||
1009 | error = XFS_ERROR(EFBIG); | ||
1010 | return error; | ||
1011 | } | 1049 | } |
1050 | xfs_buf_relse(bp); | ||
1012 | 1051 | ||
1013 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | 1052 | if (mp->m_logdev_targp != mp->m_ddev_targp) { |
1014 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); | 1053 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); |
1015 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { | 1054 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { |
1016 | cmn_err(CE_WARN, "XFS: size check 3 failed"); | 1055 | xfs_warn(mp, "log size mismatch detected"); |
1017 | return XFS_ERROR(EFBIG); | 1056 | return XFS_ERROR(EFBIG); |
1018 | } | 1057 | } |
1019 | error = xfs_read_buf(mp, mp->m_logdev_targp, | 1058 | bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, |
1020 | d - XFS_FSB_TO_BB(mp, 1), | 1059 | d - XFS_FSB_TO_BB(mp, 1), |
1021 | XFS_FSB_TO_BB(mp, 1), 0, &bp); | 1060 | XFS_FSB_TO_B(mp, 1), 0); |
1022 | if (!error) { | 1061 | if (!bp) { |
1023 | xfs_buf_relse(bp); | 1062 | xfs_warn(mp, "log device read failed"); |
1024 | } else { | 1063 | return EIO; |
1025 | cmn_err(CE_WARN, "XFS: size check 3 failed"); | ||
1026 | if (error == ENOSPC) | ||
1027 | error = XFS_ERROR(EFBIG); | ||
1028 | return error; | ||
1029 | } | 1064 | } |
1065 | xfs_buf_relse(bp); | ||
1030 | } | 1066 | } |
1031 | return 0; | 1067 | return 0; |
1032 | } | 1068 | } |
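xfs_check_sizes() keeps its logic but swaps xfs_read_buf() for xfs_buf_read_uncached(): work out the last block the superblock claims to own and prove the device really is that big by reading it, for the data device and, when external, the log device. A user-space miniature of the idea, with pread() standing in for the uncached buffer read:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	#define SECTOR	512

	static int check_last_sector(int fd, off_t claimed_bytes)
	{
		char buf[SECTOR];

		if (pread(fd, buf, SECTOR, claimed_bytes - SECTOR) != SECTOR) {
			fprintf(stderr, "last sector read failed\n");
			return -1;	/* device smaller than claimed */
		}
		return 0;
	}

	int main(int argc, char **argv)
	{
		int fd = open(argc > 1 ? argv[1] : "/dev/zero", O_RDONLY);

		if (fd < 0)
			return 1;
		return check_last_sector(fd, 4096) ? 1 : 0;
	}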
@@ -1061,7 +1097,7 @@ xfs_mount_reset_sbqflags( | |||
1061 | return 0; | 1097 | return 0; |
1062 | 1098 | ||
1063 | #ifdef QUOTADEBUG | 1099 | #ifdef QUOTADEBUG |
1064 | xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); | 1100 | xfs_notice(mp, "Writing superblock quota changes"); |
1065 | #endif | 1101 | #endif |
1066 | 1102 | ||
1067 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | 1103 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); |
@@ -1069,8 +1105,7 @@ xfs_mount_reset_sbqflags( | |||
1069 | XFS_DEFAULT_LOG_COUNT); | 1105 | XFS_DEFAULT_LOG_COUNT); |
1070 | if (error) { | 1106 | if (error) { |
1071 | xfs_trans_cancel(tp, 0); | 1107 | xfs_trans_cancel(tp, 0); |
1072 | xfs_fs_cmn_err(CE_ALERT, mp, | 1108 | xfs_alert(mp, "%s: Superblock update failed!", __func__); |
1073 | "xfs_mount_reset_sbqflags: Superblock update failed!"); | ||
1074 | return error; | 1109 | return error; |
1075 | } | 1110 | } |
1076 | 1111 | ||
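Much of this commit is the mechanical conversion from cmn_err()/xfs_fs_cmn_err() to the xfs_warn()/xfs_notice()/xfs_alert() family: the mount pointer identifies the filesystem, so hand-written "XFS:" prefixes and mp->m_fsname arguments disappear, and __func__ replaces spelled-out function names. A user-space stand-in showing the shape of such a helper (the prefix format is assumed for illustration):

#include <stdarg.h>
#include <stdio.h>

struct mount { const char *fsname; };

/* stand-in for xfs_warn()/xfs_notice(): the mount argument lets the
 * helper prefix every message with the filesystem name, so callers no
 * longer embed "XFS: ..." or mp->m_fsname by hand */
static void fs_warn(struct mount *mp, const char *fmt, ...)
{
    va_list ap;

    fprintf(stderr, "XFS (%s): ", mp->fsname);
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fputc('\n', stderr);
}

int main(void)
{
    struct mount m = { "sda1" };

    fs_warn(&m, "%s: Superblock update failed!", __func__);
    return 0;
}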
@@ -1136,8 +1171,7 @@ xfs_mountfs( | |||
1136 | * transaction subsystem is online. | 1171 | * transaction subsystem is online. |
1137 | */ | 1172 | */ |
1138 | if (xfs_sb_has_mismatched_features2(sbp)) { | 1173 | if (xfs_sb_has_mismatched_features2(sbp)) { |
1139 | cmn_err(CE_WARN, | 1174 | xfs_warn(mp, "correcting sb_features alignment problem"); |
1140 | "XFS: correcting sb_features alignment problem"); | ||
1141 | sbp->sb_features2 |= sbp->sb_bad_features2; | 1175 | sbp->sb_features2 |= sbp->sb_bad_features2; |
1142 | sbp->sb_bad_features2 = sbp->sb_features2; | 1176 | sbp->sb_bad_features2 = sbp->sb_features2; |
1143 | mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; | 1177 | mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; |
@@ -1189,6 +1223,9 @@ xfs_mountfs( | |||
1189 | */ | 1223 | */ |
1190 | xfs_set_rw_sizes(mp); | 1224 | xfs_set_rw_sizes(mp); |
1191 | 1225 | ||
1226 | /* set the low space thresholds for dynamic preallocation */ | ||
1227 | xfs_set_low_space_thresholds(mp); | ||
1228 | |||
1192 | /* | 1229 | /* |
1193 | * Set the inode cluster size. | 1230 | * Set the inode cluster size. |
1194 | * This may still be overridden by the file system | 1231 | * This may still be overridden by the file system |
@@ -1213,7 +1250,7 @@ xfs_mountfs( | |||
1213 | */ | 1250 | */ |
1214 | error = xfs_rtmount_init(mp); | 1251 | error = xfs_rtmount_init(mp); |
1215 | if (error) { | 1252 | if (error) { |
1216 | cmn_err(CE_WARN, "XFS: RT mount failed"); | 1253 | xfs_warn(mp, "RT mount failed"); |
1217 | goto out_remove_uuid; | 1254 | goto out_remove_uuid; |
1218 | } | 1255 | } |
1219 | 1256 | ||
@@ -1244,12 +1281,12 @@ xfs_mountfs( | |||
1244 | INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); | 1281 | INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); |
1245 | error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); | 1282 | error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); |
1246 | if (error) { | 1283 | if (error) { |
1247 | cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); | 1284 | xfs_warn(mp, "Failed per-ag init: %d", error); |
1248 | goto out_remove_uuid; | 1285 | goto out_remove_uuid; |
1249 | } | 1286 | } |
1250 | 1287 | ||
1251 | if (!sbp->sb_logblocks) { | 1288 | if (!sbp->sb_logblocks) { |
1252 | cmn_err(CE_WARN, "XFS: no log defined"); | 1289 | xfs_warn(mp, "no log defined"); |
1253 | XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); | 1290 | XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); |
1254 | error = XFS_ERROR(EFSCORRUPTED); | 1291 | error = XFS_ERROR(EFSCORRUPTED); |
1255 | goto out_free_perag; | 1292 | goto out_free_perag; |
@@ -1262,7 +1299,7 @@ xfs_mountfs( | |||
1262 | XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), | 1299 | XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), |
1263 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); | 1300 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); |
1264 | if (error) { | 1301 | if (error) { |
1265 | cmn_err(CE_WARN, "XFS: log mount failed"); | 1302 | xfs_warn(mp, "log mount failed"); |
1266 | goto out_free_perag; | 1303 | goto out_free_perag; |
1267 | } | 1304 | } |
1268 | 1305 | ||
@@ -1299,16 +1336,14 @@ xfs_mountfs( | |||
1299 | */ | 1336 | */ |
1300 | error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); | 1337 | error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); |
1301 | if (error) { | 1338 | if (error) { |
1302 | cmn_err(CE_WARN, "XFS: failed to read root inode"); | 1339 | xfs_warn(mp, "failed to read root inode"); |
1303 | goto out_log_dealloc; | 1340 | goto out_log_dealloc; |
1304 | } | 1341 | } |
1305 | 1342 | ||
1306 | ASSERT(rip != NULL); | 1343 | ASSERT(rip != NULL); |
1307 | 1344 | ||
1308 | if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { | 1345 | if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { |
1309 | cmn_err(CE_WARN, "XFS: corrupted root inode"); | 1346 | xfs_warn(mp, "corrupted root inode %llu: not a directory", |
1310 | cmn_err(CE_WARN, "Device %s - root %llu is not a directory", | ||
1311 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
1312 | (unsigned long long)rip->i_ino); | 1347 | (unsigned long long)rip->i_ino); |
1313 | xfs_iunlock(rip, XFS_ILOCK_EXCL); | 1348 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
1314 | XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, | 1349 | XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, |
@@ -1328,7 +1363,7 @@ xfs_mountfs( | |||
1328 | /* | 1363 | /* |
1329 | * Free up the root inode. | 1364 | * Free up the root inode. |
1330 | */ | 1365 | */ |
1331 | cmn_err(CE_WARN, "XFS: failed to read RT inodes"); | 1366 | xfs_warn(mp, "failed to read RT inodes"); |
1332 | goto out_rele_rip; | 1367 | goto out_rele_rip; |
1333 | } | 1368 | } |
1334 | 1369 | ||
@@ -1340,7 +1375,7 @@ xfs_mountfs( | |||
1340 | if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { | 1375 | if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
1341 | error = xfs_mount_log_sb(mp, mp->m_update_flags); | 1376 | error = xfs_mount_log_sb(mp, mp->m_update_flags); |
1342 | if (error) { | 1377 | if (error) { |
1343 | cmn_err(CE_WARN, "XFS: failed to write sb changes"); | 1378 | xfs_warn(mp, "failed to write sb changes"); |
1344 | goto out_rtunmount; | 1379 | goto out_rtunmount; |
1345 | } | 1380 | } |
1346 | } | 1381 | } |
@@ -1361,10 +1396,7 @@ xfs_mountfs( | |||
1361 | * quotachecked license. | 1396 | * quotachecked license. |
1362 | */ | 1397 | */ |
1363 | if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { | 1398 | if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { |
1364 | cmn_err(CE_NOTE, | 1399 | xfs_notice(mp, "resetting quota flags"); |
1365 | "XFS: resetting qflags for filesystem %s", | ||
1366 | mp->m_fsname); | ||
1367 | |||
1368 | error = xfs_mount_reset_sbqflags(mp); | 1400 | error = xfs_mount_reset_sbqflags(mp); |
1369 | if (error) | 1401 | if (error) |
1370 | return error; | 1402 | return error; |
@@ -1378,7 +1410,7 @@ xfs_mountfs( | |||
1378 | */ | 1410 | */ |
1379 | error = xfs_log_mount_finish(mp); | 1411 | error = xfs_log_mount_finish(mp); |
1380 | if (error) { | 1412 | if (error) { |
1381 | cmn_err(CE_WARN, "XFS: log mount finish failed"); | 1413 | xfs_warn(mp, "log mount finish failed"); |
1382 | goto out_rtunmount; | 1414 | goto out_rtunmount; |
1383 | } | 1415 | } |
1384 | 1416 | ||
@@ -1407,8 +1439,8 @@ xfs_mountfs( | |||
1407 | resblks = xfs_default_resblks(mp); | 1439 | resblks = xfs_default_resblks(mp); |
1408 | error = xfs_reserve_blocks(mp, &resblks, NULL); | 1440 | error = xfs_reserve_blocks(mp, &resblks, NULL); |
1409 | if (error) | 1441 | if (error) |
1410 | cmn_err(CE_WARN, "XFS: Unable to allocate reserve " | 1442 | xfs_warn(mp, |
1411 | "blocks. Continuing without a reserve pool."); | 1443 | "Unable to allocate reserve blocks. Continuing without reserve pool."); |
1412 | } | 1444 | } |
1413 | 1445 | ||
1414 | return 0; | 1446 | return 0; |
@@ -1497,12 +1529,12 @@ xfs_unmountfs( | |||
1497 | resblks = 0; | 1529 | resblks = 0; |
1498 | error = xfs_reserve_blocks(mp, &resblks, NULL); | 1530 | error = xfs_reserve_blocks(mp, &resblks, NULL); |
1499 | if (error) | 1531 | if (error) |
1500 | cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " | 1532 | xfs_warn(mp, "Unable to free reserved block pool. " |
1501 | "Freespace may not be correct on next mount."); | 1533 | "Freespace may not be correct on next mount."); |
1502 | 1534 | ||
1503 | error = xfs_log_sbcount(mp, 1); | 1535 | error = xfs_log_sbcount(mp, 1); |
1504 | if (error) | 1536 | if (error) |
1505 | cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " | 1537 | xfs_warn(mp, "Unable to update superblock counters. " |
1506 | "Freespace may not be correct on next mount."); | 1538 | "Freespace may not be correct on next mount."); |
1507 | xfs_unmountfs_writesb(mp); | 1539 | xfs_unmountfs_writesb(mp); |
1508 | xfs_unmountfs_wait(mp); /* wait for async bufs */ | 1540 | xfs_unmountfs_wait(mp); /* wait for async bufs */ |
@@ -1601,7 +1633,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) | |||
1601 | XFS_BUF_UNASYNC(sbp); | 1633 | XFS_BUF_UNASYNC(sbp); |
1602 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); | 1634 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); |
1603 | xfsbdstrat(mp, sbp); | 1635 | xfsbdstrat(mp, sbp); |
1604 | error = xfs_iowait(sbp); | 1636 | error = xfs_buf_iowait(sbp); |
1605 | if (error) | 1637 | if (error) |
1606 | xfs_ioerror_alert("xfs_unmountfs_writesb", | 1638 | xfs_ioerror_alert("xfs_unmountfs_writesb", |
1607 | mp, sbp, XFS_BUF_ADDR(sbp)); | 1639 | mp, sbp, XFS_BUF_ADDR(sbp)); |
@@ -1832,135 +1864,72 @@ xfs_mod_incore_sb_unlocked( | |||
1832 | */ | 1864 | */ |
1833 | int | 1865 | int |
1834 | xfs_mod_incore_sb( | 1866 | xfs_mod_incore_sb( |
1835 | xfs_mount_t *mp, | 1867 | struct xfs_mount *mp, |
1836 | xfs_sb_field_t field, | 1868 | xfs_sb_field_t field, |
1837 | int64_t delta, | 1869 | int64_t delta, |
1838 | int rsvd) | 1870 | int rsvd) |
1839 | { | 1871 | { |
1840 | int status; | 1872 | int status; |
1841 | 1873 | ||
1842 | /* check for per-cpu counters */ | ||
1843 | switch (field) { | ||
1844 | #ifdef HAVE_PERCPU_SB | 1874 | #ifdef HAVE_PERCPU_SB |
1845 | case XFS_SBS_ICOUNT: | 1875 | ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); |
1846 | case XFS_SBS_IFREE: | ||
1847 | case XFS_SBS_FDBLOCKS: | ||
1848 | if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { | ||
1849 | status = xfs_icsb_modify_counters(mp, field, | ||
1850 | delta, rsvd); | ||
1851 | break; | ||
1852 | } | ||
1853 | /* FALLTHROUGH */ | ||
1854 | #endif | 1876 | #endif |
1855 | default: | 1877 | spin_lock(&mp->m_sb_lock); |
1856 | spin_lock(&mp->m_sb_lock); | 1878 | status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); |
1857 | status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); | 1879 | spin_unlock(&mp->m_sb_lock); |
1858 | spin_unlock(&mp->m_sb_lock); | ||
1859 | break; | ||
1860 | } | ||
1861 | 1880 | ||
1862 | return status; | 1881 | return status; |
1863 | } | 1882 | } |
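After this rewrite the per-cpu superblock fields (XFS_SBS_ICOUNT through XFS_SBS_FDBLOCKS) may no longer be funneled through xfs_mod_incore_sb(); the new ASSERT enforces the split, and hot paths such as xfs_trans_reserve() (see the xfs_trans.c hunk near the end of this diff) call xfs_icsb_modify_counters() directly. A small runnable sketch of that contract, with assert() standing in for ASSERT and stubbed counter routines:

#include <assert.h>

enum sb_field { SBS_ICOUNT, SBS_IFREE, SBS_FDBLOCKS, SBS_FREXTENTS };

/* fast path: per-cpu counter machinery (stubbed here) */
static int icsb_modify_counters(enum sb_field f, long delta) { return 0; }

/* slow path: single global lock; asserts it is never handed a per-cpu
 * field, mirroring the ASSERT added to xfs_mod_incore_sb() above */
static int mod_incore_sb(enum sb_field f, long delta)
{
    assert(f < SBS_ICOUNT || f > SBS_FDBLOCKS);
    /* lock, apply delta, unlock */
    return 0;
}

int main(void)
{
    icsb_modify_counters(SBS_FDBLOCKS, -8);   /* hot counters go here */
    mod_incore_sb(SBS_FREXTENTS, -1);         /* everything else here */
    return 0;
}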
1864 | 1883 | ||
1865 | /* | 1884 | /* |
1866 | * xfs_mod_incore_sb_batch() is used to change more than one field | 1885 | * Change more than one field in the in-core superblock structure at a time. |
1867 | * in the in-core superblock structure at a time. This modification | ||
1868 | * is protected by a lock internal to this module. The fields and | ||
1869 | * changes to those fields are specified in the array of xfs_mod_sb | ||
1870 | * structures passed in. | ||
1871 | * | 1886 | * |
1872 | * Either all of the specified deltas will be applied or none of | 1887 | * The fields and changes to those fields are specified in the array of |
1873 | * them will. If any modified field dips below 0, then all modifications | 1888 | * xfs_mod_sb structures passed in. Either all of the specified deltas |
1874 | * will be backed out and EINVAL will be returned. | 1889 | * will be applied or none of them will. If any modified field dips below 0, |
1890 | * then all modifications will be backed out and EINVAL will be returned. | ||
1891 | * | ||
1892 | * Note that this function may not be used for the superblock values that | ||
1893 | * are tracked with the in-memory per-cpu counters - a direct call to | ||
1894 | * xfs_icsb_modify_counters is required for these. | ||
1875 | */ | 1895 | */ |
1876 | int | 1896 | int |
1877 | xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) | 1897 | xfs_mod_incore_sb_batch( |
1898 | struct xfs_mount *mp, | ||
1899 | xfs_mod_sb_t *msb, | ||
1900 | uint nmsb, | ||
1901 | int rsvd) | ||
1878 | { | 1902 | { |
1879 | int status=0; | 1903 | xfs_mod_sb_t *msbp; |
1880 | xfs_mod_sb_t *msbp; | 1904 | int error = 0; |
1881 | 1905 | ||
1882 | /* | 1906 | /* |
1883 | * Loop through the array of mod structures and apply each | 1907 | * Loop through the array of mod structures and apply each individually. |
1884 | * individually. If any fail, then back out all those | 1908 | * If any fail, then back out all those which have already been applied. |
1885 | * which have already been applied. Do all of this within | 1909 | * Do all of this within the scope of the m_sb_lock so that all of the |
1886 | * the scope of the m_sb_lock so that all of the changes will | 1910 | * changes will be atomic. |
1887 | * be atomic. | ||
1888 | */ | 1911 | */ |
1889 | spin_lock(&mp->m_sb_lock); | 1912 | spin_lock(&mp->m_sb_lock); |
1890 | msbp = &msb[0]; | 1913 | for (msbp = msb; msbp < (msb + nmsb); msbp++) { |
1891 | for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { | 1914 | ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || |
1892 | /* | 1915 | msbp->msb_field > XFS_SBS_FDBLOCKS); |
1893 | * Apply the delta at index n. If it fails, break | ||
1894 | * from the loop so we'll fall into the undo loop | ||
1895 | * below. | ||
1896 | */ | ||
1897 | switch (msbp->msb_field) { | ||
1898 | #ifdef HAVE_PERCPU_SB | ||
1899 | case XFS_SBS_ICOUNT: | ||
1900 | case XFS_SBS_IFREE: | ||
1901 | case XFS_SBS_FDBLOCKS: | ||
1902 | if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { | ||
1903 | spin_unlock(&mp->m_sb_lock); | ||
1904 | status = xfs_icsb_modify_counters(mp, | ||
1905 | msbp->msb_field, | ||
1906 | msbp->msb_delta, rsvd); | ||
1907 | spin_lock(&mp->m_sb_lock); | ||
1908 | break; | ||
1909 | } | ||
1910 | /* FALLTHROUGH */ | ||
1911 | #endif | ||
1912 | default: | ||
1913 | status = xfs_mod_incore_sb_unlocked(mp, | ||
1914 | msbp->msb_field, | ||
1915 | msbp->msb_delta, rsvd); | ||
1916 | break; | ||
1917 | } | ||
1918 | 1916 | ||
1919 | if (status != 0) { | 1917 | error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, |
1920 | break; | 1918 | msbp->msb_delta, rsvd); |
1921 | } | 1919 | if (error) |
1920 | goto unwind; | ||
1922 | } | 1921 | } |
1922 | spin_unlock(&mp->m_sb_lock); | ||
1923 | return 0; | ||
1923 | 1924 | ||
1924 | /* | 1925 | unwind: |
1925 | * If we didn't complete the loop above, then back out | 1926 | while (--msbp >= msb) { |
1926 | * any changes made to the superblock. If you add code | 1927 | error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, |
1927 | * between the loop above and here, make sure that you | 1928 | -msbp->msb_delta, rsvd); |
1928 | * preserve the value of status. Loop back until | 1929 | ASSERT(error == 0); |
1929 | * we step below the beginning of the array. Make sure | ||
1930 | * we don't touch anything back there. | ||
1931 | */ | ||
1932 | if (status != 0) { | ||
1933 | msbp--; | ||
1934 | while (msbp >= msb) { | ||
1935 | switch (msbp->msb_field) { | ||
1936 | #ifdef HAVE_PERCPU_SB | ||
1937 | case XFS_SBS_ICOUNT: | ||
1938 | case XFS_SBS_IFREE: | ||
1939 | case XFS_SBS_FDBLOCKS: | ||
1940 | if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { | ||
1941 | spin_unlock(&mp->m_sb_lock); | ||
1942 | status = xfs_icsb_modify_counters(mp, | ||
1943 | msbp->msb_field, | ||
1944 | -(msbp->msb_delta), | ||
1945 | rsvd); | ||
1946 | spin_lock(&mp->m_sb_lock); | ||
1947 | break; | ||
1948 | } | ||
1949 | /* FALLTHROUGH */ | ||
1950 | #endif | ||
1951 | default: | ||
1952 | status = xfs_mod_incore_sb_unlocked(mp, | ||
1953 | msbp->msb_field, | ||
1954 | -(msbp->msb_delta), | ||
1955 | rsvd); | ||
1956 | break; | ||
1957 | } | ||
1958 | ASSERT(status == 0); | ||
1959 | msbp--; | ||
1960 | } | ||
1961 | } | 1930 | } |
1962 | spin_unlock(&mp->m_sb_lock); | 1931 | spin_unlock(&mp->m_sb_lock); |
1963 | return status; | 1932 | return error; |
1964 | } | 1933 | } |
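The rewritten batch function is the classic all-or-nothing pattern: apply each delta in order under one lock, and on the first failure walk backwards over the entries already applied, negating each delta. A self-contained sketch of the same idea (index-based rather than the kernel's pointer walk, but equivalent):

#include <stdio.h>

struct mod { int field; long delta; };

static long counters[4];

static int apply(int field, long delta)
{
    if (counters[field] + delta < 0)
        return -1;                    /* would dip below zero */
    counters[field] += delta;
    return 0;
}

static int apply_batch(const struct mod *msb, unsigned nmsb)
{
    unsigned i;
    int error = 0;

    for (i = 0; i < nmsb; i++) {
        error = apply(msb[i].field, msb[i].delta);
        if (error)
            goto unwind;
    }
    return 0;

unwind:
    while (i--)                       /* back out only what was applied */
        apply(msb[i].field, -msb[i].delta);
    return error;
}

int main(void)
{
    struct mod batch[] = { { 0, 5 }, { 1, -3 } };

    /* second entry fails, so the first is unwound and -1 is returned */
    printf("%d\n", apply_batch(batch, 2));
    return 0;
}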
1965 | 1934 | ||
1966 | /* | 1935 | /* |
@@ -1998,18 +1967,13 @@ xfs_getsb( | |||
1998 | */ | 1967 | */ |
1999 | void | 1968 | void |
2000 | xfs_freesb( | 1969 | xfs_freesb( |
2001 | xfs_mount_t *mp) | 1970 | struct xfs_mount *mp) |
2002 | { | 1971 | { |
2003 | xfs_buf_t *bp; | 1972 | struct xfs_buf *bp = mp->m_sb_bp; |
2004 | 1973 | ||
2005 | /* | 1974 | xfs_buf_lock(bp); |
2006 | * Use xfs_getsb() so that the buffer will be locked | ||
2007 | * when we call xfs_buf_relse(). | ||
2008 | */ | ||
2009 | bp = xfs_getsb(mp, 0); | ||
2010 | XFS_BUF_UNMANAGE(bp); | ||
2011 | xfs_buf_relse(bp); | ||
2012 | mp->m_sb_bp = NULL; | 1975 | mp->m_sb_bp = NULL; |
1976 | xfs_buf_relse(bp); | ||
2013 | } | 1977 | } |
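The new xfs_freesb() no longer takes a second reference via xfs_getsb(); it locks the buffer it already owns, detaches mp->m_sb_bp while the lock is held, and only then drops the buffer. A generic illustration of that detach-under-lock ordering, with a pthread mutex standing in for the buffer lock (purely illustrative, not the XFS buffer API):

#include <pthread.h>
#include <stdlib.h>

struct buffer {
    pthread_mutex_t lock;
    /* ... data ... */
};

static struct buffer *shared_bp;      /* analogous to mp->m_sb_bp */

static void free_shared_buffer(void)
{
    struct buffer *bp = shared_bp;

    pthread_mutex_lock(&bp->lock);    /* xfs_buf_lock(bp) */
    shared_bp = NULL;                 /* detach before the final release */
    pthread_mutex_unlock(&bp->lock);  /* xfs_buf_relse() unlocks and... */
    free(bp);                         /* ...drops the last reference */
}

int main(void)
{
    shared_bp = calloc(1, sizeof(*shared_bp));
    pthread_mutex_init(&shared_bp->lock, NULL);
    free_shared_buffer();
    return 0;
}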
2014 | 1978 | ||
2015 | /* | 1979 | /* |
@@ -2053,10 +2017,8 @@ xfs_dev_is_read_only( | |||
2053 | if (xfs_readonly_buftarg(mp->m_ddev_targp) || | 2017 | if (xfs_readonly_buftarg(mp->m_ddev_targp) || |
2054 | xfs_readonly_buftarg(mp->m_logdev_targp) || | 2018 | xfs_readonly_buftarg(mp->m_logdev_targp) || |
2055 | (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { | 2019 | (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { |
2056 | cmn_err(CE_NOTE, | 2020 | xfs_notice(mp, "%s required on read-only device.", message); |
2057 | "XFS: %s required on read-only device.", message); | 2021 | xfs_notice(mp, "write access unavailable, cannot proceed."); |
2058 | cmn_err(CE_NOTE, | ||
2059 | "XFS: write access unavailable, cannot proceed."); | ||
2060 | return EROFS; | 2022 | return EROFS; |
2061 | } | 2023 | } |
2062 | return 0; | 2024 | return 0; |
@@ -2496,7 +2458,7 @@ xfs_icsb_balance_counter( | |||
2496 | spin_unlock(&mp->m_sb_lock); | 2458 | spin_unlock(&mp->m_sb_lock); |
2497 | } | 2459 | } |
2498 | 2460 | ||
2499 | STATIC int | 2461 | int |
2500 | xfs_icsb_modify_counters( | 2462 | xfs_icsb_modify_counters( |
2501 | xfs_mount_t *mp, | 2463 | xfs_mount_t *mp, |
2502 | xfs_sb_field_t field, | 2464 | xfs_sb_field_t field, |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 622da2179a57..3d68bb267c5f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -53,7 +53,6 @@ typedef struct xfs_trans_reservations { | |||
53 | 53 | ||
54 | #include "xfs_sync.h" | 54 | #include "xfs_sync.h" |
55 | 55 | ||
56 | struct cred; | ||
57 | struct log; | 56 | struct log; |
58 | struct xfs_mount_args; | 57 | struct xfs_mount_args; |
59 | struct xfs_inode; | 58 | struct xfs_inode; |
@@ -91,6 +90,8 @@ extern void xfs_icsb_reinit_counters(struct xfs_mount *); | |||
91 | extern void xfs_icsb_destroy_counters(struct xfs_mount *); | 90 | extern void xfs_icsb_destroy_counters(struct xfs_mount *); |
92 | extern void xfs_icsb_sync_counters(struct xfs_mount *, int); | 91 | extern void xfs_icsb_sync_counters(struct xfs_mount *, int); |
93 | extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); | 92 | extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); |
93 | extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, | ||
94 | int64_t, int); | ||
94 | 95 | ||
95 | #else | 96 | #else |
96 | #define xfs_icsb_init_counters(mp) (0) | 97 | #define xfs_icsb_init_counters(mp) (0) |
@@ -98,8 +99,20 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); | |||
98 | #define xfs_icsb_reinit_counters(mp) do { } while (0) | 99 | #define xfs_icsb_reinit_counters(mp) do { } while (0) |
99 | #define xfs_icsb_sync_counters(mp, flags) do { } while (0) | 100 | #define xfs_icsb_sync_counters(mp, flags) do { } while (0) |
100 | #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) | 101 | #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) |
102 | #define xfs_icsb_modify_counters(mp, field, delta, rsvd) \ | ||
103 | xfs_mod_incore_sb(mp, field, delta, rsvd) | ||
101 | #endif | 104 | #endif |
102 | 105 | ||
106 | /* dynamic preallocation free space thresholds, 5% down to 1% */ | ||
107 | enum { | ||
108 | XFS_LOWSP_1_PCNT = 0, | ||
109 | XFS_LOWSP_2_PCNT, | ||
110 | XFS_LOWSP_3_PCNT, | ||
111 | XFS_LOWSP_4_PCNT, | ||
112 | XFS_LOWSP_5_PCNT, | ||
113 | XFS_LOWSP_MAX, | ||
114 | }; | ||
115 | |||
103 | typedef struct xfs_mount { | 116 | typedef struct xfs_mount { |
104 | struct super_block *m_super; | 117 | struct super_block *m_super; |
105 | xfs_tid_t m_tid; /* next unused tid for fs */ | 118 | xfs_tid_t m_tid; /* next unused tid for fs */ |
@@ -190,15 +203,14 @@ typedef struct xfs_mount { | |||
190 | struct mutex m_icsb_mutex; /* balancer sync lock */ | 203 | struct mutex m_icsb_mutex; /* balancer sync lock */ |
191 | #endif | 204 | #endif |
192 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ | 205 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ |
193 | struct task_struct *m_sync_task; /* generalised sync thread */ | 206 | struct delayed_work m_sync_work; /* background sync work */ |
194 | xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ | 207 | struct delayed_work m_reclaim_work; /* background inode reclaim */ |
195 | struct list_head m_sync_list; /* sync thread work item list */ | 208 | struct work_struct m_flush_work; /* background inode flush */ |
196 | spinlock_t m_sync_lock; /* work item list lock */ | ||
197 | int m_sync_seq; /* sync thread generation no. */ | ||
198 | wait_queue_head_t m_wait_single_sync_task; | ||
199 | __int64_t m_update_flags; /* sb flags we need to update | 209 | __int64_t m_update_flags; /* sb flags we need to update |
200 | on the next remount,rw */ | 210 | on the next remount,rw */ |
201 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 211 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |
212 | int64_t m_low_space[XFS_LOWSP_MAX]; | ||
213 | /* low free space thresholds */ | ||
202 | } xfs_mount_t; | 214 | } xfs_mount_t; |
203 | 215 | ||
204 | /* | 216 | /* |
@@ -212,6 +224,7 @@ typedef struct xfs_mount { | |||
212 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem | 224 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem |
213 | operations, typically for | 225 | operations, typically for |
214 | disk errors in metadata */ | 226 | disk errors in metadata */ |
227 | #define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ | ||
215 | #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to | 228 | #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to |
216 | user */ | 229 | user */ |
217 | #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment | 230 | #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment |
@@ -232,8 +245,6 @@ typedef struct xfs_mount { | |||
232 | #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ | 245 | #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ |
233 | #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred | 246 | #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred |
234 | * I/O size in stat() */ | 247 | * I/O size in stat() */ |
235 | #define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock | ||
236 | counters */ | ||
237 | #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams | 248 | #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams |
238 | allocator */ | 249 | allocator */ |
239 | #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ | 250 | #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ |
@@ -327,6 +338,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) | |||
327 | * perag get/put wrappers for ref counting | 338 | * perag get/put wrappers for ref counting |
328 | */ | 339 | */ |
329 | struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); | 340 | struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); |
341 | struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
342 | int tag); | ||
330 | void xfs_perag_put(struct xfs_perag *pag); | 343 | void xfs_perag_put(struct xfs_perag *pag); |
331 | 344 | ||
332 | /* | 345 | /* |
@@ -376,6 +389,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); | |||
376 | 389 | ||
377 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | 390 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); |
378 | 391 | ||
392 | extern void xfs_set_low_space_thresholds(struct xfs_mount *); | ||
393 | |||
379 | #endif /* __KERNEL__ */ | 394 | #endif /* __KERNEL__ */ |
380 | 395 | ||
381 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); | 396 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); |
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 45ce15dc5b2b..4aff56395732 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void) | |||
309 | if (!xfs_mru_elem_zone) | 309 | if (!xfs_mru_elem_zone) |
310 | goto out; | 310 | goto out; |
311 | 311 | ||
312 | xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); | 312 | xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); |
313 | if (!xfs_mru_reap_wq) | 313 | if (!xfs_mru_reap_wq) |
314 | goto out_destroy_mru_elem_zone; | 314 | goto out_destroy_mru_elem_zone; |
315 | 315 | ||
@@ -408,7 +408,7 @@ xfs_mru_cache_flush( | |||
408 | spin_lock(&mru->lock); | 408 | spin_lock(&mru->lock); |
409 | if (mru->queued) { | 409 | if (mru->queued) { |
410 | spin_unlock(&mru->lock); | 410 | spin_unlock(&mru->lock); |
411 | cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); | 411 | cancel_delayed_work_sync(&mru->work); |
412 | spin_lock(&mru->lock); | 412 | spin_lock(&mru->lock); |
413 | } | 413 | } |
414 | 414 | ||
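The xfs_mru_cache.c hunks swap the long-deprecated create_singlethread_workqueue() / cancel_rearming_delayed_workqueue() pair for alloc_workqueue() and cancel_delayed_work_sync(). WQ_MEM_RECLAIM gives the queue a rescuer thread so it can make forward progress under memory pressure, max_active = 1 preserves the single-threaded ordering, and cancel_delayed_work_sync() waits out the handler even if it rearms itself. A minimal module-shaped sketch of the pattern (demo names, not the XFS code):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *reap_wq;
static struct delayed_work reap_work;

static void reap_fn(struct work_struct *work)
{
    /* periodic reaping would go here; the work rearms itself */
    queue_delayed_work(reap_wq, &reap_work, HZ);
}

static int __init demo_init(void)
{
    /* WQ_MEM_RECLAIM: rescuer thread guarantees progress in reclaim;
     * max_active = 1 keeps execution single-threaded */
    reap_wq = alloc_workqueue("demo_reap", WQ_MEM_RECLAIM, 1);
    if (!reap_wq)
        return -ENOMEM;
    INIT_DELAYED_WORK(&reap_work, reap_fn);
    queue_delayed_work(reap_wq, &reap_work, HZ);
    return 0;
}

static void __exit demo_exit(void)
{
    /* replaces cancel_rearming_delayed_workqueue(): cancels and waits,
     * coping with the handler requeueing itself */
    cancel_delayed_work_sync(&reap_work);
    destroy_workqueue(reap_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");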
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index e0e64b113bd6..a595f29567fe 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, | |||
346 | #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) | 346 | #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) |
347 | #define xfs_trans_apply_dquot_deltas(tp) | 347 | #define xfs_trans_apply_dquot_deltas(tp) |
348 | #define xfs_trans_unreserve_and_mod_dquots(tp) | 348 | #define xfs_trans_unreserve_and_mod_dquots(tp) |
349 | #define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) | 349 | static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, |
350 | #define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) | 350 | struct xfs_inode *ip, long nblks, long ninos, uint flags) |
351 | { | ||
352 | return 0; | ||
353 | } | ||
354 | static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, | ||
355 | struct xfs_mount *mp, struct xfs_dquot *udqp, | ||
356 | struct xfs_dquot *gdqp, long nblks, long ninos, uint flags) | ||
357 | { | ||
358 | return 0; | ||
359 | } | ||
351 | #define xfs_qm_vop_create_dqattach(tp, ip, u, g) | 360 | #define xfs_qm_vop_create_dqattach(tp, ip, u, g) |
352 | #define xfs_qm_vop_rename_dqattach(it) (0) | 361 | #define xfs_qm_vop_rename_dqattach(it) (0) |
353 | #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) | 362 | #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) |
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, | |||
357 | #define xfs_qm_dqdetach(ip) | 366 | #define xfs_qm_dqdetach(ip) |
358 | #define xfs_qm_dqrele(d) | 367 | #define xfs_qm_dqrele(d) |
359 | #define xfs_qm_statvfs(ip, s) | 368 | #define xfs_qm_statvfs(ip, s) |
360 | #define xfs_qm_sync(mp, fl) (0) | 369 | static inline int xfs_qm_sync(struct xfs_mount *mp, int flags) |
370 | { | ||
371 | return 0; | ||
372 | } | ||
361 | #define xfs_qm_newmount(mp, a, b) (0) | 373 | #define xfs_qm_newmount(mp, a, b) (0) |
362 | #define xfs_qm_mount_quotas(mp) | 374 | #define xfs_qm_mount_quotas(mp) |
363 | #define xfs_qm_unmount(mp) | 375 | #define xfs_qm_unmount(mp) |
364 | #define xfs_qm_unmount_quotas(mp) (0) | 376 | #define xfs_qm_unmount_quotas(mp) |
365 | #endif /* CONFIG_XFS_QUOTA */ | 377 | #endif /* CONFIG_XFS_QUOTA */ |
366 | 378 | ||
367 | #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ | 379 | #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ |
@@ -370,7 +382,8 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, | |||
370 | xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ | 382 | xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ |
371 | f | XFS_QMOPT_RES_REGBLKS) | 383 | f | XFS_QMOPT_RES_REGBLKS) |
372 | 384 | ||
373 | extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); | 385 | extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, |
386 | xfs_dqid_t, uint, uint, char *); | ||
374 | extern int xfs_mount_reset_sbqflags(struct xfs_mount *); | 387 | extern int xfs_mount_reset_sbqflags(struct xfs_mount *); |
375 | 388 | ||
376 | #endif /* __KERNEL__ */ | 389 | #endif /* __KERNEL__ */ |
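Converting the !CONFIG_XFS_QUOTA stubs from object-like macros returning (0) to static inline functions keeps the argument types checked and the arguments evaluated in both configurations, so a quota-disabled build can no longer hide a miscall. A minimal illustration of the difference (hypothetical names):

struct inode;

/* macro stub: arguments are never evaluated or type-checked */
#define reserve_quota_macro(ip, nblks) (0)

/* inline stub: the signature is enforced even in the "disabled" build */
static inline int reserve_quota_inline(struct inode *ip, long nblks)
{
    return 0;
}

int main(void)
{
    /* reserve_quota_macro("oops", "wrong types") would still compile;
     * the same call through the inline version is rejected */
    int error = reserve_quota_inline((struct inode *)0, 16);

    return error ? 1 : 0;
}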
diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h deleted file mode 100644 index 2dec79edb510..000000000000 --- a/fs/xfs/xfs_refcache.h +++ /dev/null | |||
@@ -1,52 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_REFCACHE_H__ | ||
19 | #define __XFS_REFCACHE_H__ | ||
20 | |||
21 | #ifdef HAVE_REFCACHE | ||
22 | /* | ||
23 | * Maximum size (in inodes) for the NFS reference cache | ||
24 | */ | ||
25 | #define XFS_REFCACHE_SIZE_MAX 512 | ||
26 | |||
27 | struct xfs_inode; | ||
28 | struct xfs_mount; | ||
29 | |||
30 | extern void xfs_refcache_insert(struct xfs_inode *); | ||
31 | extern void xfs_refcache_purge_ip(struct xfs_inode *); | ||
32 | extern void xfs_refcache_purge_mp(struct xfs_mount *); | ||
33 | extern void xfs_refcache_purge_some(struct xfs_mount *); | ||
34 | extern void xfs_refcache_resize(int); | ||
35 | extern void xfs_refcache_destroy(void); | ||
36 | |||
37 | extern void xfs_refcache_iunlock(struct xfs_inode *, uint); | ||
38 | |||
39 | #else | ||
40 | |||
41 | #define xfs_refcache_insert(ip) do { } while (0) | ||
42 | #define xfs_refcache_purge_ip(ip) do { } while (0) | ||
43 | #define xfs_refcache_purge_mp(mp) do { } while (0) | ||
44 | #define xfs_refcache_purge_some(mp) do { } while (0) | ||
45 | #define xfs_refcache_resize(size) do { } while (0) | ||
46 | #define xfs_refcache_destroy() do { } while (0) | ||
47 | |||
48 | #define xfs_refcache_iunlock(ip, flags) xfs_iunlock(ip, flags) | ||
49 | |||
50 | #endif | ||
51 | |||
52 | #endif /* __XFS_REFCACHE_H__ */ | ||
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 8fca957200df..77a59891734e 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
@@ -183,7 +183,7 @@ xfs_rename( | |||
183 | * tree quota mechanism would be circumvented. | 183 | * tree quota mechanism would be circumvented. |
184 | */ | 184 | */ |
185 | if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | 185 | if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
186 | (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { | 186 | (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { |
187 | error = XFS_ERROR(EXDEV); | 187 | error = XFS_ERROR(EXDEV); |
188 | goto error_return; | 188 | goto error_return; |
189 | } | 189 | } |
@@ -211,7 +211,9 @@ xfs_rename( | |||
211 | goto error_return; | 211 | goto error_return; |
212 | if (error) | 212 | if (error) |
213 | goto abort_return; | 213 | goto abort_return; |
214 | xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 214 | |
215 | xfs_trans_ichgtime(tp, target_dp, | ||
216 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
215 | 217 | ||
216 | if (new_parent && src_is_directory) { | 218 | if (new_parent && src_is_directory) { |
217 | error = xfs_bumplink(tp, target_dp); | 219 | error = xfs_bumplink(tp, target_dp); |
@@ -249,7 +251,9 @@ xfs_rename( | |||
249 | &first_block, &free_list, spaceres); | 251 | &first_block, &free_list, spaceres); |
250 | if (error) | 252 | if (error) |
251 | goto abort_return; | 253 | goto abort_return; |
252 | xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 254 | |
255 | xfs_trans_ichgtime(tp, target_dp, | ||
256 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
253 | 257 | ||
254 | /* | 258 | /* |
255 | * Decrement the link count on the target since the target | 259 | * Decrement the link count on the target since the target |
@@ -292,7 +296,8 @@ xfs_rename( | |||
292 | * inode isn't really being changed, but old unix file systems did | 296 | * inode isn't really being changed, but old unix file systems did |
293 | * it and some incremental backup programs won't work without it. | 297 | * it and some incremental backup programs won't work without it. |
294 | */ | 298 | */ |
295 | xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); | 299 | xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); |
300 | xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); | ||
296 | 301 | ||
297 | /* | 302 | /* |
298 | * Adjust the link count on src_dp. This is necessary when | 303 | * Adjust the link count on src_dp. This is necessary when |
@@ -315,7 +320,7 @@ xfs_rename( | |||
315 | if (error) | 320 | if (error) |
316 | goto abort_return; | 321 | goto abort_return; |
317 | 322 | ||
318 | xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 323 | xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
319 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); | 324 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); |
320 | if (new_parent) | 325 | if (new_parent) |
321 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); | 326 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); |
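The rename path now stamps timestamps with xfs_trans_ichgtime(), which takes the transaction so the change happens in its scope, and where the old helper was implicit the new code follows up with an explicit xfs_trans_log_inode(). A rough user-space sketch of that shape (stub types, not the kernel structures):

#include <time.h>

struct inode { time_t mtime, ctime; unsigned logged_fields; };
struct trans { int id; };

#define ILOG_CORE    0x1
#define ICHGTIME_MOD 0x1
#define ICHGTIME_CHG 0x2

/* stand-in for xfs_trans_ichgtime(): the transaction parameter ties the
 * timestamp update to tp's scope so it can be logged and replayed */
static void trans_ichgtime(struct trans *tp, struct inode *ip, int flags)
{
    time_t now = time(NULL);

    if (flags & ICHGTIME_MOD)
        ip->mtime = now;
    if (flags & ICHGTIME_CHG)
        ip->ctime = now;
}

static void trans_log_inode(struct trans *tp, struct inode *ip, unsigned f)
{
    ip->logged_fields |= f;   /* mark the core for inclusion in the log */
}

int main(void)
{
    struct trans tp = { 1 };
    struct inode src = { 0 };

    trans_ichgtime(&tp, &src, ICHGTIME_CHG);
    trans_log_inode(&tp, &src, ILOG_CORE);   /* now explicit, per the hunk */
    return 0;
}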
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 891260fea11e..8f76fdff4f46 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_trans_space.h" | 39 | #include "xfs_trans_space.h" |
40 | #include "xfs_utils.h" | 40 | #include "xfs_utils.h" |
41 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
42 | #include "xfs_buf.h" | ||
42 | 43 | ||
43 | 44 | ||
44 | /* | 45 | /* |
@@ -75,7 +76,7 @@ xfs_growfs_rt_alloc( | |||
75 | xfs_mount_t *mp, /* file system mount point */ | 76 | xfs_mount_t *mp, /* file system mount point */ |
76 | xfs_extlen_t oblocks, /* old count of blocks */ | 77 | xfs_extlen_t oblocks, /* old count of blocks */ |
77 | xfs_extlen_t nblocks, /* new count of blocks */ | 78 | xfs_extlen_t nblocks, /* new count of blocks */ |
78 | xfs_ino_t ino) /* inode number (bitmap/summary) */ | 79 | xfs_inode_t *ip) /* inode (bitmap/summary) */ |
79 | { | 80 | { |
80 | xfs_fileoff_t bno; /* block number in file */ | 81 | xfs_fileoff_t bno; /* block number in file */ |
81 | xfs_buf_t *bp; /* temporary buffer for zeroing */ | 82 | xfs_buf_t *bp; /* temporary buffer for zeroing */ |
@@ -85,7 +86,6 @@ xfs_growfs_rt_alloc( | |||
85 | xfs_fsblock_t firstblock; /* first block allocated in xaction */ | 86 | xfs_fsblock_t firstblock; /* first block allocated in xaction */ |
86 | xfs_bmap_free_t flist; /* list of freed blocks */ | 87 | xfs_bmap_free_t flist; /* list of freed blocks */ |
87 | xfs_fsblock_t fsbno; /* filesystem block for bno */ | 88 | xfs_fsblock_t fsbno; /* filesystem block for bno */ |
88 | xfs_inode_t *ip; /* pointer to incore inode */ | ||
89 | xfs_bmbt_irec_t map; /* block map output */ | 89 | xfs_bmbt_irec_t map; /* block map output */ |
90 | int nmap; /* number of block maps */ | 90 | int nmap; /* number of block maps */ |
91 | int resblks; /* space reservation */ | 91 | int resblks; /* space reservation */ |
@@ -111,9 +111,9 @@ xfs_growfs_rt_alloc( | |||
111 | /* | 111 | /* |
112 | * Lock the inode. | 112 | * Lock the inode. |
113 | */ | 113 | */ |
114 | if ((error = xfs_trans_iget(mp, tp, ino, 0, | 114 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
115 | XFS_ILOCK_EXCL, &ip))) | 115 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); |
116 | goto error_cancel; | 116 | |
117 | xfs_bmap_init(&flist, &firstblock); | 117 | xfs_bmap_init(&flist, &firstblock); |
118 | /* | 118 | /* |
119 | * Allocate blocks to the bitmap file. | 119 | * Allocate blocks to the bitmap file. |
@@ -154,9 +154,8 @@ xfs_growfs_rt_alloc( | |||
154 | /* | 154 | /* |
155 | * Lock the bitmap inode. | 155 | * Lock the bitmap inode. |
156 | */ | 156 | */ |
157 | if ((error = xfs_trans_iget(mp, tp, ino, 0, | 157 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
158 | XFS_ILOCK_EXCL, &ip))) | 158 | xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); |
159 | goto error_cancel; | ||
160 | /* | 159 | /* |
161 | * Get a buffer for the block. | 160 | * Get a buffer for the block. |
162 | */ | 161 | */ |
@@ -1853,7 +1852,6 @@ xfs_growfs_rt( | |||
1853 | xfs_rtblock_t bmbno; /* bitmap block number */ | 1852 | xfs_rtblock_t bmbno; /* bitmap block number */ |
1854 | xfs_buf_t *bp; /* temporary buffer */ | 1853 | xfs_buf_t *bp; /* temporary buffer */ |
1855 | int error; /* error return value */ | 1854 | int error; /* error return value */ |
1856 | xfs_inode_t *ip; /* bitmap inode, used as lock */ | ||
1857 | xfs_mount_t *nmp; /* new (fake) mount structure */ | 1855 | xfs_mount_t *nmp; /* new (fake) mount structure */ |
1858 | xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ | 1856 | xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ |
1859 | xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ | 1857 | xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ |
@@ -1883,13 +1881,13 @@ xfs_growfs_rt( | |||
1883 | /* | 1881 | /* |
1884 | * Read in the last block of the device, make sure it exists. | 1882 | * Read in the last block of the device, make sure it exists. |
1885 | */ | 1883 | */ |
1886 | error = xfs_read_buf(mp, mp->m_rtdev_targp, | 1884 | bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, |
1887 | XFS_FSB_TO_BB(mp, nrblocks - 1), | 1885 | XFS_FSB_TO_BB(mp, nrblocks - 1), |
1888 | XFS_FSB_TO_BB(mp, 1), 0, &bp); | 1886 | XFS_FSB_TO_B(mp, 1), 0); |
1889 | if (error) | 1887 | if (!bp) |
1890 | return error; | 1888 | return EIO; |
1891 | ASSERT(bp); | ||
1892 | xfs_buf_relse(bp); | 1889 | xfs_buf_relse(bp); |
1890 | |||
1893 | /* | 1891 | /* |
1894 | * Calculate new parameters. These are the final values to be reached. | 1892 | * Calculate new parameters. These are the final values to be reached. |
1895 | */ | 1893 | */ |
@@ -1917,11 +1915,11 @@ xfs_growfs_rt( | |||
1917 | /* | 1915 | /* |
1918 | * Allocate space to the bitmap and summary files, as necessary. | 1916 | * Allocate space to the bitmap and summary files, as necessary. |
1919 | */ | 1917 | */ |
1920 | if ((error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, | 1918 | error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip); |
1921 | mp->m_sb.sb_rbmino))) | 1919 | if (error) |
1922 | return error; | 1920 | return error; |
1923 | if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, | 1921 | error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip); |
1924 | mp->m_sb.sb_rsumino))) | 1922 | if (error) |
1925 | return error; | 1923 | return error; |
1926 | /* | 1924 | /* |
1927 | * Allocate a new (fake) mount/sb. | 1925 | * Allocate a new (fake) mount/sb. |
@@ -1971,10 +1969,8 @@ xfs_growfs_rt( | |||
1971 | /* | 1969 | /* |
1972 | * Lock out other callers by grabbing the bitmap inode lock. | 1970 | * Lock out other callers by grabbing the bitmap inode lock. |
1973 | */ | 1971 | */ |
1974 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, | 1972 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); |
1975 | XFS_ILOCK_EXCL, &ip))) | 1973 | xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL); |
1976 | goto error_cancel; | ||
1977 | ASSERT(ip == mp->m_rbmip); | ||
1978 | /* | 1974 | /* |
1979 | * Update the bitmap inode's size. | 1975 | * Update the bitmap inode's size. |
1980 | */ | 1976 | */ |
@@ -1985,10 +1981,8 @@ xfs_growfs_rt( | |||
1985 | /* | 1981 | /* |
1986 | * Get the summary inode into the transaction. | 1982 | * Get the summary inode into the transaction. |
1987 | */ | 1983 | */ |
1988 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, | 1984 | xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); |
1989 | XFS_ILOCK_EXCL, &ip))) | 1985 | xfs_trans_ijoin_ref(tp, mp->m_rsumip, XFS_ILOCK_EXCL); |
1990 | goto error_cancel; | ||
1991 | ASSERT(ip == mp->m_rsumip); | ||
1992 | /* | 1986 | /* |
1993 | * Update the summary inode's size. | 1987 | * Update the summary inode's size. |
1994 | */ | 1988 | */ |
@@ -2074,15 +2068,15 @@ xfs_rtallocate_extent( | |||
2074 | xfs_extlen_t prod, /* extent product factor */ | 2068 | xfs_extlen_t prod, /* extent product factor */ |
2075 | xfs_rtblock_t *rtblock) /* out: start block allocated */ | 2069 | xfs_rtblock_t *rtblock) /* out: start block allocated */ |
2076 | { | 2070 | { |
2071 | xfs_mount_t *mp = tp->t_mountp; | ||
2077 | int error; /* error value */ | 2072 | int error; /* error value */ |
2078 | xfs_inode_t *ip; /* inode for bitmap file */ | ||
2079 | xfs_mount_t *mp; /* file system mount structure */ | ||
2080 | xfs_rtblock_t r; /* result allocated block */ | 2073 | xfs_rtblock_t r; /* result allocated block */ |
2081 | xfs_fsblock_t sb; /* summary file block number */ | 2074 | xfs_fsblock_t sb; /* summary file block number */ |
2082 | xfs_buf_t *sumbp; /* summary file block buffer */ | 2075 | xfs_buf_t *sumbp; /* summary file block buffer */ |
2083 | 2076 | ||
2077 | ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); | ||
2084 | ASSERT(minlen > 0 && minlen <= maxlen); | 2078 | ASSERT(minlen > 0 && minlen <= maxlen); |
2085 | mp = tp->t_mountp; | 2079 | |
2086 | /* | 2080 | /* |
2087 | * If prod is set then figure out what to do to minlen and maxlen. | 2081 | * If prod is set then figure out what to do to minlen and maxlen. |
2088 | */ | 2082 | */ |
@@ -2098,12 +2092,7 @@ xfs_rtallocate_extent( | |||
2098 | return 0; | 2092 | return 0; |
2099 | } | 2093 | } |
2100 | } | 2094 | } |
2101 | /* | 2095 | |
2102 | * Lock out other callers by grabbing the bitmap inode lock. | ||
2103 | */ | ||
2104 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, | ||
2105 | XFS_ILOCK_EXCL, &ip))) | ||
2106 | return error; | ||
2107 | sumbp = NULL; | 2096 | sumbp = NULL; |
2108 | /* | 2097 | /* |
2109 | * Allocate by size, or near another block, or exactly at some block. | 2098 | * Allocate by size, or near another block, or exactly at some block. |
@@ -2122,11 +2111,12 @@ xfs_rtallocate_extent( | |||
2122 | len, &sumbp, &sb, prod, &r); | 2111 | len, &sumbp, &sb, prod, &r); |
2123 | break; | 2112 | break; |
2124 | default: | 2113 | default: |
2114 | error = EIO; | ||
2125 | ASSERT(0); | 2115 | ASSERT(0); |
2126 | } | 2116 | } |
2127 | if (error) { | 2117 | if (error) |
2128 | return error; | 2118 | return error; |
2129 | } | 2119 | |
2130 | /* | 2120 | /* |
2131 | * If it worked, update the superblock. | 2121 | * If it worked, update the superblock. |
2132 | */ | 2122 | */ |
@@ -2154,7 +2144,6 @@ xfs_rtfree_extent( | |||
2154 | xfs_extlen_t len) /* length of extent freed */ | 2144 | xfs_extlen_t len) /* length of extent freed */ |
2155 | { | 2145 | { |
2156 | int error; /* error value */ | 2146 | int error; /* error value */ |
2157 | xfs_inode_t *ip; /* bitmap file inode */ | ||
2158 | xfs_mount_t *mp; /* file system mount structure */ | 2147 | xfs_mount_t *mp; /* file system mount structure */ |
2159 | xfs_fsblock_t sb; /* summary file block number */ | 2148 | xfs_fsblock_t sb; /* summary file block number */ |
2160 | xfs_buf_t *sumbp; /* summary file block buffer */ | 2149 | xfs_buf_t *sumbp; /* summary file block buffer */ |
@@ -2163,9 +2152,9 @@ xfs_rtfree_extent( | |||
2163 | /* | 2152 | /* |
2164 | * Synchronize by locking the bitmap inode. | 2153 | * Synchronize by locking the bitmap inode. |
2165 | */ | 2154 | */ |
2166 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, | 2155 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); |
2167 | XFS_ILOCK_EXCL, &ip))) | 2156 | xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL); |
2168 | return error; | 2157 | |
2169 | #if defined(__KERNEL__) && defined(DEBUG) | 2158 | #if defined(__KERNEL__) && defined(DEBUG) |
2170 | /* | 2159 | /* |
2171 | * Check to see that this whole range is currently allocated. | 2160 | * Check to see that this whole range is currently allocated. |
@@ -2198,10 +2187,10 @@ xfs_rtfree_extent( | |||
2198 | */ | 2187 | */ |
2199 | if (tp->t_frextents_delta + mp->m_sb.sb_frextents == | 2188 | if (tp->t_frextents_delta + mp->m_sb.sb_frextents == |
2200 | mp->m_sb.sb_rextents) { | 2189 | mp->m_sb.sb_rextents) { |
2201 | if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) | 2190 | if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) |
2202 | ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; | 2191 | mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; |
2203 | *(__uint64_t *)&ip->i_d.di_atime = 0; | 2192 | *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; |
2204 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2193 | xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); |
2205 | } | 2194 | } |
2206 | return 0; | 2195 | return 0; |
2207 | } | 2196 | } |
@@ -2215,15 +2204,14 @@ xfs_rtmount_init( | |||
2215 | { | 2204 | { |
2216 | xfs_buf_t *bp; /* buffer for last block of subvolume */ | 2205 | xfs_buf_t *bp; /* buffer for last block of subvolume */ |
2217 | xfs_daddr_t d; /* address of last block of subvolume */ | 2206 | xfs_daddr_t d; /* address of last block of subvolume */ |
2218 | int error; /* error return value */ | ||
2219 | xfs_sb_t *sbp; /* filesystem superblock copy in mount */ | 2207 | xfs_sb_t *sbp; /* filesystem superblock copy in mount */ |
2220 | 2208 | ||
2221 | sbp = &mp->m_sb; | 2209 | sbp = &mp->m_sb; |
2222 | if (sbp->sb_rblocks == 0) | 2210 | if (sbp->sb_rblocks == 0) |
2223 | return 0; | 2211 | return 0; |
2224 | if (mp->m_rtdev_targp == NULL) { | 2212 | if (mp->m_rtdev_targp == NULL) { |
2225 | cmn_err(CE_WARN, | 2213 | xfs_warn(mp, |
2226 | "XFS: This filesystem has a realtime volume, use rtdev=device option"); | 2214 | "Filesystem has a realtime volume, use rtdev=device option"); |
2227 | return XFS_ERROR(ENODEV); | 2215 | return XFS_ERROR(ENODEV); |
2228 | } | 2216 | } |
2229 | mp->m_rsumlevels = sbp->sb_rextslog + 1; | 2217 | mp->m_rsumlevels = sbp->sb_rextslog + 1; |
@@ -2237,20 +2225,17 @@ xfs_rtmount_init( | |||
2237 | */ | 2225 | */ |
2238 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); | 2226 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); |
2239 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { | 2227 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { |
2240 | cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", | 2228 | xfs_warn(mp, "realtime mount -- %llu != %llu", |
2241 | (unsigned long long) XFS_BB_TO_FSB(mp, d), | 2229 | (unsigned long long) XFS_BB_TO_FSB(mp, d), |
2242 | (unsigned long long) mp->m_sb.sb_rblocks); | 2230 | (unsigned long long) mp->m_sb.sb_rblocks); |
2243 | return XFS_ERROR(EFBIG); | 2231 | return XFS_ERROR(EFBIG); |
2244 | } | 2232 | } |
2245 | error = xfs_read_buf(mp, mp->m_rtdev_targp, | 2233 | bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, |
2246 | d - XFS_FSB_TO_BB(mp, 1), | 2234 | d - XFS_FSB_TO_BB(mp, 1), |
2247 | XFS_FSB_TO_BB(mp, 1), 0, &bp); | 2235 | XFS_FSB_TO_B(mp, 1), 0); |
2248 | if (error) { | 2236 | if (!bp) { |
2249 | cmn_err(CE_WARN, | 2237 | xfs_warn(mp, "realtime device size check failed"); |
2250 | "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); | 2238 | return EIO; |
2251 | if (error == ENOSPC) | ||
2252 | return XFS_ERROR(EFBIG); | ||
2253 | return error; | ||
2254 | } | 2239 | } |
2255 | xfs_buf_relse(bp); | 2240 | xfs_buf_relse(bp); |
2256 | return 0; | 2241 | return 0; |
@@ -2309,20 +2294,16 @@ xfs_rtpick_extent( | |||
2309 | xfs_rtblock_t *pick) /* result rt extent */ | 2294 | xfs_rtblock_t *pick) /* result rt extent */ |
2310 | { | 2295 | { |
2311 | xfs_rtblock_t b; /* result block */ | 2296 | xfs_rtblock_t b; /* result block */ |
2312 | int error; /* error return value */ | ||
2313 | xfs_inode_t *ip; /* bitmap incore inode */ | ||
2314 | int log2; /* log of sequence number */ | 2297 | int log2; /* log of sequence number */ |
2315 | __uint64_t resid; /* residual after log removed */ | 2298 | __uint64_t resid; /* residual after log removed */ |
2316 | __uint64_t seq; /* sequence number of file creation */ | 2299 | __uint64_t seq; /* sequence number of file creation */ |
2317 | __uint64_t *seqp; /* pointer to seqno in inode */ | 2300 | __uint64_t *seqp; /* pointer to seqno in inode */ |
2318 | 2301 | ||
2319 | if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, | 2302 | ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); |
2320 | XFS_ILOCK_EXCL, &ip))) | 2303 | |
2321 | return error; | 2304 | seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime; |
2322 | ASSERT(ip == mp->m_rbmip); | 2305 | if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) { |
2323 | seqp = (__uint64_t *)&ip->i_d.di_atime; | 2306 | mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; |
2324 | if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) { | ||
2325 | ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; | ||
2326 | *seqp = 0; | 2307 | *seqp = 0; |
2327 | } | 2308 | } |
2328 | seq = *seqp; | 2309 | seq = *seqp; |
@@ -2338,7 +2319,7 @@ xfs_rtpick_extent( | |||
2338 | b = mp->m_sb.sb_rextents - len; | 2319 | b = mp->m_sb.sb_rextents - len; |
2339 | } | 2320 | } |
2340 | *seqp = seq + 1; | 2321 | *seqp = seq + 1; |
2341 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2322 | xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); |
2342 | *pick = b; | 2323 | *pick = b; |
2343 | return 0; | 2324 | return 0; |
2344 | } | 2325 | } |
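A recurring change through xfs_rtalloc.c: instead of each routine re-acquiring the bitmap inode with xfs_trans_iget(), the caller takes the lock once (xfs_ilock() plus xfs_trans_ijoin_ref()) and leaf routines such as xfs_rtallocate_extent() and xfs_rtpick_extent() merely assert it with xfs_isilocked(). A generic sketch of that caller-holds-lock contract (pthread and assert as stand-ins):

#include <assert.h>
#include <pthread.h>

static pthread_mutex_t bitmap_lock = PTHREAD_MUTEX_INITIALIZER;
static int bitmap_locked;

/* leaf routine: it no longer takes the lock itself, it documents and
 * checks the caller's obligation instead */
static void pick_extent(void)
{
    assert(bitmap_locked);    /* ASSERT(xfs_isilocked(...)) equivalent */
    /* ... scan the bitmap ... */
}

int main(void)
{
    pthread_mutex_lock(&bitmap_lock);   /* xfs_ilock(mp->m_rbmip, ...) */
    bitmap_locked = 1;
    pick_extent();
    bitmap_locked = 0;
    pthread_mutex_unlock(&bitmap_lock);
    return 0;
}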
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index ff614c29b441..09e1f4f35e97 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h | |||
@@ -154,7 +154,7 @@ xfs_rtmount_init( | |||
154 | if (mp->m_sb.sb_rblocks == 0) | 154 | if (mp->m_sb.sb_rblocks == 0) |
155 | return 0; | 155 | return 0; |
156 | 156 | ||
157 | cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); | 157 | xfs_warn(mp, "Not built with CONFIG_XFS_RT"); |
158 | return ENOSYS; | 158 | return ENOSYS; |
159 | } | 159 | } |
160 | # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) | 160 | # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 56861d5daaef..d6d6fdfe9422 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
@@ -49,9 +49,9 @@ xfs_do_force_shutdown( | |||
49 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; | 49 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; |
50 | 50 | ||
51 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 51 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
52 | cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " | 52 | xfs_notice(mp, |
53 | "line %d of file %s. Return address = 0x%p", | 53 | "%s(0x%x) called from line %d of file %s. Return address = 0x%p", |
54 | mp->m_fsname, flags, lnnum, fname, __return_address); | 54 | __func__, flags, lnnum, fname, __return_address); |
55 | } | 55 | } |
56 | /* | 56 | /* |
57 | * No need to duplicate efforts. | 57 | * No need to duplicate efforts. |
@@ -69,30 +69,25 @@ xfs_do_force_shutdown( | |||
69 | return; | 69 | return; |
70 | 70 | ||
71 | if (flags & SHUTDOWN_CORRUPT_INCORE) { | 71 | if (flags & SHUTDOWN_CORRUPT_INCORE) { |
72 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, | 72 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, |
73 | "Corruption of in-memory data detected. Shutting down filesystem: %s", | 73 | "Corruption of in-memory data detected. Shutting down filesystem"); |
74 | mp->m_fsname); | 74 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) |
75 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) { | ||
76 | xfs_stack_trace(); | 75 | xfs_stack_trace(); |
77 | } | ||
78 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 76 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
79 | if (logerror) { | 77 | if (logerror) { |
80 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, | 78 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, |
81 | "Log I/O Error Detected. Shutting down filesystem: %s", | 79 | "Log I/O Error Detected. Shutting down filesystem"); |
82 | mp->m_fsname); | ||
83 | } else if (flags & SHUTDOWN_DEVICE_REQ) { | 80 | } else if (flags & SHUTDOWN_DEVICE_REQ) { |
84 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, | 81 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, |
85 | "All device paths lost. Shutting down filesystem: %s", | 82 | "All device paths lost. Shutting down filesystem"); |
86 | mp->m_fsname); | ||
87 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { | 83 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { |
88 | xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, | 84 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, |
89 | "I/O Error Detected. Shutting down filesystem: %s", | 85 | "I/O Error Detected. Shutting down filesystem"); |
90 | mp->m_fsname); | ||
91 | } | 86 | } |
92 | } | 87 | } |
93 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | 88 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { |
94 | cmn_err(CE_ALERT, "Please umount the filesystem, " | 89 | xfs_alert(mp, |
95 | "and rectify the problem(s)"); | 90 | "Please umount the filesystem and rectify the problem(s)"); |
96 | } | 91 | } |
97 | } | 92 | } |
98 | 93 | ||
@@ -106,10 +101,9 @@ xfs_ioerror_alert( | |||
106 | xfs_buf_t *bp, | 101 | xfs_buf_t *bp, |
107 | xfs_daddr_t blkno) | 102 | xfs_daddr_t blkno) |
108 | { | 103 | { |
109 | cmn_err(CE_ALERT, | 104 | xfs_alert(mp, |
110 | "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" | 105 | "I/O error occurred: meta-data dev %s block 0x%llx" |
111 | " (\"%s\") error %d buf count %zd", | 106 | " (\"%s\") error %d buf count %zd", |
112 | (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname, | ||
113 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 107 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), |
114 | (__uint64_t)blkno, func, | 108 | (__uint64_t)blkno, func, |
115 | XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); | 109 | XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); |
@@ -173,17 +167,9 @@ xfs_extlen_t | |||
173 | xfs_get_extsz_hint( | 167 | xfs_get_extsz_hint( |
174 | struct xfs_inode *ip) | 168 | struct xfs_inode *ip) |
175 | { | 169 | { |
176 | xfs_extlen_t extsz; | 170 | if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) |
177 | 171 | return ip->i_d.di_extsize; | |
178 | if (unlikely(XFS_IS_REALTIME_INODE(ip))) { | 172 | if (XFS_IS_REALTIME_INODE(ip)) |
179 | extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) | 173 | return ip->i_mount->m_sb.sb_rextsize; |
180 | ? ip->i_d.di_extsize | 174 | return 0; |
181 | : ip->i_mount->m_sb.sb_rextsize; | ||
182 | ASSERT(extsz); | ||
183 | } else { | ||
184 | extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) | ||
185 | ? ip->i_d.di_extsize : 0; | ||
186 | } | ||
187 | |||
188 | return extsz; | ||
189 | } | 175 | } |
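The flattened xfs_get_extsz_hint() reads as a priority list: an explicit per-inode extent-size hint wins, a realtime inode falls back to the filesystem's realtime extent size, and anything else gets 0 (no hint). Note the behavior change folded in: a realtime inode with XFS_DIFLAG_EXTSIZE set but a zero di_extsize now falls through to sb_rextsize instead of tripping the old ASSERT. A standalone restatement of the decision table:

#include <stdio.h>

#define DIFLAG_EXTSIZE  0x1
#define DIFLAG_REALTIME 0x2

static unsigned get_extsz_hint(unsigned flags, unsigned di_extsize,
                               unsigned sb_rextsize)
{
    if ((flags & DIFLAG_EXTSIZE) && di_extsize)
        return di_extsize;            /* explicit hint wins */
    if (flags & DIFLAG_REALTIME)
        return sb_rextsize;           /* realtime default */
    return 0;                         /* no hint */
}

int main(void)
{
    printf("%u\n", get_extsz_hint(DIFLAG_EXTSIZE, 16, 4));   /* 16 */
    printf("%u\n", get_extsz_hint(DIFLAG_REALTIME, 0, 4));   /* 4 */
    printf("%u\n", get_extsz_hint(0, 0, 4));                 /* 0 */
    return 0;
}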
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 1b017c657494..1eb2ba586814 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
@@ -80,10 +80,12 @@ struct xfs_mount; | |||
80 | #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 | 80 | #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 |
81 | #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ | 81 | #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ |
82 | #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ | 82 | #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ |
83 | #define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ | ||
83 | 84 | ||
84 | #define XFS_SB_VERSION2_OKREALFBITS \ | 85 | #define XFS_SB_VERSION2_OKREALFBITS \ |
85 | (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ | 86 | (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ |
86 | XFS_SB_VERSION2_ATTR2BIT) | 87 | XFS_SB_VERSION2_ATTR2BIT | \ |
88 | XFS_SB_VERSION2_PROJID32BIT) | ||
87 | #define XFS_SB_VERSION2_OKSASHFBITS \ | 89 | #define XFS_SB_VERSION2_OKSASHFBITS \ |
88 | (0) | 90 | (0) |
89 | #define XFS_SB_VERSION2_OKREALBITS \ | 91 | #define XFS_SB_VERSION2_OKREALBITS \ |
@@ -495,6 +497,12 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) | |||
495 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; | 497 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; |
496 | } | 498 | } |
497 | 499 | ||
500 | static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) | ||
501 | { | ||
502 | return xfs_sb_version_hasmorebits(sbp) && | ||
503 | (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT); | ||
504 | } | ||
505 | |||
498 | /* | 506 | /* |
499 | * end of superblock version macros | 507 | * end of superblock version macros |
500 | */ | 508 | */ |
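The new xfs_sb_version_hasprojid32bit() helper is a two-level feature test: a bit in sb_features2 only counts if the MOREBITS flag in the primary version word says the features2 field is valid at all. A minimal userspace sketch of the same gating pattern (constants and field names here are illustrative stand-ins, not the on-disk XFS values):

    #include <stdint.h>
    #include <stdbool.h>

    #define SB_VERSION_MOREBITS  0x8000u   /* "features2 is valid" flag */
    #define SB_FEAT2_PROJID32    0x0080u   /* 32-bit project IDs */

    struct sb {
        uint16_t versionnum;   /* primary version/feature word */
        uint32_t features2;    /* extended feature bits */
    };

    static bool sb_has_projid32(const struct sb *sbp)
    {
        /* The features2 bit is meaningful only when MOREBITS is set. */
        return (sbp->versionnum & SB_VERSION_MOREBITS) &&
               (sbp->features2 & SB_FEAT2_PROJID32);
    }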
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 1c47edaea0d2..c83f63b33aae 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -608,10 +608,8 @@ STATIC void | |||
608 | xfs_trans_free( | 608 | xfs_trans_free( |
609 | struct xfs_trans *tp) | 609 | struct xfs_trans *tp) |
610 | { | 610 | { |
611 | struct xfs_busy_extent *busyp, *n; | 611 | xfs_alloc_busy_sort(&tp->t_busy); |
612 | 612 | xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false); | |
613 | list_for_each_entry_safe(busyp, n, &tp->t_busy, list) | ||
614 | xfs_alloc_busy_clear(tp->t_mountp, busyp); | ||
615 | 613 | ||
616 | atomic_dec(&tp->t_mountp->m_active_trans); | 614 | atomic_dec(&tp->t_mountp->m_active_trans); |
617 | xfs_trans_free_dqinfo(tp); | 615 | xfs_trans_free_dqinfo(tp); |
@@ -696,7 +694,7 @@ xfs_trans_reserve( | |||
696 | * fail if the count would go below zero. | 694 | * fail if the count would go below zero. |
697 | */ | 695 | */ |
698 | if (blocks > 0) { | 696 | if (blocks > 0) { |
699 | error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, | 697 | error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, |
700 | -((int64_t)blocks), rsvd); | 698 | -((int64_t)blocks), rsvd); |
701 | if (error != 0) { | 699 | if (error != 0) { |
702 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 700 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
@@ -767,7 +765,7 @@ undo_log: | |||
767 | 765 | ||
768 | undo_blocks: | 766 | undo_blocks: |
769 | if (blocks > 0) { | 767 | if (blocks > 0) { |
770 | (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, | 768 | xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, |
771 | (int64_t)blocks, rsvd); | 769 | (int64_t)blocks, rsvd); |
772 | tp->t_blk_res = 0; | 770 | tp->t_blk_res = 0; |
773 | } | 771 | } |
@@ -1009,7 +1007,7 @@ void | |||
1009 | xfs_trans_unreserve_and_mod_sb( | 1007 | xfs_trans_unreserve_and_mod_sb( |
1010 | xfs_trans_t *tp) | 1008 | xfs_trans_t *tp) |
1011 | { | 1009 | { |
1012 | xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ | 1010 | xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ |
1013 | xfs_mod_sb_t *msbp; | 1011 | xfs_mod_sb_t *msbp; |
1014 | xfs_mount_t *mp = tp->t_mountp; | 1012 | xfs_mount_t *mp = tp->t_mountp; |
1015 | /* REFERENCED */ | 1013 | /* REFERENCED */ |
@@ -1017,55 +1015,61 @@ xfs_trans_unreserve_and_mod_sb( | |||
1017 | int rsvd; | 1015 | int rsvd; |
1018 | int64_t blkdelta = 0; | 1016 | int64_t blkdelta = 0; |
1019 | int64_t rtxdelta = 0; | 1017 | int64_t rtxdelta = 0; |
1018 | int64_t idelta = 0; | ||
1019 | int64_t ifreedelta = 0; | ||
1020 | 1020 | ||
1021 | msbp = msb; | 1021 | msbp = msb; |
1022 | rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; | 1022 | rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; |
1023 | 1023 | ||
1024 | /* calculate free blocks delta */ | 1024 | /* calculate deltas */ |
1025 | if (tp->t_blk_res > 0) | 1025 | if (tp->t_blk_res > 0) |
1026 | blkdelta = tp->t_blk_res; | 1026 | blkdelta = tp->t_blk_res; |
1027 | |||
1028 | if ((tp->t_fdblocks_delta != 0) && | 1027 | if ((tp->t_fdblocks_delta != 0) && |
1029 | (xfs_sb_version_haslazysbcount(&mp->m_sb) || | 1028 | (xfs_sb_version_haslazysbcount(&mp->m_sb) || |
1030 | (tp->t_flags & XFS_TRANS_SB_DIRTY))) | 1029 | (tp->t_flags & XFS_TRANS_SB_DIRTY))) |
1031 | blkdelta += tp->t_fdblocks_delta; | 1030 | blkdelta += tp->t_fdblocks_delta; |
1032 | 1031 | ||
1033 | if (blkdelta != 0) { | ||
1034 | msbp->msb_field = XFS_SBS_FDBLOCKS; | ||
1035 | msbp->msb_delta = blkdelta; | ||
1036 | msbp++; | ||
1037 | } | ||
1038 | |||
1039 | /* calculate free realtime extents delta */ | ||
1040 | if (tp->t_rtx_res > 0) | 1032 | if (tp->t_rtx_res > 0) |
1041 | rtxdelta = tp->t_rtx_res; | 1033 | rtxdelta = tp->t_rtx_res; |
1042 | |||
1043 | if ((tp->t_frextents_delta != 0) && | 1034 | if ((tp->t_frextents_delta != 0) && |
1044 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) | 1035 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) |
1045 | rtxdelta += tp->t_frextents_delta; | 1036 | rtxdelta += tp->t_frextents_delta; |
1046 | 1037 | ||
1038 | if (xfs_sb_version_haslazysbcount(&mp->m_sb) || | ||
1039 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) { | ||
1040 | idelta = tp->t_icount_delta; | ||
1041 | ifreedelta = tp->t_ifree_delta; | ||
1042 | } | ||
1043 | |||
1044 | /* apply the per-cpu counters */ | ||
1045 | if (blkdelta) { | ||
1046 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | ||
1047 | blkdelta, rsvd); | ||
1048 | if (error) | ||
1049 | goto out; | ||
1050 | } | ||
1051 | |||
1052 | if (idelta) { | ||
1053 | error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, | ||
1054 | idelta, rsvd); | ||
1055 | if (error) | ||
1056 | goto out_undo_fdblocks; | ||
1057 | } | ||
1058 | |||
1059 | if (ifreedelta) { | ||
1060 | error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, | ||
1061 | ifreedelta, rsvd); | ||
1062 | if (error) | ||
1063 | goto out_undo_icount; | ||
1064 | } | ||
1065 | |||
1066 | /* apply remaining deltas */ | ||
1047 | if (rtxdelta != 0) { | 1067 | if (rtxdelta != 0) { |
1048 | msbp->msb_field = XFS_SBS_FREXTENTS; | 1068 | msbp->msb_field = XFS_SBS_FREXTENTS; |
1049 | msbp->msb_delta = rtxdelta; | 1069 | msbp->msb_delta = rtxdelta; |
1050 | msbp++; | 1070 | msbp++; |
1051 | } | 1071 | } |
1052 | 1072 | ||
1053 | /* apply remaining deltas */ | ||
1054 | |||
1055 | if (xfs_sb_version_haslazysbcount(&mp->m_sb) || | ||
1056 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) { | ||
1057 | if (tp->t_icount_delta != 0) { | ||
1058 | msbp->msb_field = XFS_SBS_ICOUNT; | ||
1059 | msbp->msb_delta = tp->t_icount_delta; | ||
1060 | msbp++; | ||
1061 | } | ||
1062 | if (tp->t_ifree_delta != 0) { | ||
1063 | msbp->msb_field = XFS_SBS_IFREE; | ||
1064 | msbp->msb_delta = tp->t_ifree_delta; | ||
1065 | msbp++; | ||
1066 | } | ||
1067 | } | ||
1068 | |||
1069 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) { | 1073 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) { |
1070 | if (tp->t_dblocks_delta != 0) { | 1074 | if (tp->t_dblocks_delta != 0) { |
1071 | msbp->msb_field = XFS_SBS_DBLOCKS; | 1075 | msbp->msb_field = XFS_SBS_DBLOCKS; |
@@ -1115,8 +1119,24 @@ xfs_trans_unreserve_and_mod_sb( | |||
1115 | if (msbp > msb) { | 1119 | if (msbp > msb) { |
1116 | error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, | 1120 | error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, |
1117 | (uint)(msbp - msb), rsvd); | 1121 | (uint)(msbp - msb), rsvd); |
1118 | ASSERT(error == 0); | 1122 | if (error) |
1123 | goto out_undo_ifreecount; | ||
1119 | } | 1124 | } |
1125 | |||
1126 | return; | ||
1127 | |||
1128 | out_undo_ifreecount: | ||
1129 | if (ifreedelta) | ||
1130 | xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); | ||
1131 | out_undo_icount: | ||
1132 | if (idelta) | ||
1133 | xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); | ||
1134 | out_undo_fdblocks: | ||
1135 | if (blkdelta) | ||
1136 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); | ||
1137 | out: | ||
1138 | ASSERT(error == 0); | ||
1139 | return; | ||
1120 | } | 1140 | } |
1121 | 1141 | ||
1122 | /* | 1142 | /* |
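The reworked xfs_trans_unreserve_and_mod_sb() error path above uses the kernel's stacked-goto unwind idiom: counters are applied in order, and a failure part-way through backs out exactly the steps that already succeeded, in reverse. A standalone sketch of the idiom, with hypothetical counters in place of the XFS per-cpu superblock fields:

    /* Apply a delta to a counter, refusing to let it go negative. */
    static int apply(long *counter, long delta)
    {
        if (*counter + delta < 0)
            return -1;
        *counter += delta;
        return 0;
    }

    /* All-or-nothing update: each failure label undoes only the
     * steps that already succeeded, in reverse order. */
    int apply_deltas(long *blocks, long *icount, long *ifree,
                     long blkdelta, long idelta, long ifreedelta)
    {
        if (apply(blocks, blkdelta))
            goto out;
        if (apply(icount, idelta))
            goto out_undo_blocks;
        if (apply(ifree, ifreedelta))
            goto out_undo_icount;
        return 0;

    out_undo_icount:
        apply(icount, -idelta);   /* cannot fail: removing what we added */
    out_undo_blocks:
        apply(blocks, -blkdelta);
    out:
        return -1;
    }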
@@ -1328,7 +1348,7 @@ xfs_trans_fill_vecs( | |||
1328 | * they could be immediately flushed and we'd have to race with the flusher | 1348 | * they could be immediately flushed and we'd have to race with the flusher |
1329 | * trying to pull the item from the AIL as we add it. | 1349 | * trying to pull the item from the AIL as we add it. |
1330 | */ | 1350 | */ |
1331 | void | 1351 | static void |
1332 | xfs_trans_item_committed( | 1352 | xfs_trans_item_committed( |
1333 | struct xfs_log_item *lip, | 1353 | struct xfs_log_item *lip, |
1334 | xfs_lsn_t commit_lsn, | 1354 | xfs_lsn_t commit_lsn, |
@@ -1341,7 +1361,7 @@ xfs_trans_item_committed( | |||
1341 | lip->li_flags |= XFS_LI_ABORTED; | 1361 | lip->li_flags |= XFS_LI_ABORTED; |
1342 | item_lsn = IOP_COMMITTED(lip, commit_lsn); | 1362 | item_lsn = IOP_COMMITTED(lip, commit_lsn); |
1343 | 1363 | ||
1344 | /* If the committed routine returns -1, item has been freed. */ | 1364 | /* item_lsn of -1 means the item needs no further processing */ |
1345 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) | 1365 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) |
1346 | return; | 1366 | return; |
1347 | 1367 | ||
@@ -1389,15 +1409,12 @@ xfs_trans_item_committed( | |||
1389 | */ | 1409 | */ |
1390 | STATIC void | 1410 | STATIC void |
1391 | xfs_trans_committed( | 1411 | xfs_trans_committed( |
1392 | struct xfs_trans *tp, | 1412 | void *arg, |
1393 | int abortflag) | 1413 | int abortflag) |
1394 | { | 1414 | { |
1415 | struct xfs_trans *tp = arg; | ||
1395 | struct xfs_log_item_desc *lidp, *next; | 1416 | struct xfs_log_item_desc *lidp, *next; |
1396 | 1417 | ||
1397 | /* Call the transaction's completion callback if there is one. */ | ||
1398 | if (tp->t_callback != NULL) | ||
1399 | tp->t_callback(tp, tp->t_callarg); | ||
1400 | |||
1401 | list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { | 1418 | list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { |
1402 | xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); | 1419 | xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); |
1403 | xfs_trans_free_item_desc(lidp); | 1420 | xfs_trans_free_item_desc(lidp); |
@@ -1406,21 +1423,120 @@ xfs_trans_committed( | |||
1406 | xfs_trans_free(tp); | 1423 | xfs_trans_free(tp); |
1407 | } | 1424 | } |
1408 | 1425 | ||
1426 | static inline void | ||
1427 | xfs_log_item_batch_insert( | ||
1428 | struct xfs_ail *ailp, | ||
1429 | struct xfs_log_item **log_items, | ||
1430 | int nr_items, | ||
1431 | xfs_lsn_t commit_lsn) | ||
1432 | { | ||
1433 | int i; | ||
1434 | |||
1435 | spin_lock(&ailp->xa_lock); | ||
1436 | /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ | ||
1437 | xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); | ||
1438 | |||
1439 | for (i = 0; i < nr_items; i++) | ||
1440 | IOP_UNPIN(log_items[i], 0); | ||
1441 | } | ||
1442 | |||
1443 | /* | ||
1444 | * Bulk operation version of xfs_trans_committed that takes a log vector of | ||
1445 | * items to insert into the AIL. This uses bulk AIL insertion techniques to | ||
1446 | * minimise lock traffic. | ||
1447 | * | ||
1448 | * If we are called with the aborted flag set, it is because a log write during | ||
1449 | * a CIL checkpoint commit has failed. In this case, all the items in the | ||
1450 | * checkpoint have already gone through IOP_COMMITTED and IOP_UNLOCK, which | ||
1451 | * means that checkpoint commit abort handling is treated exactly the same | ||
1452 | * as an iclog write error even though we haven't started any IO yet. Hence in | ||
1453 | * this case all we need to do is IOP_COMMITTED processing, followed by an | ||
1454 | * IOP_UNPIN(aborted) call. | ||
1455 | */ | ||
1456 | void | ||
1457 | xfs_trans_committed_bulk( | ||
1458 | struct xfs_ail *ailp, | ||
1459 | struct xfs_log_vec *log_vector, | ||
1460 | xfs_lsn_t commit_lsn, | ||
1461 | int aborted) | ||
1462 | { | ||
1463 | #define LOG_ITEM_BATCH_SIZE 32 | ||
1464 | struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; | ||
1465 | struct xfs_log_vec *lv; | ||
1466 | int i = 0; | ||
1467 | |||
1468 | /* unpin all the log items */ | ||
1469 | for (lv = log_vector; lv; lv = lv->lv_next ) { | ||
1470 | struct xfs_log_item *lip = lv->lv_item; | ||
1471 | xfs_lsn_t item_lsn; | ||
1472 | |||
1473 | if (aborted) | ||
1474 | lip->li_flags |= XFS_LI_ABORTED; | ||
1475 | item_lsn = IOP_COMMITTED(lip, commit_lsn); | ||
1476 | |||
1477 | /* item_lsn of -1 means the item needs no further processing */ | ||
1478 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) | ||
1479 | continue; | ||
1480 | |||
1481 | /* | ||
1482 | * if we are aborting the operation, no point in inserting the | ||
1483 | * object into the AIL as we are in a shutdown situation. | ||
1484 | */ | ||
1485 | if (aborted) { | ||
1486 | ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); | ||
1487 | IOP_UNPIN(lip, 1); | ||
1488 | continue; | ||
1489 | } | ||
1490 | |||
1491 | if (item_lsn != commit_lsn) { | ||
1492 | |||
1493 | /* | ||
1494 | * Not a bulk update option due to unusual item_lsn. | ||
1495 | * Push into AIL immediately, rechecking the lsn once | ||
1496 | * we have the ail lock. Then unpin the item. | ||
1497 | */ | ||
1498 | spin_lock(&ailp->xa_lock); | ||
1499 | if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) | ||
1500 | xfs_trans_ail_update(ailp, lip, item_lsn); | ||
1501 | else | ||
1502 | spin_unlock(&ailp->xa_lock); | ||
1503 | IOP_UNPIN(lip, 0); | ||
1504 | continue; | ||
1505 | } | ||
1506 | |||
1507 | /* Item is a candidate for bulk AIL insert. */ | ||
1508 | log_items[i++] = lv->lv_item; | ||
1509 | if (i >= LOG_ITEM_BATCH_SIZE) { | ||
1510 | xfs_log_item_batch_insert(ailp, log_items, | ||
1511 | LOG_ITEM_BATCH_SIZE, commit_lsn); | ||
1512 | i = 0; | ||
1513 | } | ||
1514 | } | ||
1515 | |||
1516 | /* make sure we insert the remainder! */ | ||
1517 | if (i) | ||
1518 | xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); | ||
1519 | } | ||
1520 | |||
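The LOG_ITEM_BATCH_SIZE loop above amortises the AIL lock: instead of one lock round trip per log item, the lock is taken once per batch of 32, with a final flush for any remainder (the trailing "if (i)"). The shape of the pattern reduced to a userspace sketch, with a pthread mutex standing in for the spinlock:

    #include <pthread.h>

    #define BATCH 32

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static long total;

    /* Fold n values into a shared total, taking the lock once per
     * batch of up to BATCH values instead of once per value. */
    void add_bulk(const long *vals, int n)
    {
        long sum = 0;
        int i, in_batch = 0;

        for (i = 0; i < n; i++) {
            sum += vals[i];
            if (++in_batch == BATCH) {
                pthread_mutex_lock(&lock);
                total += sum;
                pthread_mutex_unlock(&lock);
                sum = 0;
                in_batch = 0;
            }
        }
        if (in_batch) {          /* flush the remainder */
            pthread_mutex_lock(&lock);
            total += sum;
            pthread_mutex_unlock(&lock);
        }
    }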
1409 | /* | 1521 | /* |
1410 | * Called from the trans_commit code when we notice that | 1522 | * Called from the trans_commit code when we notice that the filesystem is in |
1411 | * the filesystem is in the middle of a forced shutdown. | 1523 | * the middle of a forced shutdown. |
1524 | * | ||
1525 | * When we are called here, we have already pinned all the items in the | ||
1526 | * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called | ||
1527 | * so we can simply walk the items in the transaction, unpin them with an abort | ||
1528 | * flag and then free the items. Note that unpinning the items can result in | ||
1529 | * them being freed immediately, so we need to use a safe list traversal method | ||
1530 | * here. | ||
1412 | */ | 1531 | */ |
1413 | STATIC void | 1532 | STATIC void |
1414 | xfs_trans_uncommit( | 1533 | xfs_trans_uncommit( |
1415 | struct xfs_trans *tp, | 1534 | struct xfs_trans *tp, |
1416 | uint flags) | 1535 | uint flags) |
1417 | { | 1536 | { |
1418 | struct xfs_log_item_desc *lidp; | 1537 | struct xfs_log_item_desc *lidp, *n; |
1419 | 1538 | ||
1420 | list_for_each_entry(lidp, &tp->t_items, lid_trans) { | 1539 | list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) { |
1421 | /* | ||
1422 | * Unpin all but those that aren't dirty. | ||
1423 | */ | ||
1424 | if (lidp->lid_flags & XFS_LID_DIRTY) | 1540 | if (lidp->lid_flags & XFS_LID_DIRTY) |
1425 | IOP_UNPIN(lidp->lid_item, 1); | 1541 | IOP_UNPIN(lidp->lid_item, 1); |
1426 | } | 1542 | } |
@@ -1525,7 +1641,7 @@ xfs_trans_commit_iclog( | |||
1525 | * running in simulation mode (the log is explicitly turned | 1641 | * running in simulation mode (the log is explicitly turned |
1526 | * off). | 1642 | * off). |
1527 | */ | 1643 | */ |
1528 | tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; | 1644 | tp->t_logcb.cb_func = xfs_trans_committed; |
1529 | tp->t_logcb.cb_arg = tp; | 1645 | tp->t_logcb.cb_arg = tp; |
1530 | 1646 | ||
1531 | /* | 1647 | /* |
@@ -1637,7 +1753,6 @@ xfs_trans_commit_cil( | |||
1637 | int flags) | 1753 | int flags) |
1638 | { | 1754 | { |
1639 | struct xfs_log_vec *log_vector; | 1755 | struct xfs_log_vec *log_vector; |
1640 | int error; | ||
1641 | 1756 | ||
1642 | /* | 1757 | /* |
1643 | * Get each log item to allocate a vector structure for | 1758 | * Get each log item to allocate a vector structure for |
@@ -1648,9 +1763,7 @@ xfs_trans_commit_cil( | |||
1648 | if (!log_vector) | 1763 | if (!log_vector) |
1649 | return ENOMEM; | 1764 | return ENOMEM; |
1650 | 1765 | ||
1651 | error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); | 1766 | xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); |
1652 | if (error) | ||
1653 | return error; | ||
1654 | 1767 | ||
1655 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1768 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1656 | xfs_trans_free(tp); | 1769 | xfs_trans_free(tp); |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c13c0f97b494..06a9759b6352 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -294,8 +294,8 @@ struct xfs_log_item_desc { | |||
294 | #define XFS_ALLOC_BTREE_REF 2 | 294 | #define XFS_ALLOC_BTREE_REF 2 |
295 | #define XFS_BMAP_BTREE_REF 2 | 295 | #define XFS_BMAP_BTREE_REF 2 |
296 | #define XFS_DIR_BTREE_REF 2 | 296 | #define XFS_DIR_BTREE_REF 2 |
297 | #define XFS_INO_REF 2 | ||
297 | #define XFS_ATTR_BTREE_REF 1 | 298 | #define XFS_ATTR_BTREE_REF 1 |
298 | #define XFS_INO_REF 1 | ||
299 | #define XFS_DQUOT_REF 1 | 299 | #define XFS_DQUOT_REF 1 |
300 | 300 | ||
301 | #ifdef __KERNEL__ | 301 | #ifdef __KERNEL__ |
@@ -399,8 +399,6 @@ typedef struct xfs_trans { | |||
399 | * transaction. */ | 399 | * transaction. */ |
400 | struct xfs_mount *t_mountp; /* ptr to fs mount struct */ | 400 | struct xfs_mount *t_mountp; /* ptr to fs mount struct */ |
401 | struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ | 401 | struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ |
402 | xfs_trans_callback_t t_callback; /* transaction callback */ | ||
403 | void *t_callarg; /* callback arg */ | ||
404 | unsigned int t_flags; /* misc flags */ | 402 | unsigned int t_flags; /* misc flags */ |
405 | int64_t t_icount_delta; /* superblock icount change */ | 403 | int64_t t_icount_delta; /* superblock icount change */ |
406 | int64_t t_ifree_delta; /* superblock ifree change */ | 404 | int64_t t_ifree_delta; /* superblock ifree change */ |
@@ -471,8 +469,7 @@ void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); | |||
471 | void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); | 469 | void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); |
472 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); | 470 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); |
473 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); | 471 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); |
474 | int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, | 472 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); |
475 | xfs_ino_t , uint, uint, struct xfs_inode **); | ||
476 | void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); | 473 | void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); |
477 | void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); | 474 | void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); |
478 | void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); | 475 | void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index dc9069568ff7..5fc2380092c8 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -28,74 +28,138 @@ | |||
28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
30 | 30 | ||
31 | STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); | 31 | struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ |
32 | STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); | ||
33 | STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); | ||
34 | STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); | ||
35 | 32 | ||
36 | #ifdef DEBUG | 33 | #ifdef DEBUG |
37 | STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); | 34 | /* |
38 | #else | 35 | * Check that the list is sorted as it should be. |
36 | */ | ||
37 | STATIC void | ||
38 | xfs_ail_check( | ||
39 | struct xfs_ail *ailp, | ||
40 | xfs_log_item_t *lip) | ||
41 | { | ||
42 | xfs_log_item_t *prev_lip; | ||
43 | |||
44 | if (list_empty(&ailp->xa_ail)) | ||
45 | return; | ||
46 | |||
47 | /* | ||
48 | * Check the next and previous entries are valid. | ||
49 | */ | ||
50 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
51 | prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); | ||
52 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
53 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
54 | |||
55 | prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); | ||
56 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
57 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); | ||
58 | |||
59 | |||
60 | #ifdef XFS_TRANS_DEBUG | ||
61 | /* | ||
62 | * Walk the list checking lsn ordering, and that every entry has the | ||
63 | * XFS_LI_IN_AIL flag set. This is really expensive, so only do it | ||
64 | * when specifically debugging the transaction subsystem. | ||
65 | */ | ||
66 | prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
67 | list_for_each_entry(lip, &ailp->xa_ail, li_ail) { | ||
68 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
69 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
70 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
71 | prev_lip = lip; | ||
72 | } | ||
73 | #endif /* XFS_TRANS_DEBUG */ | ||
74 | } | ||
75 | #else /* !DEBUG */ | ||
39 | #define xfs_ail_check(a,l) | 76 | #define xfs_ail_check(a,l) |
40 | #endif /* DEBUG */ | 77 | #endif /* DEBUG */ |
41 | 78 | ||
79 | /* | ||
80 | * Return a pointer to the first item in the AIL. If the AIL is empty, then | ||
81 | * return NULL. | ||
82 | */ | ||
83 | static xfs_log_item_t * | ||
84 | xfs_ail_min( | ||
85 | struct xfs_ail *ailp) | ||
86 | { | ||
87 | if (list_empty(&ailp->xa_ail)) | ||
88 | return NULL; | ||
89 | |||
90 | return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Return a pointer to the last item in the AIL. If the AIL is empty, then | ||
95 | * return NULL. | ||
96 | */ | ||
97 | static xfs_log_item_t * | ||
98 | xfs_ail_max( | ||
99 | struct xfs_ail *ailp) | ||
100 | { | ||
101 | if (list_empty(&ailp->xa_ail)) | ||
102 | return NULL; | ||
103 | |||
104 | return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Return a pointer to the item which follows the given item in the AIL. If | ||
109 | * the given item is the last item in the list, then return NULL. | ||
110 | */ | ||
111 | static xfs_log_item_t * | ||
112 | xfs_ail_next( | ||
113 | struct xfs_ail *ailp, | ||
114 | xfs_log_item_t *lip) | ||
115 | { | ||
116 | if (lip->li_ail.next == &ailp->xa_ail) | ||
117 | return NULL; | ||
118 | |||
119 | return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); | ||
120 | } | ||
42 | 121 | ||
43 | /* | 122 | /* |
44 | * This is called by the log manager code to determine the LSN | 123 | * This is called by the log manager code to determine the LSN of the tail of |
45 | * of the tail of the log. This is exactly the LSN of the first | 124 | * the log. This is exactly the LSN of the first item in the AIL. If the AIL |
46 | * item in the AIL. If the AIL is empty, then this function | 125 | * is empty, then this function returns 0. |
47 | * returns 0. | ||
48 | * | 126 | * |
49 | * We need the AIL lock in order to get a coherent read of the | 127 | * We need the AIL lock in order to get a coherent read of the lsn of the last |
50 | * lsn of the last item in the AIL. | 128 | * item in the AIL. |
51 | */ | 129 | */ |
52 | xfs_lsn_t | 130 | xfs_lsn_t |
53 | xfs_trans_ail_tail( | 131 | xfs_ail_min_lsn( |
54 | struct xfs_ail *ailp) | 132 | struct xfs_ail *ailp) |
55 | { | 133 | { |
56 | xfs_lsn_t lsn; | 134 | xfs_lsn_t lsn = 0; |
57 | xfs_log_item_t *lip; | 135 | xfs_log_item_t *lip; |
58 | 136 | ||
59 | spin_lock(&ailp->xa_lock); | 137 | spin_lock(&ailp->xa_lock); |
60 | lip = xfs_ail_min(ailp); | 138 | lip = xfs_ail_min(ailp); |
61 | if (lip == NULL) { | 139 | if (lip) |
62 | lsn = (xfs_lsn_t)0; | ||
63 | } else { | ||
64 | lsn = lip->li_lsn; | 140 | lsn = lip->li_lsn; |
65 | } | ||
66 | spin_unlock(&ailp->xa_lock); | 141 | spin_unlock(&ailp->xa_lock); |
67 | 142 | ||
68 | return lsn; | 143 | return lsn; |
69 | } | 144 | } |
70 | 145 | ||
71 | /* | 146 | /* |
72 | * xfs_trans_push_ail | 147 | * Return the maximum lsn held in the AIL, or zero if the AIL is empty. |
73 | * | ||
74 | * This routine is called to move the tail of the AIL forward. It does this by | ||
75 | * trying to flush items in the AIL whose lsns are below the given | ||
76 | * threshold_lsn. | ||
77 | * | ||
78 | * the push is run asynchronously in a separate thread, so we return the tail | ||
79 | * of the log right now instead of the tail after the push. This means we will | ||
80 | * either continue right away, or we will sleep waiting on the async thread to | ||
81 | * do its work. | ||
82 | * | ||
83 | * We do this unlocked - we only need to know whether there is anything in the | ||
84 | * AIL at the time we are called. We don't need to access the contents of | ||
85 | * any of the objects, so the lock is not needed. | ||
86 | */ | 148 | */ |
87 | void | 149 | static xfs_lsn_t |
88 | xfs_trans_ail_push( | 150 | xfs_ail_max_lsn( |
89 | struct xfs_ail *ailp, | 151 | struct xfs_ail *ailp) |
90 | xfs_lsn_t threshold_lsn) | ||
91 | { | 152 | { |
92 | xfs_log_item_t *lip; | 153 | xfs_lsn_t lsn = 0; |
154 | xfs_log_item_t *lip; | ||
93 | 155 | ||
94 | lip = xfs_ail_min(ailp); | 156 | spin_lock(&ailp->xa_lock); |
95 | if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { | 157 | lip = xfs_ail_max(ailp); |
96 | if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) | 158 | if (lip) |
97 | xfsaild_wakeup(ailp, threshold_lsn); | 159 | lsn = lip->li_lsn; |
98 | } | 160 | spin_unlock(&ailp->xa_lock); |
161 | |||
162 | return lsn; | ||
99 | } | 163 | } |
100 | 164 | ||
101 | /* | 165 | /* |
@@ -236,35 +300,78 @@ out: | |||
236 | } | 300 | } |
237 | 301 | ||
238 | /* | 302 | /* |
239 | * xfsaild_push does the work of pushing on the AIL. Returning a timeout of | 303 | * splice the log item list into the AIL at the given LSN. |
240 | * zero indicates that the caller should sleep until woken. | ||
241 | */ | 304 | */ |
242 | long | 305 | static void |
243 | xfsaild_push( | 306 | xfs_ail_splice( |
244 | struct xfs_ail *ailp, | 307 | struct xfs_ail *ailp, |
245 | xfs_lsn_t *last_lsn) | 308 | struct list_head *list, |
309 | xfs_lsn_t lsn) | ||
246 | { | 310 | { |
247 | long tout = 0; | 311 | xfs_log_item_t *next_lip; |
248 | xfs_lsn_t last_pushed_lsn = *last_lsn; | 312 | |
249 | xfs_lsn_t target = ailp->xa_target; | 313 | /* If the list is empty, just insert the item. */ |
250 | xfs_lsn_t lsn; | 314 | if (list_empty(&ailp->xa_ail)) { |
251 | xfs_log_item_t *lip; | 315 | list_splice(list, &ailp->xa_ail); |
252 | int flush_log, count, stuck; | 316 | return; |
253 | xfs_mount_t *mp = ailp->xa_mount; | 317 | } |
318 | |||
319 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { | ||
320 | if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) | ||
321 | break; | ||
322 | } | ||
323 | |||
324 | ASSERT(&next_lip->li_ail == &ailp->xa_ail || | ||
325 | XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); | ||
326 | |||
327 | list_splice_init(list, &next_lip->li_ail); | ||
328 | } | ||
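xfs_ail_splice() walks the list in reverse because log items are almost always inserted at or near the tail of the LSN-sorted AIL, so the scan usually stops after a step or two. A self-contained sketch of the same reverse-scan splice on a sentinel-headed circular doubly linked list (simplified node type, not the kernel's list.h):

    struct node {
        int lsn;                    /* sort key, ascending */
        struct node *prev, *next;
    };

    /* Link the chain first..last in right after pos. */
    static void link_after(struct node *pos, struct node *first,
                           struct node *last)
    {
        last->next = pos->next;
        pos->next->prev = last;
        pos->next = first;
        first->prev = pos;
    }

    /* Splice a chain of items sharing "lsn" into the sorted list,
     * scanning backwards from the tail for the insertion point. */
    void splice_sorted(struct node *head, struct node *first,
                       struct node *last, int lsn)
    {
        struct node *p;

        for (p = head->prev; p != head; p = p->prev)
            if (p->lsn <= lsn)
                break;
        /* p is the last entry with key <= lsn, or the sentinel. */
        link_after(p, first, last);
    }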
329 | |||
330 | /* | ||
331 | * Delete the given item from the AIL. Return a pointer to the item. | ||
332 | */ | ||
333 | static void | ||
334 | xfs_ail_delete( | ||
335 | struct xfs_ail *ailp, | ||
336 | xfs_log_item_t *lip) | ||
337 | { | ||
338 | xfs_ail_check(ailp, lip); | ||
339 | list_del(&lip->li_ail); | ||
340 | xfs_trans_ail_cursor_clear(ailp, lip); | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself | ||
345 | * to run at a later time if there is more work to do to complete the push. | ||
346 | */ | ||
347 | STATIC void | ||
348 | xfs_ail_worker( | ||
349 | struct work_struct *work) | ||
350 | { | ||
351 | struct xfs_ail *ailp = container_of(to_delayed_work(work), | ||
352 | struct xfs_ail, xa_work); | ||
353 | xfs_mount_t *mp = ailp->xa_mount; | ||
254 | struct xfs_ail_cursor *cur = &ailp->xa_cursors; | 354 | struct xfs_ail_cursor *cur = &ailp->xa_cursors; |
255 | int push_xfsbufd = 0; | 355 | xfs_log_item_t *lip; |
356 | xfs_lsn_t lsn; | ||
357 | xfs_lsn_t target; | ||
358 | long tout = 10; | ||
359 | int flush_log = 0; | ||
360 | int stuck = 0; | ||
361 | int count = 0; | ||
362 | int push_xfsbufd = 0; | ||
256 | 363 | ||
257 | spin_lock(&ailp->xa_lock); | 364 | spin_lock(&ailp->xa_lock); |
365 | target = ailp->xa_target; | ||
258 | xfs_trans_ail_cursor_init(ailp, cur); | 366 | xfs_trans_ail_cursor_init(ailp, cur); |
259 | lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); | 367 | lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); |
260 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { | 368 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { |
261 | /* | 369 | /* |
262 | * AIL is empty or our push has reached the end. | 370 | * AIL is empty or our push has reached the end. |
263 | */ | 371 | */ |
264 | xfs_trans_ail_cursor_done(ailp, cur); | 372 | xfs_trans_ail_cursor_done(ailp, cur); |
265 | spin_unlock(&ailp->xa_lock); | 373 | spin_unlock(&ailp->xa_lock); |
266 | *last_lsn = 0; | 374 | goto out_done; |
267 | return tout; | ||
268 | } | 375 | } |
269 | 376 | ||
270 | XFS_STATS_INC(xs_push_ail); | 377 | XFS_STATS_INC(xs_push_ail); |
@@ -281,8 +388,7 @@ xfsaild_push( | |||
281 | * lots of contention on the AIL lists. | 388 | * lots of contention on the AIL lists. |
282 | */ | 389 | */ |
283 | lsn = lip->li_lsn; | 390 | lsn = lip->li_lsn; |
284 | flush_log = stuck = count = 0; | 391 | while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { |
285 | while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { | ||
286 | int lock_result; | 392 | int lock_result; |
287 | /* | 393 | /* |
288 | * If we can lock the item without sleeping, unlock the AIL | 394 | * If we can lock the item without sleeping, unlock the AIL |
@@ -301,13 +407,13 @@ xfsaild_push( | |||
301 | case XFS_ITEM_SUCCESS: | 407 | case XFS_ITEM_SUCCESS: |
302 | XFS_STATS_INC(xs_push_ail_success); | 408 | XFS_STATS_INC(xs_push_ail_success); |
303 | IOP_PUSH(lip); | 409 | IOP_PUSH(lip); |
304 | last_pushed_lsn = lsn; | 410 | ailp->xa_last_pushed_lsn = lsn; |
305 | break; | 411 | break; |
306 | 412 | ||
307 | case XFS_ITEM_PUSHBUF: | 413 | case XFS_ITEM_PUSHBUF: |
308 | XFS_STATS_INC(xs_push_ail_pushbuf); | 414 | XFS_STATS_INC(xs_push_ail_pushbuf); |
309 | IOP_PUSHBUF(lip); | 415 | IOP_PUSHBUF(lip); |
310 | last_pushed_lsn = lsn; | 416 | ailp->xa_last_pushed_lsn = lsn; |
311 | push_xfsbufd = 1; | 417 | push_xfsbufd = 1; |
312 | break; | 418 | break; |
313 | 419 | ||
@@ -319,7 +425,7 @@ xfsaild_push( | |||
319 | 425 | ||
320 | case XFS_ITEM_LOCKED: | 426 | case XFS_ITEM_LOCKED: |
321 | XFS_STATS_INC(xs_push_ail_locked); | 427 | XFS_STATS_INC(xs_push_ail_locked); |
322 | last_pushed_lsn = lsn; | 428 | ailp->xa_last_pushed_lsn = lsn; |
323 | stuck++; | 429 | stuck++; |
324 | break; | 430 | break; |
325 | 431 | ||
@@ -374,9 +480,27 @@ xfsaild_push( | |||
374 | wake_up_process(mp->m_ddev_targp->bt_task); | 480 | wake_up_process(mp->m_ddev_targp->bt_task); |
375 | } | 481 | } |
376 | 482 | ||
483 | /* assume we have more work to do in a short while */ | ||
484 | out_done: | ||
377 | if (!count) { | 485 | if (!count) { |
378 | /* We're past our target or empty, so idle */ | 486 | /* We're past our target or empty, so idle */ |
379 | last_pushed_lsn = 0; | 487 | ailp->xa_last_pushed_lsn = 0; |
488 | |||
489 | /* | ||
490 | * We clear the XFS_AIL_PUSHING_BIT first before checking | ||
491 | * whether the target has changed. If the target has changed, | ||
492 | * this pushes the requeue race directly onto the result of the | ||
493 | * atomic test/set bit, so we are guaranteed that either the | ||
494 | * pusher that changed the target or ourselves will requeue | ||
495 | * the work (but not both). | ||
496 | */ | ||
497 | clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); | ||
498 | smp_rmb(); | ||
499 | if (XFS_LSN_CMP(ailp->xa_target, target) == 0 || | ||
500 | test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) | ||
501 | return; | ||
502 | |||
503 | tout = 50; | ||
380 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { | 504 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { |
381 | /* | 505 | /* |
382 | * We reached the target so wait a bit longer for I/O to | 506 | * We reached the target so wait a bit longer for I/O to |
@@ -384,7 +508,7 @@ xfsaild_push( | |||
384 | * start the next scan from the start of the AIL. | 508 | * start the next scan from the start of the AIL. |
385 | */ | 509 | */ |
386 | tout = 50; | 510 | tout = 50; |
387 | last_pushed_lsn = 0; | 511 | ailp->xa_last_pushed_lsn = 0; |
388 | } else if ((stuck * 100) / count > 90) { | 512 | } else if ((stuck * 100) / count > 90) { |
389 | /* | 513 | /* |
390 | * Either there is a lot of contention on the AIL or we | 514 | * Either there is a lot of contention on the AIL or we |
@@ -396,14 +520,61 @@ xfsaild_push( | |||
396 | * continuing from where we were. | 520 | * continuing from where we were. |
397 | */ | 521 | */ |
398 | tout = 20; | 522 | tout = 20; |
399 | } else { | ||
400 | /* more to do, but wait a short while before continuing */ | ||
401 | tout = 10; | ||
402 | } | 523 | } |
403 | *last_lsn = last_pushed_lsn; | 524 | |
404 | return tout; | 525 | /* There is more to do, requeue us. */ |
526 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, | ||
527 | msecs_to_jiffies(tout)); | ||
405 | } | 528 | } |
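The clear-then-recheck sequence at the end of the worker pairs with the test_and_set_bit() in xfs_ail_push() below so that exactly one side requeues the work. A compressed model of that handshake using C11 atomics in place of the kernel bitops; queue_work() is a hypothetical stand-in for queue_delayed_work():

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool pushing;        /* models XFS_AIL_PUSHING_BIT */
    static _Atomic long push_target;   /* models ailp->xa_target */

    static void queue_work(void) { /* stand-in for queue_delayed_work() */ }

    /* Pusher: publish the new target, then queue the worker only if
     * no push is already in flight. */
    void push(long new_target)
    {
        atomic_store(&push_target, new_target);
        if (!atomic_exchange(&pushing, true))
            queue_work();
    }

    /* Worker, on going idle: clear the flag *before* re-reading the
     * target, so a racing pusher either still sees the flag set (and
     * skips queueing) or loses the atomic_exchange() to us. Either
     * way, exactly one party requeues. */
    void worker_idle(long scanned_target)
    {
        atomic_store(&pushing, false);
        if (atomic_load(&push_target) == scanned_target)
            return;                 /* target unchanged: stay idle */
        if (atomic_exchange(&pushing, true))
            return;                 /* the pusher already requeued */
        queue_work();               /* we saw the change: requeue */
    }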
406 | 529 | ||
530 | /* | ||
531 | * This routine is called to move the tail of the AIL forward. It does this by | ||
532 | * trying to flush items in the AIL whose lsns are below the given | ||
533 | * threshold_lsn. | ||
534 | * | ||
535 | * The push is run asynchronously in a workqueue, which means the caller needs | ||
536 | * to handle waiting on the async flush for space to become available. | ||
537 | * We don't want to interrupt any push that is in progress, hence we only queue | ||
538 | * work if we set the pushing bit appropriately. | ||
539 | * | ||
540 | * We do this unlocked - we only need to know whether there is anything in the | ||
541 | * AIL at the time we are called. We don't need to access the contents of | ||
542 | * any of the objects, so the lock is not needed. | ||
543 | */ | ||
544 | void | ||
545 | xfs_ail_push( | ||
546 | struct xfs_ail *ailp, | ||
547 | xfs_lsn_t threshold_lsn) | ||
548 | { | ||
549 | xfs_log_item_t *lip; | ||
550 | |||
551 | lip = xfs_ail_min(ailp); | ||
552 | if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) || | ||
553 | XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0) | ||
554 | return; | ||
555 | |||
556 | /* | ||
557 | * Ensure that the new target is noticed in push code before it clears | ||
558 | * the XFS_AIL_PUSHING_BIT. | ||
559 | */ | ||
560 | smp_wmb(); | ||
561 | xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); | ||
562 | if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) | ||
563 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); | ||
564 | } | ||
565 | |||
566 | /* | ||
567 | * Push out all items in the AIL immediately | ||
568 | */ | ||
569 | void | ||
570 | xfs_ail_push_all( | ||
571 | struct xfs_ail *ailp) | ||
572 | { | ||
573 | xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp); | ||
574 | |||
575 | if (threshold_lsn) | ||
576 | xfs_ail_push(ailp, threshold_lsn); | ||
577 | } | ||
407 | 578 | ||
408 | /* | 579 | /* |
409 | * This is to be called when an item is unlocked that may have | 580 | * This is to be called when an item is unlocked that may have |
@@ -449,129 +620,152 @@ xfs_trans_unlocked_item( | |||
449 | xfs_log_move_tail(ailp->xa_mount, 1); | 620 | xfs_log_move_tail(ailp->xa_mount, 1); |
450 | } /* xfs_trans_unlocked_item */ | 621 | } /* xfs_trans_unlocked_item */ |
451 | 622 | ||
452 | |||
453 | /* | 623 | /* |
454 | * Update the position of the item in the AIL with the new | 624 | * xfs_trans_ail_update_bulk - bulk AIL insertion operation.
455 | * lsn. If it is not yet in the AIL, add it. Otherwise, move | 625 | * |
456 | * it to its new position by removing it and re-adding it. | 626 | * @xfs_trans_ail_update_bulk takes an array of log items that all need to be
627 | * positioned at the same LSN in the AIL. If an item is not in the AIL, it will | ||
628 | * be added. Otherwise, it will be repositioned by removing it and re-adding | ||
629 | * it to the AIL. If we move the first item in the AIL, update the log tail to | ||
630 | * match the new minimum LSN in the AIL. | ||
631 | * | ||
632 | * This function takes the AIL lock once to execute the update operations on | ||
633 | * all the items in the array, and as such should not be called with the AIL | ||
634 | * lock held. As a result, once we have the AIL lock, we need to check each log | ||
635 | * item LSN to confirm it needs to be moved forward in the AIL. | ||
457 | * | 636 | * |
458 | * Wakeup anyone with an lsn less than the item's lsn. If the item | 637 | * To optimise the insert operation, we delete all the items from the AIL in |
459 | * we move in the AIL is the minimum one, update the tail lsn in the | 638 | * the first pass, moving them into a temporary list, then splice the temporary |
460 | * log manager. | 639 | * list into the correct position in the AIL. This avoids needing to do an |
640 | * insert operation on every item. | ||
461 | * | 641 | * |
462 | * This function must be called with the AIL lock held. The lock | 642 | * This function must be called with the AIL lock held. The lock is dropped |
463 | * is dropped before returning. | 643 | * before returning. |
464 | */ | 644 | */ |
465 | void | 645 | void |
466 | xfs_trans_ail_update( | 646 | xfs_trans_ail_update_bulk( |
467 | struct xfs_ail *ailp, | 647 | struct xfs_ail *ailp, |
468 | xfs_log_item_t *lip, | 648 | struct xfs_log_item **log_items, |
469 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | 649 | int nr_items, |
650 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | ||
470 | { | 651 | { |
471 | xfs_log_item_t *dlip = NULL; | 652 | xfs_log_item_t *mlip; |
472 | xfs_log_item_t *mlip; /* ptr to minimum lip */ | ||
473 | xfs_lsn_t tail_lsn; | 653 | xfs_lsn_t tail_lsn; |
654 | int mlip_changed = 0; | ||
655 | int i; | ||
656 | LIST_HEAD(tmp); | ||
474 | 657 | ||
475 | mlip = xfs_ail_min(ailp); | 658 | mlip = xfs_ail_min(ailp); |
476 | 659 | ||
477 | if (lip->li_flags & XFS_LI_IN_AIL) { | 660 | for (i = 0; i < nr_items; i++) { |
478 | dlip = xfs_ail_delete(ailp, lip); | 661 | struct xfs_log_item *lip = log_items[i]; |
479 | ASSERT(dlip == lip); | 662 | if (lip->li_flags & XFS_LI_IN_AIL) { |
480 | xfs_trans_ail_cursor_clear(ailp, dlip); | 663 | /* check if we really need to move the item */ |
481 | } else { | 664 | if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0) |
482 | lip->li_flags |= XFS_LI_IN_AIL; | 665 | continue; |
666 | |||
667 | xfs_ail_delete(ailp, lip); | ||
668 | if (mlip == lip) | ||
669 | mlip_changed = 1; | ||
670 | } else { | ||
671 | lip->li_flags |= XFS_LI_IN_AIL; | ||
672 | } | ||
673 | lip->li_lsn = lsn; | ||
674 | list_add(&lip->li_ail, &tmp); | ||
483 | } | 675 | } |
484 | 676 | ||
485 | lip->li_lsn = lsn; | 677 | xfs_ail_splice(ailp, &tmp, lsn); |
486 | xfs_ail_insert(ailp, lip); | ||
487 | 678 | ||
488 | if (mlip == dlip) { | 679 | if (!mlip_changed) { |
489 | mlip = xfs_ail_min(ailp); | ||
490 | /* | ||
491 | * It is not safe to access mlip after the AIL lock is | ||
492 | * dropped, so we must get a copy of li_lsn before we do | ||
493 | * so. This is especially important on 32-bit platforms | ||
494 | * where accessing and updating 64-bit values like li_lsn | ||
495 | * is not atomic. | ||
496 | */ | ||
497 | tail_lsn = mlip->li_lsn; | ||
498 | spin_unlock(&ailp->xa_lock); | ||
499 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
500 | } else { | ||
501 | spin_unlock(&ailp->xa_lock); | 680 | spin_unlock(&ailp->xa_lock); |
681 | return; | ||
502 | } | 682 | } |
503 | 683 | ||
504 | 684 | /* | |
505 | } /* xfs_trans_update_ail */ | 685 | * It is not safe to access mlip after the AIL lock is dropped, so we |
686 | * must get a copy of li_lsn before we do so. This is especially | ||
687 | * important on 32-bit platforms where accessing and updating 64-bit | ||
688 | * values like li_lsn is not atomic. | ||
689 | */ | ||
690 | mlip = xfs_ail_min(ailp); | ||
691 | tail_lsn = mlip->li_lsn; | ||
692 | spin_unlock(&ailp->xa_lock); | ||
693 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
694 | } | ||
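Copying li_lsn while still holding the lock matters because, as the comment notes, a 64-bit load on a 32-bit machine takes two instructions, so an unlocked reader can see half of an old value combined with half of a new one. A userspace illustration of the safe snapshot pattern, with a pthread mutex in place of the spinlock:

    #include <stdint.h>
    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t shared_lsn;     /* only updated under "lock" */

    /* Snapshot the 64-bit value while writers are excluded; the copy
     * stays coherent after the unlock, unlike a direct unlocked read,
     * which may tear on 32-bit targets. */
    uint64_t read_lsn(void)
    {
        uint64_t copy;

        pthread_mutex_lock(&lock);
        copy = shared_lsn;
        pthread_mutex_unlock(&lock);
        return copy;
    }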
506 | 695 | ||
507 | /* | 696 | /* |
508 | * Delete the given item from the AIL. It must already be in | 697 | * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL |
509 | * the AIL. | 698 | * |
699 | * @xfs_trans_ail_delete_bulk takes an array of log items that all need to be | ||
700 | * removed from the AIL. The caller is already holding the AIL lock, and has done | ||
701 | * all the checks necessary to ensure the items passed in via @log_items are | ||
702 | * ready for deletion. This includes checking that the items are in the AIL. | ||
510 | * | 703 | * |
511 | * Wakeup anyone with an lsn less than item's lsn. If the item | 704 | * For each log item to be removed, unlink it from the AIL, clear the IN_AIL |
512 | * we delete in the AIL is the minimum one, update the tail lsn in the | 705 | * flag from the item and reset the item's lsn to 0. If we remove the first |
513 | * log manager. | 706 | * item in the AIL, update the log tail to match the new minimum LSN in the |
707 | * AIL. | ||
514 | * | 708 | * |
515 | * Clear the IN_AIL flag from the item, reset its lsn to 0, and | 709 | * This function will not drop the AIL lock until all items are removed from |
516 | * bump the AIL's generation count to indicate that the tree | 710 | * the AIL to minimise the amount of lock traffic on the AIL. This does not |
517 | * has changed. | 711 | * greatly increase the AIL hold time, but does significantly reduce the amount |
712 | * of traffic on the lock, especially during IO completion. | ||
518 | * | 713 | * |
519 | * This function must be called with the AIL lock held. The lock | 714 | * This function must be called with the AIL lock held. The lock is dropped |
520 | * is dropped before returning. | 715 | * before returning. |
521 | */ | 716 | */ |
522 | void | 717 | void |
523 | xfs_trans_ail_delete( | 718 | xfs_trans_ail_delete_bulk( |
524 | struct xfs_ail *ailp, | 719 | struct xfs_ail *ailp, |
525 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | 720 | struct xfs_log_item **log_items, |
721 | int nr_items) __releases(ailp->xa_lock) | ||
526 | { | 722 | { |
527 | xfs_log_item_t *dlip; | ||
528 | xfs_log_item_t *mlip; | 723 | xfs_log_item_t *mlip; |
529 | xfs_lsn_t tail_lsn; | 724 | xfs_lsn_t tail_lsn; |
725 | int mlip_changed = 0; | ||
726 | int i; | ||
530 | 727 | ||
531 | if (lip->li_flags & XFS_LI_IN_AIL) { | 728 | mlip = xfs_ail_min(ailp); |
532 | mlip = xfs_ail_min(ailp); | ||
533 | dlip = xfs_ail_delete(ailp, lip); | ||
534 | ASSERT(dlip == lip); | ||
535 | xfs_trans_ail_cursor_clear(ailp, dlip); | ||
536 | 729 | ||
730 | for (i = 0; i < nr_items; i++) { | ||
731 | struct xfs_log_item *lip = log_items[i]; | ||
732 | if (!(lip->li_flags & XFS_LI_IN_AIL)) { | ||
733 | struct xfs_mount *mp = ailp->xa_mount; | ||
537 | 734 | ||
538 | lip->li_flags &= ~XFS_LI_IN_AIL; | ||
539 | lip->li_lsn = 0; | ||
540 | |||
541 | if (mlip == dlip) { | ||
542 | mlip = xfs_ail_min(ailp); | ||
543 | /* | ||
544 | * It is not safe to access mlip after the AIL lock | ||
545 | * is dropped, so we must get a copy of li_lsn | ||
546 | * before we do so. This is especially important | ||
547 | * on 32-bit platforms where accessing and updating | ||
548 | * 64-bit values like li_lsn is not atomic. | ||
549 | */ | ||
550 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
551 | spin_unlock(&ailp->xa_lock); | ||
552 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
553 | } else { | ||
554 | spin_unlock(&ailp->xa_lock); | 735 | spin_unlock(&ailp->xa_lock); |
736 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
737 | xfs_alert_tag(mp, XFS_PTAG_AILDELETE, | ||
738 | "%s: attempting to delete a log item that is not in the AIL", | ||
739 | __func__); | ||
740 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
741 | } | ||
742 | return; | ||
555 | } | 743 | } |
744 | |||
745 | xfs_ail_delete(ailp, lip); | ||
746 | lip->li_flags &= ~XFS_LI_IN_AIL; | ||
747 | lip->li_lsn = 0; | ||
748 | if (mlip == lip) | ||
749 | mlip_changed = 1; | ||
556 | } | 750 | } |
557 | else { | ||
558 | /* | ||
559 | * If the file system is not being shutdown, we are in | ||
560 | * serious trouble if we get to this stage. | ||
561 | */ | ||
562 | struct xfs_mount *mp = ailp->xa_mount; | ||
563 | 751 | ||
752 | if (!mlip_changed) { | ||
564 | spin_unlock(&ailp->xa_lock); | 753 | spin_unlock(&ailp->xa_lock); |
565 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 754 | return; |
566 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | ||
567 | "%s: attempting to delete a log item that is not in the AIL", | ||
568 | __func__); | ||
569 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
570 | } | ||
571 | } | 755 | } |
572 | } | ||
573 | |||
574 | 756 | ||
757 | /* | ||
758 | * It is not safe to access mlip after the AIL lock is dropped, so we | ||
759 | * must get a copy of li_lsn before we do so. This is especially | ||
760 | * important on 32-bit platforms where accessing and updating 64-bit | ||
761 | * values like li_lsn is not atomic. It is possible we've emptied the | ||
762 | * AIL here, so if that is the case, pass an LSN of 0 to the tail move. | ||
763 | */ | ||
764 | mlip = xfs_ail_min(ailp); | ||
765 | tail_lsn = mlip ? mlip->li_lsn : 0; | ||
766 | spin_unlock(&ailp->xa_lock); | ||
767 | xfs_log_move_tail(ailp->xa_mount, tail_lsn); | ||
768 | } | ||
575 | 769 | ||
576 | /* | 770 | /* |
577 | * The active item list (AIL) is a doubly linked list of log | 771 | * The active item list (AIL) is a doubly linked list of log |
@@ -592,7 +786,6 @@ xfs_trans_ail_init( | |||
592 | xfs_mount_t *mp) | 786 | xfs_mount_t *mp) |
593 | { | 787 | { |
594 | struct xfs_ail *ailp; | 788 | struct xfs_ail *ailp; |
595 | int error; | ||
596 | 789 | ||
597 | ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); | 790 | ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); |
598 | if (!ailp) | 791 | if (!ailp) |
@@ -601,15 +794,9 @@ xfs_trans_ail_init( | |||
601 | ailp->xa_mount = mp; | 794 | ailp->xa_mount = mp; |
602 | INIT_LIST_HEAD(&ailp->xa_ail); | 795 | INIT_LIST_HEAD(&ailp->xa_ail); |
603 | spin_lock_init(&ailp->xa_lock); | 796 | spin_lock_init(&ailp->xa_lock); |
604 | error = xfsaild_start(ailp); | 797 | INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); |
605 | if (error) | ||
606 | goto out_free_ailp; | ||
607 | mp->m_ail = ailp; | 798 | mp->m_ail = ailp; |
608 | return 0; | 799 | return 0; |
609 | |||
610 | out_free_ailp: | ||
611 | kmem_free(ailp); | ||
612 | return error; | ||
613 | } | 800 | } |
614 | 801 | ||
615 | void | 802 | void |
@@ -618,135 +805,6 @@ xfs_trans_ail_destroy( | |||
618 | { | 805 | { |
619 | struct xfs_ail *ailp = mp->m_ail; | 806 | struct xfs_ail *ailp = mp->m_ail; |
620 | 807 | ||
621 | xfsaild_stop(ailp); | 808 | cancel_delayed_work_sync(&ailp->xa_work); |
622 | kmem_free(ailp); | 809 | kmem_free(ailp); |
623 | } | 810 | } |
624 | |||
625 | /* | ||
626 | * Insert the given log item into the AIL. | ||
627 | * We almost always insert at the end of the list, so on inserts | ||
628 | * we search from the end of the list to find where the | ||
629 | * new item belongs. | ||
630 | */ | ||
631 | STATIC void | ||
632 | xfs_ail_insert( | ||
633 | struct xfs_ail *ailp, | ||
634 | xfs_log_item_t *lip) | ||
635 | /* ARGSUSED */ | ||
636 | { | ||
637 | xfs_log_item_t *next_lip; | ||
638 | |||
639 | /* | ||
640 | * If the list is empty, just insert the item. | ||
641 | */ | ||
642 | if (list_empty(&ailp->xa_ail)) { | ||
643 | list_add(&lip->li_ail, &ailp->xa_ail); | ||
644 | return; | ||
645 | } | ||
646 | |||
647 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { | ||
648 | if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) | ||
649 | break; | ||
650 | } | ||
651 | |||
652 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || | ||
653 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); | ||
654 | |||
655 | list_add(&lip->li_ail, &next_lip->li_ail); | ||
656 | |||
657 | xfs_ail_check(ailp, lip); | ||
658 | return; | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * Delete the given item from the AIL. Return a pointer to the item. | ||
663 | */ | ||
664 | /*ARGSUSED*/ | ||
665 | STATIC xfs_log_item_t * | ||
666 | xfs_ail_delete( | ||
667 | struct xfs_ail *ailp, | ||
668 | xfs_log_item_t *lip) | ||
669 | /* ARGSUSED */ | ||
670 | { | ||
671 | xfs_ail_check(ailp, lip); | ||
672 | |||
673 | list_del(&lip->li_ail); | ||
674 | |||
675 | return lip; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * Return a pointer to the first item in the AIL. | ||
680 | * If the AIL is empty, then return NULL. | ||
681 | */ | ||
682 | STATIC xfs_log_item_t * | ||
683 | xfs_ail_min( | ||
684 | struct xfs_ail *ailp) | ||
685 | /* ARGSUSED */ | ||
686 | { | ||
687 | if (list_empty(&ailp->xa_ail)) | ||
688 | return NULL; | ||
689 | |||
690 | return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
691 | } | ||
692 | |||
693 | /* | ||
694 | * Return a pointer to the item which follows | ||
695 | * the given item in the AIL. If the given item | ||
696 | * is the last item in the list, then return NULL. | ||
697 | */ | ||
698 | STATIC xfs_log_item_t * | ||
699 | xfs_ail_next( | ||
700 | struct xfs_ail *ailp, | ||
701 | xfs_log_item_t *lip) | ||
702 | /* ARGSUSED */ | ||
703 | { | ||
704 | if (lip->li_ail.next == &ailp->xa_ail) | ||
705 | return NULL; | ||
706 | |||
707 | return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); | ||
708 | } | ||
709 | |||
710 | #ifdef DEBUG | ||
711 | /* | ||
712 | * Check that the list is sorted as it should be. | ||
713 | */ | ||
714 | STATIC void | ||
715 | xfs_ail_check( | ||
716 | struct xfs_ail *ailp, | ||
717 | xfs_log_item_t *lip) | ||
718 | { | ||
719 | xfs_log_item_t *prev_lip; | ||
720 | |||
721 | if (list_empty(&ailp->xa_ail)) | ||
722 | return; | ||
723 | |||
724 | /* | ||
725 | * Check the next and previous entries are valid. | ||
726 | */ | ||
727 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
728 | prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); | ||
729 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
730 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
731 | |||
732 | prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); | ||
733 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
734 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); | ||
735 | |||
736 | |||
737 | #ifdef XFS_TRANS_DEBUG | ||
738 | /* | ||
739 | * Walk the list checking lsn ordering, and that every entry has the | ||
740 | * XFS_LI_IN_AIL flag set. This is really expensive, so only do it | ||
741 | * when specifically debugging the transaction subsystem. | ||
742 | */ | ||
743 | prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); | ||
744 | list_for_each_entry(lip, &ailp->xa_ail, li_ail) { | ||
745 | if (&prev_lip->li_ail != &ailp->xa_ail) | ||
746 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | ||
747 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | ||
748 | prev_lip = lip; | ||
749 | } | ||
750 | #endif /* XFS_TRANS_DEBUG */ | ||
751 | } | ||
752 | #endif /* DEBUG */ | ||
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 90af025e6839..03b3b7f85a3b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -305,7 +305,7 @@ xfs_trans_read_buf( | |||
305 | if (xfs_error_target == target) { | 305 | if (xfs_error_target == target) { |
306 | if (((xfs_req_num++) % xfs_error_mod) == 0) { | 306 | if (((xfs_req_num++) % xfs_error_mod) == 0) { |
307 | xfs_buf_relse(bp); | 307 | xfs_buf_relse(bp); |
308 | cmn_err(CE_DEBUG, "Returning error!\n"); | 308 | xfs_debug(mp, "Returning error!"); |
309 | return XFS_ERROR(EIO); | 309 | return XFS_ERROR(EIO); |
310 | } | 310 | } |
311 | } | 311 | } |
@@ -336,7 +336,7 @@ xfs_trans_read_buf( | |||
336 | ASSERT(!XFS_BUF_ISASYNC(bp)); | 336 | ASSERT(!XFS_BUF_ISASYNC(bp)); |
337 | XFS_BUF_READ(bp); | 337 | XFS_BUF_READ(bp); |
338 | xfsbdstrat(tp->t_mountp, bp); | 338 | xfsbdstrat(tp->t_mountp, bp); |
339 | error = xfs_iowait(bp); | 339 | error = xfs_buf_iowait(bp); |
340 | if (error) { | 340 | if (error) { |
341 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 341 | xfs_ioerror_alert("xfs_trans_read_buf", mp, |
342 | bp, blkno); | 342 | bp, blkno); |
@@ -383,7 +383,8 @@ xfs_trans_read_buf( | |||
383 | bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); | 383 | bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); |
384 | if (bp == NULL) { | 384 | if (bp == NULL) { |
385 | *bpp = NULL; | 385 | *bpp = NULL; |
386 | return 0; | 386 | return (flags & XBF_TRYLOCK) ? |
387 | 0 : XFS_ERROR(ENOMEM); | ||
387 | } | 388 | } |
388 | if (XFS_BUF_GETERROR(bp) != 0) { | 389 | if (XFS_BUF_GETERROR(bp) != 0) { |
389 | XFS_BUF_SUPER_STALE(bp); | 390 | XFS_BUF_SUPER_STALE(bp); |
@@ -403,7 +404,7 @@ xfs_trans_read_buf( | |||
403 | xfs_force_shutdown(tp->t_mountp, | 404 | xfs_force_shutdown(tp->t_mountp, |
404 | SHUTDOWN_META_IO_ERROR); | 405 | SHUTDOWN_META_IO_ERROR); |
405 | xfs_buf_relse(bp); | 406 | xfs_buf_relse(bp); |
406 | cmn_err(CE_DEBUG, "Returning trans error!\n"); | 407 | xfs_debug(mp, "Returning trans error!"); |
407 | return XFS_ERROR(EIO); | 408 | return XFS_ERROR(EIO); |
408 | } | 409 | } |
409 | } | 410 | } |
@@ -427,7 +428,7 @@ shutdown_abort: | |||
427 | */ | 428 | */ |
428 | #if defined(DEBUG) | 429 | #if defined(DEBUG) |
429 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) | 430 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) |
430 | cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); | 431 | xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); |
431 | #endif | 432 | #endif |
432 | ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != | 433 | ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != |
433 | (XBF_STALE|XBF_DELWRI)); | 434 | (XBF_STALE|XBF_DELWRI)); |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index f783d5e9fa70..f7590f5badea 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
@@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp, | |||
69 | tp->t_flags |= XFS_TRANS_DIRTY; | 69 | tp->t_flags |= XFS_TRANS_DIRTY; |
70 | efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; | 70 | efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; |
71 | 71 | ||
72 | next_extent = efip->efi_next_extent; | 72 | /* |
73 | * atomic_inc_return gives us the value after the increment; | ||
74 | * we want to use it as an array index so we need to subtract 1 from | ||
75 | * it. | ||
76 | */ | ||
77 | next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; | ||
73 | ASSERT(next_extent < efip->efi_format.efi_nextents); | 78 | ASSERT(next_extent < efip->efi_format.efi_nextents); |
74 | extp = &(efip->efi_format.efi_extents[next_extent]); | 79 | extp = &(efip->efi_format.efi_extents[next_extent]); |
75 | extp->ext_start = start_block; | 80 | extp->ext_start = start_block; |
76 | extp->ext_len = ext_len; | 81 | extp->ext_len = ext_len; |
77 | efip->efi_next_extent++; | ||
78 | } | 82 | } |
79 | 83 | ||
80 | 84 | ||
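The switch to atomic_inc_return() above is the standard lock-free slot-reservation idiom: one atomic operation both claims an array index and advances the cursor, so concurrent callers can never be handed the same extent slot. The "- 1" exists only because atomic_inc_return() yields the post-increment value; in C11 the fetch-and-add form returns the old value directly, as in this sketch (NSLOTS is an arbitrary illustrative bound):

    #include <stdatomic.h>

    #define NSLOTS 16

    static _Atomic int next_slot;
    static long slots[NSLOTS];

    /* Claim the next free slot: fetch_add returns the pre-increment
     * cursor, which is exactly the index this caller now owns. */
    int claim_slot(long value)
    {
        int idx = atomic_fetch_add(&next_slot, 1);
        if (idx >= NSLOTS)
            return -1;              /* table full */
        slots[idx] = value;
        return idx;
    }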
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index cdc53a1050c5..048b0c689d3e 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -44,28 +44,6 @@ xfs_trans_inode_broot_debug( | |||
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | /* | 46 | /* |
47 | * Get an inode and join it to the transaction. | ||
48 | */ | ||
49 | int | ||
50 | xfs_trans_iget( | ||
51 | xfs_mount_t *mp, | ||
52 | xfs_trans_t *tp, | ||
53 | xfs_ino_t ino, | ||
54 | uint flags, | ||
55 | uint lock_flags, | ||
56 | xfs_inode_t **ipp) | ||
57 | { | ||
58 | int error; | ||
59 | |||
60 | error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); | ||
61 | if (!error && tp) { | ||
62 | xfs_trans_ijoin(tp, *ipp); | ||
63 | (*ipp)->i_itemp->ili_lock_flags = lock_flags; | ||
64 | } | ||
65 | return error; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Add a locked inode to the transaction. | 47 | * Add a locked inode to the transaction. |
70 | * | 48 | * |
71 | * The inode must be locked, and it cannot be associated with any transaction. | 49 | * The inode must be locked, and it cannot be associated with any transaction. |
@@ -103,7 +81,7 @@ xfs_trans_ijoin( | |||
103 | * | 81 | * |
104 | * | 82 | * |
105 | * Grabs a reference to the inode which will be dropped when the transaction | 83 | * Grabs a reference to the inode which will be dropped when the transaction |
106 | * is commited. The inode will also be unlocked at that point. The inode | 84 | * is committed. The inode will also be unlocked at that point. The inode |
107 | * must be locked, and it cannot be associated with any transaction. | 85 | * must be locked, and it cannot be associated with any transaction. |
108 | */ | 86 | */ |
109 | void | 87 | void |
@@ -118,6 +96,36 @@ xfs_trans_ijoin_ref( | |||
118 | } | 96 | } |
119 | 97 | ||
120 | /* | 98 | /* |
99 | * Transactional inode timestamp update. Requires the inode to be locked and | ||
100 | * joined to the transaction supplied. Relies on the transaction subsystem to | ||
101 | * track dirty state and update/writeback the inode accordingly. | ||
102 | */ | ||
103 | void | ||
104 | xfs_trans_ichgtime( | ||
105 | struct xfs_trans *tp, | ||
106 | struct xfs_inode *ip, | ||
107 | int flags) | ||
108 | { | ||
109 | struct inode *inode = VFS_I(ip); | ||
110 | timespec_t tv; | ||
111 | |||
112 | ASSERT(tp); | ||
113 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
114 | ASSERT(ip->i_transp == tp); | ||
115 | |||
116 | tv = current_fs_time(inode->i_sb); | ||
117 | |||
118 | if ((flags & XFS_ICHGTIME_MOD) && | ||
119 | !timespec_equal(&inode->i_mtime, &tv)) { | ||
120 | inode->i_mtime = tv; | ||
121 | } | ||
122 | if ((flags & XFS_ICHGTIME_CHG) && | ||
123 | !timespec_equal(&inode->i_ctime, &tv)) { | ||
124 | inode->i_ctime = tv; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | /* | ||
121 | * This is called to mark the fields indicated in fieldmask as needing | 129 | * This is called to mark the fields indicated in fieldmask as needing |
122 | * to be logged when the transaction is committed. The inode must | 130 | * to be logged when the transaction is committed. The inode must |
123 | * already be associated with the given transaction. | 131 | * already be associated with the given transaction. |
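The new xfs_trans_ichgtime() brings timestamp updates under transaction control: the times are written to the VFS inode, dirty tracking and writeback are left to the transaction subsystem, and the caller must log the inode core in the same transaction. The call sites converted later in this commit all follow the same two-line pattern:

	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);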
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 62da86c90de5..6b164e9e9a1f 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -22,15 +22,17 @@ struct xfs_log_item; | |||
22 | struct xfs_log_item_desc; | 22 | struct xfs_log_item_desc; |
23 | struct xfs_mount; | 23 | struct xfs_mount; |
24 | struct xfs_trans; | 24 | struct xfs_trans; |
25 | struct xfs_ail; | ||
26 | struct xfs_log_vec; | ||
25 | 27 | ||
26 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); | 28 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); |
27 | void xfs_trans_del_item(struct xfs_log_item *); | 29 | void xfs_trans_del_item(struct xfs_log_item *); |
28 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, | 30 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, |
29 | int flags); | 31 | int flags); |
30 | void xfs_trans_item_committed(struct xfs_log_item *lip, | ||
31 | xfs_lsn_t commit_lsn, int aborted); | ||
32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | 32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); |
33 | 33 | ||
34 | void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, | ||
35 | xfs_lsn_t commit_lsn, int aborted); | ||
34 | /* | 36 | /* |
35 | * AIL traversal cursor. | 37 | * AIL traversal cursor. |
36 | * | 38 | * |
@@ -63,28 +65,52 @@ struct xfs_ail_cursor { | |||
63 | struct xfs_ail { | 65 | struct xfs_ail { |
64 | struct xfs_mount *xa_mount; | 66 | struct xfs_mount *xa_mount; |
65 | struct list_head xa_ail; | 67 | struct list_head xa_ail; |
66 | uint xa_gen; | ||
67 | struct task_struct *xa_task; | ||
68 | xfs_lsn_t xa_target; | 68 | xfs_lsn_t xa_target; |
69 | struct xfs_ail_cursor xa_cursors; | 69 | struct xfs_ail_cursor xa_cursors; |
70 | spinlock_t xa_lock; | 70 | spinlock_t xa_lock; |
71 | struct delayed_work xa_work; | ||
72 | xfs_lsn_t xa_last_pushed_lsn; | ||
73 | unsigned long xa_flags; | ||
71 | }; | 74 | }; |
72 | 75 | ||
76 | #define XFS_AIL_PUSHING_BIT 0 | ||
77 | |||
73 | /* | 78 | /* |
74 | * From xfs_trans_ail.c | 79 | * From xfs_trans_ail.c |
75 | */ | 80 | */ |
76 | void xfs_trans_ail_update(struct xfs_ail *ailp, | 81 | |
77 | struct xfs_log_item *lip, xfs_lsn_t lsn) | 82 | extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ |
78 | __releases(ailp->xa_lock); | 83 | |
79 | void xfs_trans_ail_delete(struct xfs_ail *ailp, | 84 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, |
80 | struct xfs_log_item *lip) | 85 | struct xfs_log_item **log_items, int nr_items, |
81 | __releases(ailp->xa_lock); | 86 | xfs_lsn_t lsn) __releases(ailp->xa_lock); |
82 | void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); | 87 | static inline void |
88 | xfs_trans_ail_update( | ||
89 | struct xfs_ail *ailp, | ||
90 | struct xfs_log_item *lip, | ||
91 | xfs_lsn_t lsn) __releases(ailp->xa_lock) | ||
92 | { | ||
93 | xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); | ||
94 | } | ||
95 | |||
96 | void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, | ||
97 | struct xfs_log_item **log_items, int nr_items) | ||
98 | __releases(ailp->xa_lock); | ||
99 | static inline void | ||
100 | xfs_trans_ail_delete( | ||
101 | struct xfs_ail *ailp, | ||
102 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | ||
103 | { | ||
104 | xfs_trans_ail_delete_bulk(ailp, &lip, 1); | ||
105 | } | ||
106 | |||
107 | void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); | ||
108 | void xfs_ail_push_all(struct xfs_ail *); | ||
109 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); | ||
110 | |||
83 | void xfs_trans_unlocked_item(struct xfs_ail *, | 111 | void xfs_trans_unlocked_item(struct xfs_ail *, |
84 | xfs_log_item_t *); | 112 | xfs_log_item_t *); |
85 | 113 | ||
86 | xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); | ||
87 | |||
88 | struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, | 114 | struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, |
89 | struct xfs_ail_cursor *cur, | 115 | struct xfs_ail_cursor *cur, |
90 | xfs_lsn_t lsn); | 116 | xfs_lsn_t lsn); |
@@ -93,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, | |||
93 | void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, | 119 | void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, |
94 | struct xfs_ail_cursor *cur); | 120 | struct xfs_ail_cursor *cur); |
95 | 121 | ||
96 | long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); | ||
97 | void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); | ||
98 | int xfsaild_start(struct xfs_ail *); | ||
99 | void xfsaild_stop(struct xfs_ail *); | ||
100 | |||
101 | #if BITS_PER_LONG != 64 | 122 | #if BITS_PER_LONG != 64 |
102 | static inline void | 123 | static inline void |
103 | xfs_trans_ail_copy_lsn( | 124 | xfs_trans_ail_copy_lsn( |
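Two changes land in this header. First, the single-item AIL update/delete entry points become static inline wrappers around new bulk variants, so the bulk path holds the only real implementation. Second, the dedicated xfsaild thread interface (xfsaild_start/stop/wakeup) gives way to a delayed work item (xa_work) on the shared xfs_ail_wq workqueue, with XFS_AIL_PUSHING_BIT in xa_flags ensuring only one push is queued at a time. A sketch of that gating idiom, assuming the fields declared above (the helper name is hypothetical):

	/* queue an AIL push unless one is already pending */
	static void ail_schedule_push(struct xfs_ail *ailp)
	{
		if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
			queue_delayed_work(xfs_ail_wq, &ailp->xa_work,
					   msecs_to_jiffies(10));
	}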
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 320775295e32..65584b55607d 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -73,10 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ | |||
73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ | 73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ |
74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ | 74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ |
75 | 75 | ||
76 | typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ | ||
77 | |||
78 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ | ||
79 | |||
80 | /* | 76 | /* |
81 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. | 77 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. |
82 | * Disk based types: | 78 | * Disk based types: |
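xfs_prid_t existed only to truncate the generic prid_t to 16 bits; with project IDs widened to 32 bits (stored as a hi/lo pair in the inode core), callers now use prid_t directly together with the xfs_get_projid() accessor seen elsewhere in this commit, and the log-private xlog_tid_t leaves the shared types header as well. A sketch of what the accessor does, assuming the split on-disk fields of this era:

	static inline prid_t
	xfs_get_projid(struct xfs_inode *ip)
	{
		return (prid_t)ip->i_d.di_projid_hi << 16 |
				ip->i_d.di_projid_lo;
	}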
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index b7d5769d2df0..8b32d1a4c5a1 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c | |||
@@ -56,7 +56,6 @@ xfs_dir_ialloc( | |||
56 | mode_t mode, | 56 | mode_t mode, |
57 | xfs_nlink_t nlink, | 57 | xfs_nlink_t nlink, |
58 | xfs_dev_t rdev, | 58 | xfs_dev_t rdev, |
59 | cred_t *credp, | ||
60 | prid_t prid, /* project id */ | 59 | prid_t prid, /* project id */ |
61 | int okalloc, /* ok to allocate new space */ | 60 | int okalloc, /* ok to allocate new space */ |
62 | xfs_inode_t **ipp, /* pointer to inode; it will be | 61 | xfs_inode_t **ipp, /* pointer to inode; it will be |
@@ -93,7 +92,7 @@ xfs_dir_ialloc( | |||
93 | * transaction commit so that no other process can steal | 92 | * transaction commit so that no other process can steal |
94 | * the inode(s) that we've just allocated. | 93 | * the inode(s) that we've just allocated. |
95 | */ | 94 | */ |
96 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc, | 95 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, |
97 | &ialloc_context, &call_again, &ip); | 96 | &ialloc_context, &call_again, &ip); |
98 | 97 | ||
99 | /* | 98 | /* |
@@ -197,7 +196,7 @@ xfs_dir_ialloc( | |||
197 | * other allocations in this allocation group, | 196 | * other allocations in this allocation group, |
198 | * this call should always succeed. | 197 | * this call should always succeed. |
199 | */ | 198 | */ |
200 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, | 199 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, |
201 | okalloc, &ialloc_context, &call_again, &ip); | 200 | okalloc, &ialloc_context, &call_again, &ip); |
202 | 201 | ||
203 | /* | 202 | /* |
@@ -235,7 +234,7 @@ xfs_droplink( | |||
235 | { | 234 | { |
236 | int error; | 235 | int error; |
237 | 236 | ||
238 | xfs_ichgtime(ip, XFS_ICHGTIME_CHG); | 237 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); |
239 | 238 | ||
240 | ASSERT (ip->i_d.di_nlink > 0); | 239 | ASSERT (ip->i_d.di_nlink > 0); |
241 | ip->i_d.di_nlink--; | 240 | ip->i_d.di_nlink--; |
@@ -299,7 +298,7 @@ xfs_bumplink( | |||
299 | { | 298 | { |
300 | if (ip->i_d.di_nlink >= XFS_MAXLINK) | 299 | if (ip->i_d.di_nlink >= XFS_MAXLINK) |
301 | return XFS_ERROR(EMLINK); | 300 | return XFS_ERROR(EMLINK); |
302 | xfs_ichgtime(ip, XFS_ICHGTIME_CHG); | 301 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); |
303 | 302 | ||
304 | ASSERT(ip->i_d.di_nlink > 0); | 303 | ASSERT(ip->i_d.di_nlink > 0); |
305 | ip->i_d.di_nlink++; | 304 | ip->i_d.di_nlink++; |
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f55b9678264f..456fca314933 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h | |||
@@ -19,8 +19,7 @@ | |||
19 | #define __XFS_UTILS_H__ | 19 | #define __XFS_UTILS_H__ |
20 | 20 | ||
21 | extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, | 21 | extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, |
22 | xfs_dev_t, cred_t *, prid_t, int, | 22 | xfs_dev_t, prid_t, int, xfs_inode_t **, int *); |
23 | xfs_inode_t **, int *); | ||
24 | extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); | 23 | extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); |
25 | extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); | 24 | extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); |
26 | extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); | 25 | extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 4c7c7bfb2b2f..619720705bc6 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -114,7 +114,7 @@ xfs_setattr( | |||
114 | */ | 114 | */ |
115 | ASSERT(udqp == NULL); | 115 | ASSERT(udqp == NULL); |
116 | ASSERT(gdqp == NULL); | 116 | ASSERT(gdqp == NULL); |
117 | code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, | 117 | code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), |
118 | qflags, &udqp, &gdqp); | 118 | qflags, &udqp, &gdqp); |
119 | if (code) | 119 | if (code) |
120 | return code; | 120 | return code; |
@@ -184,8 +184,11 @@ xfs_setattr( | |||
184 | ip->i_size == 0 && ip->i_d.di_nextents == 0) { | 184 | ip->i_size == 0 && ip->i_d.di_nextents == 0) { |
185 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 185 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
186 | lock_flags &= ~XFS_ILOCK_EXCL; | 186 | lock_flags &= ~XFS_ILOCK_EXCL; |
187 | if (mask & ATTR_CTIME) | 187 | if (mask & ATTR_CTIME) { |
188 | xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 188 | inode->i_mtime = inode->i_ctime = |
189 | current_fs_time(inode->i_sb); | ||
190 | xfs_mark_inode_dirty_sync(ip); | ||
191 | } | ||
189 | code = 0; | 192 | code = 0; |
190 | goto error_return; | 193 | goto error_return; |
191 | } | 194 | } |
@@ -950,40 +953,62 @@ xfs_release( | |||
950 | * If we previously truncated this file and removed old data | 953 | * If we previously truncated this file and removed old data |
951 | * in the process, we want to initiate "early" writeout on | 954 | * in the process, we want to initiate "early" writeout on |
952 | * the last close. This is an attempt to combat the notorious | 955 | * the last close. This is an attempt to combat the notorious |
953 | * NULL files problem which is particularly noticable from a | 956 | * NULL files problem which is particularly noticeable from a |
954 | * truncate down, buffered (re-)write (delalloc), followed by | 957 | * truncate down, buffered (re-)write (delalloc), followed by |
955 | * a crash. What we are effectively doing here is | 958 | * a crash. What we are effectively doing here is |
956 | * significantly reducing the time window where we'd otherwise | 959 | * significantly reducing the time window where we'd otherwise |
957 | * be exposed to that problem. | 960 | * be exposed to that problem. |
958 | */ | 961 | */ |
959 | truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); | 962 | truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); |
960 | if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) | 963 | if (truncated) { |
961 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); | 964 | xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); |
965 | if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) | ||
966 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); | ||
967 | } | ||
962 | } | 968 | } |
963 | 969 | ||
964 | if (ip->i_d.di_nlink != 0) { | 970 | if (ip->i_d.di_nlink == 0) |
965 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && | 971 | return 0; |
966 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || | ||
967 | ip->i_delayed_blks > 0)) && | ||
968 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && | ||
969 | (!(ip->i_d.di_flags & | ||
970 | (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { | ||
971 | 972 | ||
972 | /* | 973 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && |
973 | * If we can't get the iolock just skip truncating | 974 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || |
974 | * the blocks past EOF because we could deadlock | 975 | ip->i_delayed_blks > 0)) && |
975 | * with the mmap_sem otherwise. We'll get another | 976 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && |
976 | * chance to drop them once the last reference to | 977 | (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { |
977 | * the inode is dropped, so we'll never leak blocks | 978 | |
978 | * permanently. | 979 | /* |
979 | */ | 980 | * If we can't get the iolock just skip truncating the blocks |
980 | error = xfs_free_eofblocks(mp, ip, | 981 | * past EOF because we could deadlock with the mmap_sem |
981 | XFS_FREE_EOF_TRYLOCK); | 982 | * otherwise. We'll get another chance to drop them once the |
982 | if (error) | 983 | * last reference to the inode is dropped, so we'll never leak |
983 | return error; | 984 | * blocks permanently. |
984 | } | 985 | * |
985 | } | 986 | * Further, check if the inode is being opened, written and |
987 | * closed frequently and we have delayed allocation blocks | ||
988 | * outstanding (e.g. streaming writes from the NFS server), | ||
989 | * truncating the blocks past EOF will cause fragmentation to | ||
990 | * occur. | ||
991 | * | ||
992 | * In this case don't do the truncation, either, but we have to | ||
993 | * be careful how we detect this case. Blocks beyond EOF show | ||
994 | * up as i_delayed_blks even when the inode is clean, so we | ||
995 | * need to truncate them away first before checking for a dirty | ||
996 | * release. Hence on the first dirty close we will still remove | ||
997 | * the speculative allocation, but after that we will leave it | ||
998 | * in place. | ||
999 | */ | ||
1000 | if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | ||
1001 | return 0; | ||
1002 | |||
1003 | error = xfs_free_eofblocks(mp, ip, | ||
1004 | XFS_FREE_EOF_TRYLOCK); | ||
1005 | if (error) | ||
1006 | return error; | ||
986 | 1007 | ||
1008 | /* delalloc blocks after truncation means it really is dirty */ | ||
1009 | if (ip->i_delayed_blks) | ||
1010 | xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); | ||
1011 | } | ||
987 | return 0; | 1012 | return 0; |
988 | } | 1013 | } |
989 | 1014 | ||
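The rewritten tail of xfs_release() implements the policy the new comment describes: trim speculative EOF preallocation on the first dirty close only, and leave it alone afterwards so open-write-close workloads (such as streaming writes through an NFS server) do not fragment the file. Condensed control flow of the new logic, paraphrasing the code above:

	if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
		return 0;		/* already trimmed once */

	error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_TRYLOCK);
	if (error)
		return error;

	/* delalloc blocks surviving the truncate mean a dirty workload */
	if (ip->i_delayed_blks)
		xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);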
@@ -1167,9 +1192,8 @@ xfs_inactive( | |||
1167 | * inode might be lost for a long time or forever. | 1192 | * inode might be lost for a long time or forever. |
1168 | */ | 1193 | */ |
1169 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 1194 | if (!XFS_FORCED_SHUTDOWN(mp)) { |
1170 | cmn_err(CE_NOTE, | 1195 | xfs_notice(mp, "%s: xfs_ifree returned error %d", |
1171 | "xfs_inactive: xfs_ifree() returned an error = %d on %s", | 1196 | __func__, error); |
1172 | error, mp->m_fsname); | ||
1173 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 1197 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); |
1174 | } | 1198 | } |
1175 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | 1199 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); |
@@ -1186,12 +1210,12 @@ xfs_inactive( | |||
1186 | */ | 1210 | */ |
1187 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 1211 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1188 | if (error) | 1212 | if (error) |
1189 | xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " | 1213 | xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", |
1190 | "xfs_bmap_finish() returned error %d", error); | 1214 | __func__, error); |
1191 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1215 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
1192 | if (error) | 1216 | if (error) |
1193 | xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " | 1217 | xfs_notice(mp, "%s: xfs_trans_commit returned error %d", |
1194 | "xfs_trans_commit() returned error %d", error); | 1218 | __func__, error); |
1195 | } | 1219 | } |
1196 | 1220 | ||
1197 | /* | 1221 | /* |
@@ -1253,8 +1277,7 @@ xfs_create( | |||
1253 | struct xfs_name *name, | 1277 | struct xfs_name *name, |
1254 | mode_t mode, | 1278 | mode_t mode, |
1255 | xfs_dev_t rdev, | 1279 | xfs_dev_t rdev, |
1256 | xfs_inode_t **ipp, | 1280 | xfs_inode_t **ipp) |
1257 | cred_t *credp) | ||
1258 | { | 1281 | { |
1259 | int is_dir = S_ISDIR(mode); | 1282 | int is_dir = S_ISDIR(mode); |
1260 | struct xfs_mount *mp = dp->i_mount; | 1283 | struct xfs_mount *mp = dp->i_mount; |
@@ -1266,7 +1289,7 @@ xfs_create( | |||
1266 | boolean_t unlock_dp_on_error = B_FALSE; | 1289 | boolean_t unlock_dp_on_error = B_FALSE; |
1267 | uint cancel_flags; | 1290 | uint cancel_flags; |
1268 | int committed; | 1291 | int committed; |
1269 | xfs_prid_t prid; | 1292 | prid_t prid; |
1270 | struct xfs_dquot *udqp = NULL; | 1293 | struct xfs_dquot *udqp = NULL; |
1271 | struct xfs_dquot *gdqp = NULL; | 1294 | struct xfs_dquot *gdqp = NULL; |
1272 | uint resblks; | 1295 | uint resblks; |
@@ -1279,9 +1302,9 @@ xfs_create( | |||
1279 | return XFS_ERROR(EIO); | 1302 | return XFS_ERROR(EIO); |
1280 | 1303 | ||
1281 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 1304 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) |
1282 | prid = dp->i_d.di_projid; | 1305 | prid = xfs_get_projid(dp); |
1283 | else | 1306 | else |
1284 | prid = dfltprid; | 1307 | prid = XFS_PROJID_DEFAULT; |
1285 | 1308 | ||
1286 | /* | 1309 | /* |
1287 | * Make sure that we have allocated dquot(s) on disk. | 1310 | * Make sure that we have allocated dquot(s) on disk. |
@@ -1289,7 +1312,7 @@ xfs_create( | |||
1289 | error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, | 1312 | error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, |
1290 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); | 1313 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); |
1291 | if (error) | 1314 | if (error) |
1292 | goto std_return; | 1315 | return error; |
1293 | 1316 | ||
1294 | if (is_dir) { | 1317 | if (is_dir) { |
1295 | rdev = 0; | 1318 | rdev = 0; |
@@ -1360,7 +1383,7 @@ xfs_create( | |||
1360 | * entry pointing to them, but a directory also the "." entry | 1383 | * entry pointing to them, but a directory also the "." entry |
1361 | * pointing to itself. | 1384 | * pointing to itself. |
1362 | */ | 1385 | */ |
1363 | error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp, | 1386 | error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, |
1364 | prid, resblks > 0, &ip, &committed); | 1387 | prid, resblks > 0, &ip, &committed); |
1365 | if (error) { | 1388 | if (error) { |
1366 | if (error == ENOSPC) | 1389 | if (error == ENOSPC) |
@@ -1369,12 +1392,6 @@ xfs_create( | |||
1369 | } | 1392 | } |
1370 | 1393 | ||
1371 | /* | 1394 | /* |
1372 | * At this point, we've gotten a newly allocated inode. | ||
1373 | * It is locked (and joined to the transaction). | ||
1374 | */ | ||
1375 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
1376 | |||
1377 | /* | ||
1378 | * Now we join the directory inode to the transaction. We do not do it | 1395 | * Now we join the directory inode to the transaction. We do not do it |
1379 | * earlier because xfs_dir_ialloc might commit the previous transaction | 1396 | * earlier because xfs_dir_ialloc might commit the previous transaction |
1380 | * (and release all the locks). An error from here on will result in | 1397 | * (and release all the locks). An error from here on will result in |
@@ -1391,7 +1408,7 @@ xfs_create( | |||
1391 | ASSERT(error != ENOSPC); | 1408 | ASSERT(error != ENOSPC); |
1392 | goto out_trans_abort; | 1409 | goto out_trans_abort; |
1393 | } | 1410 | } |
1394 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 1411 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
1395 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | 1412 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); |
1396 | 1413 | ||
1397 | if (is_dir) { | 1414 | if (is_dir) { |
@@ -1419,22 +1436,13 @@ xfs_create( | |||
1419 | */ | 1436 | */ |
1420 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); | 1437 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); |
1421 | 1438 | ||
1422 | /* | ||
1423 | * xfs_trans_commit normally decrements the vnode ref count | ||
1424 | * when it unlocks the inode. Since we want to return the | ||
1425 | * vnode to the caller, we bump the vnode ref count now. | ||
1426 | */ | ||
1427 | IHOLD(ip); | ||
1428 | |||
1429 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 1439 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1430 | if (error) | 1440 | if (error) |
1431 | goto out_abort_rele; | 1441 | goto out_bmap_cancel; |
1432 | 1442 | ||
1433 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1443 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
1434 | if (error) { | 1444 | if (error) |
1435 | IRELE(ip); | 1445 | goto out_release_inode; |
1436 | goto out_dqrele; | ||
1437 | } | ||
1438 | 1446 | ||
1439 | xfs_qm_dqrele(udqp); | 1447 | xfs_qm_dqrele(udqp); |
1440 | xfs_qm_dqrele(gdqp); | 1448 | xfs_qm_dqrele(gdqp); |
@@ -1448,27 +1456,21 @@ xfs_create( | |||
1448 | cancel_flags |= XFS_TRANS_ABORT; | 1456 | cancel_flags |= XFS_TRANS_ABORT; |
1449 | out_trans_cancel: | 1457 | out_trans_cancel: |
1450 | xfs_trans_cancel(tp, cancel_flags); | 1458 | xfs_trans_cancel(tp, cancel_flags); |
1451 | out_dqrele: | 1459 | out_release_inode: |
1460 | /* | ||
1461 | * Wait until after the current transaction is aborted to | ||
1462 | * release the inode. This prevents recursive transactions | ||
1463 | * and deadlocks from xfs_inactive. | ||
1464 | */ | ||
1465 | if (ip) | ||
1466 | IRELE(ip); | ||
1467 | |||
1452 | xfs_qm_dqrele(udqp); | 1468 | xfs_qm_dqrele(udqp); |
1453 | xfs_qm_dqrele(gdqp); | 1469 | xfs_qm_dqrele(gdqp); |
1454 | 1470 | ||
1455 | if (unlock_dp_on_error) | 1471 | if (unlock_dp_on_error) |
1456 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 1472 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
1457 | std_return: | ||
1458 | return error; | 1473 | return error; |
1459 | |||
1460 | out_abort_rele: | ||
1461 | /* | ||
1462 | * Wait until after the current transaction is aborted to | ||
1463 | * release the inode. This prevents recursive transactions | ||
1464 | * and deadlocks from xfs_inactive. | ||
1465 | */ | ||
1466 | xfs_bmap_cancel(&free_list); | ||
1467 | cancel_flags |= XFS_TRANS_ABORT; | ||
1468 | xfs_trans_cancel(tp, cancel_flags); | ||
1469 | IRELE(ip); | ||
1470 | unlock_dp_on_error = B_FALSE; | ||
1471 | goto out_dqrele; | ||
1472 | } | 1474 | } |
1473 | 1475 | ||
1474 | #ifdef DEBUG | 1476 | #ifdef DEBUG |
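xfs_create()'s error handling collapses the two unwind paths (out_abort_rele and out_dqrele) into a single ladder ending at out_release_inode. With the compensating IHOLD() gone, the caller's inode reference is dropped exactly once, and only after any transaction abort, for the reason given in the new comment; condensed from the hunks above, the ladder is:

	 out_bmap_cancel:
		xfs_bmap_cancel(&free_list);
		cancel_flags |= XFS_TRANS_ABORT;
	 out_trans_cancel:
		xfs_trans_cancel(tp, cancel_flags);
	 out_release_inode:
		/* after the abort, so IRELE() cannot recurse into
		 * xfs_inactive() inside a live transaction */
		if (ip)
			IRELE(ip);
		xfs_qm_dqrele(udqp);
		xfs_qm_dqrele(gdqp);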
@@ -1742,7 +1744,7 @@ xfs_remove( | |||
1742 | ASSERT(error != ENOENT); | 1744 | ASSERT(error != ENOENT); |
1743 | goto out_bmap_cancel; | 1745 | goto out_bmap_cancel; |
1744 | } | 1746 | } |
1745 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 1747 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
1746 | 1748 | ||
1747 | if (is_dir) { | 1749 | if (is_dir) { |
1748 | /* | 1750 | /* |
@@ -1880,7 +1882,7 @@ xfs_link( | |||
1880 | * the tree quota mechanism could be circumvented. | 1882 | * the tree quota mechanism could be circumvented. |
1881 | */ | 1883 | */ |
1882 | if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | 1884 | if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
1883 | (tdp->i_d.di_projid != sip->i_d.di_projid))) { | 1885 | (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { |
1884 | error = XFS_ERROR(EXDEV); | 1886 | error = XFS_ERROR(EXDEV); |
1885 | goto error_return; | 1887 | goto error_return; |
1886 | } | 1888 | } |
@@ -1895,7 +1897,7 @@ xfs_link( | |||
1895 | &first_block, &free_list, resblks); | 1897 | &first_block, &free_list, resblks); |
1896 | if (error) | 1898 | if (error) |
1897 | goto abort_return; | 1899 | goto abort_return; |
1898 | xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 1900 | xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
1899 | xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); | 1901 | xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); |
1900 | 1902 | ||
1901 | error = xfs_bumplink(tp, sip); | 1903 | error = xfs_bumplink(tp, sip); |
@@ -1933,8 +1935,7 @@ xfs_symlink( | |||
1933 | struct xfs_name *link_name, | 1935 | struct xfs_name *link_name, |
1934 | const char *target_path, | 1936 | const char *target_path, |
1935 | mode_t mode, | 1937 | mode_t mode, |
1936 | xfs_inode_t **ipp, | 1938 | xfs_inode_t **ipp) |
1937 | cred_t *credp) | ||
1938 | { | 1939 | { |
1939 | xfs_mount_t *mp = dp->i_mount; | 1940 | xfs_mount_t *mp = dp->i_mount; |
1940 | xfs_trans_t *tp; | 1941 | xfs_trans_t *tp; |
@@ -1955,7 +1956,7 @@ xfs_symlink( | |||
1955 | int byte_cnt; | 1956 | int byte_cnt; |
1956 | int n; | 1957 | int n; |
1957 | xfs_buf_t *bp; | 1958 | xfs_buf_t *bp; |
1958 | xfs_prid_t prid; | 1959 | prid_t prid; |
1959 | struct xfs_dquot *udqp, *gdqp; | 1960 | struct xfs_dquot *udqp, *gdqp; |
1960 | uint resblks; | 1961 | uint resblks; |
1961 | 1962 | ||
@@ -1978,9 +1979,9 @@ xfs_symlink( | |||
1978 | 1979 | ||
1979 | udqp = gdqp = NULL; | 1980 | udqp = gdqp = NULL; |
1980 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 1981 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) |
1981 | prid = dp->i_d.di_projid; | 1982 | prid = xfs_get_projid(dp); |
1982 | else | 1983 | else |
1983 | prid = (xfs_prid_t)dfltprid; | 1984 | prid = XFS_PROJID_DEFAULT; |
1984 | 1985 | ||
1985 | /* | 1986 | /* |
1986 | * Make sure that we have allocated dquot(s) on disk. | 1987 | * Make sure that we have allocated dquot(s) on disk. |
@@ -2046,8 +2047,8 @@ xfs_symlink( | |||
2046 | /* | 2047 | /* |
2047 | * Allocate an inode for the symlink. | 2048 | * Allocate an inode for the symlink. |
2048 | */ | 2049 | */ |
2049 | error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), | 2050 | error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, |
2050 | 1, 0, credp, prid, resblks > 0, &ip, NULL); | 2051 | prid, resblks > 0, &ip, NULL); |
2051 | if (error) { | 2052 | if (error) { |
2052 | if (error == ENOSPC) | 2053 | if (error == ENOSPC) |
2053 | goto error_return; | 2054 | goto error_return; |
@@ -2094,9 +2095,8 @@ xfs_symlink( | |||
2094 | XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, | 2095 | XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, |
2095 | &first_block, resblks, mval, &nmaps, | 2096 | &first_block, resblks, mval, &nmaps, |
2096 | &free_list); | 2097 | &free_list); |
2097 | if (error) { | 2098 | if (error) |
2098 | goto error1; | 2099 | goto error2; |
2099 | } | ||
2100 | 2100 | ||
2101 | if (resblks) | 2101 | if (resblks) |
2102 | resblks -= fs_blocks; | 2102 | resblks -= fs_blocks; |
@@ -2128,8 +2128,8 @@ xfs_symlink( | |||
2128 | error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, | 2128 | error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, |
2129 | &first_block, &free_list, resblks); | 2129 | &first_block, &free_list, resblks); |
2130 | if (error) | 2130 | if (error) |
2131 | goto error1; | 2131 | goto error2; |
2132 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 2132 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
2133 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | 2133 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); |
2134 | 2134 | ||
2135 | /* | 2135 | /* |
@@ -2141,13 +2141,6 @@ xfs_symlink( | |||
2141 | xfs_trans_set_sync(tp); | 2141 | xfs_trans_set_sync(tp); |
2142 | } | 2142 | } |
2143 | 2143 | ||
2144 | /* | ||
2145 | * xfs_trans_commit normally decrements the vnode ref count | ||
2146 | * when it unlocks the inode. Since we want to return the | ||
2147 | * vnode to the caller, we bump the vnode ref count now. | ||
2148 | */ | ||
2149 | IHOLD(ip); | ||
2150 | |||
2151 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 2144 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
2152 | if (error) { | 2145 | if (error) { |
2153 | goto error2; | 2146 | goto error2; |
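xfs_symlink() gets the same treatment as xfs_create(): the pre-commit IHOLD() disappears because, with the reworked transaction/inode joining in this merge, xfs_trans_commit() appears to no longer unlock or release a joined inode, so no compensating reference bump is needed to hand the new inode back to the caller.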
@@ -2272,7 +2265,7 @@ xfs_alloc_file_space( | |||
2272 | count = len; | 2265 | count = len; |
2273 | imapp = &imaps[0]; | 2266 | imapp = &imaps[0]; |
2274 | nimaps = 1; | 2267 | nimaps = 1; |
2275 | bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); | 2268 | bmapi_flag = XFS_BMAPI_WRITE | alloc_type; |
2276 | startoffset_fsb = XFS_B_TO_FSBT(mp, offset); | 2269 | startoffset_fsb = XFS_B_TO_FSBT(mp, offset); |
2277 | allocatesize_fsb = XFS_B_TO_FSB(mp, count); | 2270 | allocatesize_fsb = XFS_B_TO_FSB(mp, count); |
2278 | 2271 | ||
@@ -2431,9 +2424,9 @@ xfs_zero_remaining_bytes( | |||
2431 | if (endoff > ip->i_size) | 2424 | if (endoff > ip->i_size) |
2432 | endoff = ip->i_size; | 2425 | endoff = ip->i_size; |
2433 | 2426 | ||
2434 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, | 2427 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? |
2435 | XFS_IS_REALTIME_INODE(ip) ? | 2428 | mp->m_rtdev_targp : mp->m_ddev_targp, |
2436 | mp->m_rtdev_targp : mp->m_ddev_targp); | 2429 | mp->m_sb.sb_blocksize, XBF_DONT_BLOCK); |
2437 | if (!bp) | 2430 | if (!bp) |
2438 | return XFS_ERROR(ENOMEM); | 2431 | return XFS_ERROR(ENOMEM); |
2439 | 2432 | ||
@@ -2459,7 +2452,7 @@ xfs_zero_remaining_bytes( | |||
2459 | XFS_BUF_READ(bp); | 2452 | XFS_BUF_READ(bp); |
2460 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); | 2453 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); |
2461 | xfsbdstrat(mp, bp); | 2454 | xfsbdstrat(mp, bp); |
2462 | error = xfs_iowait(bp); | 2455 | error = xfs_buf_iowait(bp); |
2463 | if (error) { | 2456 | if (error) { |
2464 | xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", | 2457 | xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", |
2465 | mp, bp, XFS_BUF_ADDR(bp)); | 2458 | mp, bp, XFS_BUF_ADDR(bp)); |
@@ -2472,7 +2465,7 @@ xfs_zero_remaining_bytes( | |||
2472 | XFS_BUF_UNREAD(bp); | 2465 | XFS_BUF_UNREAD(bp); |
2473 | XFS_BUF_WRITE(bp); | 2466 | XFS_BUF_WRITE(bp); |
2474 | xfsbdstrat(mp, bp); | 2467 | xfsbdstrat(mp, bp); |
2475 | error = xfs_iowait(bp); | 2468 | error = xfs_buf_iowait(bp); |
2476 | if (error) { | 2469 | if (error) { |
2477 | xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", | 2470 | xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", |
2478 | mp, bp, XFS_BUF_ADDR(bp)); | 2471 | mp, bp, XFS_BUF_ADDR(bp)); |
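The buffer calls in xfs_zero_remaining_bytes() track two renames from the buffer-cache cleanup elsewhere in this merge: xfs_buf_get_noaddr() becomes xfs_buf_get_uncached(), now taking the target first plus an explicit flags argument, and xfs_iowait() becomes xfs_buf_iowait(). The converted synchronous read then looks like:

	bp = xfs_buf_get_uncached(mp->m_ddev_targp,
				  mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
	if (!bp)
		return XFS_ERROR(ENOMEM);

	XFS_BUF_READ(bp);
	XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
	xfsbdstrat(mp, bp);
	error = xfs_buf_iowait(bp);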
@@ -2711,6 +2704,7 @@ xfs_change_file_space( | |||
2711 | xfs_off_t llen; | 2704 | xfs_off_t llen; |
2712 | xfs_trans_t *tp; | 2705 | xfs_trans_t *tp; |
2713 | struct iattr iattr; | 2706 | struct iattr iattr; |
2707 | int prealloc_type; | ||
2714 | 2708 | ||
2715 | if (!S_ISREG(ip->i_d.di_mode)) | 2709 | if (!S_ISREG(ip->i_d.di_mode)) |
2716 | return XFS_ERROR(EINVAL); | 2710 | return XFS_ERROR(EINVAL); |
@@ -2753,12 +2747,17 @@ xfs_change_file_space( | |||
2753 | * size to be changed. | 2747 | * size to be changed. |
2754 | */ | 2748 | */ |
2755 | setprealloc = clrprealloc = 0; | 2749 | setprealloc = clrprealloc = 0; |
2750 | prealloc_type = XFS_BMAPI_PREALLOC; | ||
2756 | 2751 | ||
2757 | switch (cmd) { | 2752 | switch (cmd) { |
2753 | case XFS_IOC_ZERO_RANGE: | ||
2754 | prealloc_type |= XFS_BMAPI_CONVERT; | ||
2755 | xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0); | ||
2756 | /* FALLTHRU */ | ||
2758 | case XFS_IOC_RESVSP: | 2757 | case XFS_IOC_RESVSP: |
2759 | case XFS_IOC_RESVSP64: | 2758 | case XFS_IOC_RESVSP64: |
2760 | error = xfs_alloc_file_space(ip, startoffset, bf->l_len, | 2759 | error = xfs_alloc_file_space(ip, startoffset, bf->l_len, |
2761 | 1, attr_flags); | 2760 | prealloc_type, attr_flags); |
2762 | if (error) | 2761 | if (error) |
2763 | return error; | 2762 | return error; |
2764 | setprealloc = 1; | 2763 | setprealloc = 1; |
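The new XFS_IOC_ZERO_RANGE case reuses the RESVSP preallocation path but ORs in XFS_BMAPI_CONVERT, so existing extents over the range are converted to unwritten rather than merely reserved, after any cached pages for the range are tossed; subsequent reads then return zeroes without data I/O having been issued. A hypothetical userspace invocation (struct and ioctl number per xfs_fs.h, via the xfsprogs headers):

	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <xfs/xfs.h>	/* xfs_flock64_t, XFS_IOC_ZERO_RANGE */

	xfs_flock64_t bf = {
		.l_whence = SEEK_SET,	/* l_start is absolute */
		.l_start  = offset,	/* byte range to zero */
		.l_len    = length,
	};

	if (ioctl(fd, XFS_IOC_ZERO_RANGE, &bf) < 0)
		perror("XFS_IOC_ZERO_RANGE");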
@@ -2827,7 +2826,7 @@ xfs_change_file_space( | |||
2827 | if (ip->i_d.di_mode & S_IXGRP) | 2826 | if (ip->i_d.di_mode & S_IXGRP) |
2828 | ip->i_d.di_mode &= ~S_ISGID; | 2827 | ip->i_d.di_mode &= ~S_ISGID; |
2829 | 2828 | ||
2830 | xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 2829 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
2831 | } | 2830 | } |
2832 | if (setprealloc) | 2831 | if (setprealloc) |
2833 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; | 2832 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; |
@@ -2835,7 +2834,8 @@ xfs_change_file_space( | |||
2835 | ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; | 2834 | ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; |
2836 | 2835 | ||
2837 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2836 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
2838 | xfs_trans_set_sync(tp); | 2837 | if (attr_flags & XFS_ATTR_SYNC) |
2838 | xfs_trans_set_sync(tp); | ||
2839 | 2839 | ||
2840 | error = xfs_trans_commit(tp, 0); | 2840 | error = xfs_trans_commit(tp, 0); |
2841 | 2841 | ||
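Making xfs_trans_set_sync() conditional on the new XFS_ATTR_SYNC flag (added to xfs_vnodeops.h below) stops space preallocation from forcing a synchronous transaction commit unconditionally; callers now opt in only when the open file demands synchronous semantics. A hypothetical caller-side sketch, assuming the ioctl entry point translates file flags this way:

	int attr_flags = 0;

	if (filp->f_flags & (O_NDELAY | O_NONBLOCK))
		attr_flags |= XFS_ATTR_NONBLOCK;
	if (filp->f_flags & O_DSYNC)	/* sync semantics requested */
		attr_flags |= XFS_ATTR_SYNC;

	error = xfs_change_file_space(ip, cmd, &bf, pos, attr_flags);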
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index d8dfa8d0dadd..3bcd23353d6c 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #define _XFS_VNODEOPS_H 1 | 2 | #define _XFS_VNODEOPS_H 1 |
3 | 3 | ||
4 | struct attrlist_cursor_kern; | 4 | struct attrlist_cursor_kern; |
5 | struct cred; | ||
6 | struct file; | 5 | struct file; |
7 | struct iattr; | 6 | struct iattr; |
8 | struct inode; | 7 | struct inode; |
@@ -19,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); | |||
19 | #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ | 18 | #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ |
20 | #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ | 19 | #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ |
21 | #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ | 20 | #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ |
21 | #define XFS_ATTR_SYNC 0x10 /* synchronous operation required */ | ||
22 | 22 | ||
23 | int xfs_readlink(struct xfs_inode *ip, char *link); | 23 | int xfs_readlink(struct xfs_inode *ip, char *link); |
24 | int xfs_release(struct xfs_inode *ip); | 24 | int xfs_release(struct xfs_inode *ip); |
@@ -26,7 +26,7 @@ int xfs_inactive(struct xfs_inode *ip); | |||
26 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, | 26 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, |
27 | struct xfs_inode **ipp, struct xfs_name *ci_name); | 27 | struct xfs_inode **ipp, struct xfs_name *ci_name); |
28 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, | 28 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, |
29 | xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); | 29 | xfs_dev_t rdev, struct xfs_inode **ipp); |
30 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, | 30 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, |
31 | struct xfs_inode *ip); | 31 | struct xfs_inode *ip); |
32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | 32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
@@ -34,8 +34,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | |||
34 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, | 34 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, |
35 | xfs_off_t *offset, filldir_t filldir); | 35 | xfs_off_t *offset, filldir_t filldir); |
36 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, | 36 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
37 | const char *target_path, mode_t mode, struct xfs_inode **ipp, | 37 | const char *target_path, mode_t mode, struct xfs_inode **ipp); |
38 | cred_t *credp); | ||
39 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); | 38 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); |
40 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, | 39 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, |
41 | xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); | 40 | xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); |