diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-14 18:24:17 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-14 18:24:17 -0500 |
commit | 7cb3920a6529df7f54487abe973b903b8239e901 (patch) | |
tree | d36da0fe62adfa3e2e46a485e0bdb06019b2e560 /fs/xfs/linux-2.6 | |
parent | ad56cbf0fa6c09350c738ec59a3361f2e4ab4bc7 (diff) | |
parent | 73efe4a4ddf8eb2b1cc7039e8a66a23a424961af (diff) |
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: prevent NMI timeouts in cmn_err
xfs: Add log level to assertion printk
xfs: fix an assignment within an ASSERT()
xfs: fix error handling for synchronous writes
xfs: add FITRIM support
xfs: ensure log covering transactions are synchronous
xfs: serialise unaligned direct IOs
xfs: factor common write setup code
xfs: split buffered IO write path from xfs_file_aio_write
xfs: split direct IO write path from xfs_file_aio_write
xfs: introduce xfs_rw_lock() helpers for locking the inode
xfs: factor post-write newsize updates
xfs: factor common post-write isize handling code
xfs: ensure sync write errors are returned
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 7 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 7 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_discard.c | 191 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_discard.h | 8 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_file.c | 535 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl.c | 3 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 11 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sysctl.c | 23 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 33 |
10 files changed, 573 insertions, 247 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 92f1f2acc6ab..ac1c7e8378dd 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -896,7 +896,6 @@ xfs_buf_rele( | |||
896 | trace_xfs_buf_rele(bp, _RET_IP_); | 896 | trace_xfs_buf_rele(bp, _RET_IP_); |
897 | 897 | ||
898 | if (!pag) { | 898 | if (!pag) { |
899 | ASSERT(!bp->b_relse); | ||
900 | ASSERT(list_empty(&bp->b_lru)); | 899 | ASSERT(list_empty(&bp->b_lru)); |
901 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | 900 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); |
902 | if (atomic_dec_and_test(&bp->b_hold)) | 901 | if (atomic_dec_and_test(&bp->b_hold)) |
@@ -908,11 +907,7 @@ xfs_buf_rele( | |||
908 | 907 | ||
909 | ASSERT(atomic_read(&bp->b_hold) > 0); | 908 | ASSERT(atomic_read(&bp->b_hold) > 0); |
910 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { | 909 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
911 | if (bp->b_relse) { | 910 | if (!(bp->b_flags & XBF_STALE) && |
912 | atomic_inc(&bp->b_hold); | ||
913 | spin_unlock(&pag->pag_buf_lock); | ||
914 | bp->b_relse(bp); | ||
915 | } else if (!(bp->b_flags & XBF_STALE) && | ||
916 | atomic_read(&bp->b_lru_ref)) { | 911 | atomic_read(&bp->b_lru_ref)) { |
917 | xfs_buf_lru_add(bp); | 912 | xfs_buf_lru_add(bp); |
918 | spin_unlock(&pag->pag_buf_lock); | 913 | spin_unlock(&pag->pag_buf_lock); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a76c2428faff..cbe65950e524 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -152,8 +152,6 @@ typedef struct xfs_buftarg { | |||
152 | 152 | ||
153 | struct xfs_buf; | 153 | struct xfs_buf; |
154 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); | 154 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); |
155 | typedef void (*xfs_buf_relse_t)(struct xfs_buf *); | ||
156 | typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); | ||
157 | 155 | ||
158 | #define XB_PAGES 2 | 156 | #define XB_PAGES 2 |
159 | 157 | ||
@@ -183,7 +181,6 @@ typedef struct xfs_buf { | |||
183 | void *b_addr; /* virtual address of buffer */ | 181 | void *b_addr; /* virtual address of buffer */ |
184 | struct work_struct b_iodone_work; | 182 | struct work_struct b_iodone_work; |
185 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 183 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
186 | xfs_buf_relse_t b_relse; /* releasing function */ | ||
187 | struct completion b_iowait; /* queue for I/O waiters */ | 184 | struct completion b_iowait; /* queue for I/O waiters */ |
188 | void *b_fspriv; | 185 | void *b_fspriv; |
189 | void *b_fspriv2; | 186 | void *b_fspriv2; |
@@ -323,7 +320,6 @@ void xfs_buf_stale(struct xfs_buf *bp); | |||
323 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) | 320 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) |
324 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) | 321 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) |
325 | #define XFS_BUF_SET_START(bp) do { } while (0) | 322 | #define XFS_BUF_SET_START(bp) do { } while (0) |
326 | #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) | ||
327 | 323 | ||
328 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) | 324 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) |
329 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) | 325 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) |
@@ -360,8 +356,7 @@ xfs_buf_set_ref( | |||
360 | 356 | ||
361 | static inline void xfs_buf_relse(xfs_buf_t *bp) | 357 | static inline void xfs_buf_relse(xfs_buf_t *bp) |
362 | { | 358 | { |
363 | if (!bp->b_relse) | 359 | xfs_buf_unlock(bp); |
364 | xfs_buf_unlock(bp); | ||
365 | xfs_buf_rele(bp); | 360 | xfs_buf_rele(bp); |
366 | } | 361 | } |
367 | 362 | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c new file mode 100644 index 000000000000..05201ae719e5 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.c | |||
@@ -0,0 +1,191 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010 Red Hat, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_sb.h" | ||
20 | #include "xfs_inum.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_ag.h" | ||
23 | #include "xfs_mount.h" | ||
24 | #include "xfs_quota.h" | ||
25 | #include "xfs_trans.h" | ||
26 | #include "xfs_alloc_btree.h" | ||
27 | #include "xfs_bmap_btree.h" | ||
28 | #include "xfs_ialloc_btree.h" | ||
29 | #include "xfs_btree.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_alloc.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_discard.h" | ||
34 | #include "xfs_trace.h" | ||
35 | |||
36 | STATIC int | ||
37 | xfs_trim_extents( | ||
38 | struct xfs_mount *mp, | ||
39 | xfs_agnumber_t agno, | ||
40 | xfs_fsblock_t start, | ||
41 | xfs_fsblock_t len, | ||
42 | xfs_fsblock_t minlen, | ||
43 | __uint64_t *blocks_trimmed) | ||
44 | { | ||
45 | struct block_device *bdev = mp->m_ddev_targp->bt_bdev; | ||
46 | struct xfs_btree_cur *cur; | ||
47 | struct xfs_buf *agbp; | ||
48 | struct xfs_perag *pag; | ||
49 | int error; | ||
50 | int i; | ||
51 | |||
52 | pag = xfs_perag_get(mp, agno); | ||
53 | |||
54 | error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); | ||
55 | if (error || !agbp) | ||
56 | goto out_put_perag; | ||
57 | |||
58 | cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); | ||
59 | |||
60 | /* | ||
61 | * Force out the log. This means any transactions that might have freed | ||
62 | * space before we took the AGF buffer lock are now on disk, and the | ||
63 | * volatile disk cache is flushed. | ||
64 | */ | ||
65 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
66 | |||
67 | /* | ||
68 | * Look up the longest free extent in the AGF and start with it. | ||
69 | */ | ||
70 | error = xfs_alloc_lookup_le(cur, 0, | ||
71 | XFS_BUF_TO_AGF(agbp)->agf_longest, &i); | ||
72 | if (error) | ||
73 | goto out_del_cursor; | ||
74 | |||
75 | /* | ||
76 | * Loop until we are done with all extents that are large | ||
77 | * enough to be worth discarding. | ||
78 | */ | ||
79 | while (i) { | ||
80 | xfs_agblock_t fbno; | ||
81 | xfs_extlen_t flen; | ||
82 | |||
83 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); | ||
84 | if (error) | ||
85 | goto out_del_cursor; | ||
86 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); | ||
87 | ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); | ||
88 | |||
89 | /* | ||
90 | * Too small? Give up. | ||
91 | */ | ||
92 | if (flen < minlen) { | ||
93 | trace_xfs_discard_toosmall(mp, agno, fbno, flen); | ||
94 | goto out_del_cursor; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * If the extent is entirely outside of the range we are | ||
99 | * supposed to discard skip it. Do not bother to trim | ||
100 | * down partially overlapping ranges for now. | ||
101 | */ | ||
102 | if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || | ||
103 | XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { | ||
104 | trace_xfs_discard_exclude(mp, agno, fbno, flen); | ||
105 | goto next_extent; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * If any blocks in the range are still busy, skip the | ||
110 | * discard and try again the next time. | ||
111 | */ | ||
112 | if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { | ||
113 | trace_xfs_discard_busy(mp, agno, fbno, flen); | ||
114 | goto next_extent; | ||
115 | } | ||
116 | |||
117 | trace_xfs_discard_extent(mp, agno, fbno, flen); | ||
118 | error = -blkdev_issue_discard(bdev, | ||
119 | XFS_AGB_TO_DADDR(mp, agno, fbno), | ||
120 | XFS_FSB_TO_BB(mp, flen), | ||
121 | GFP_NOFS, 0); | ||
122 | if (error) | ||
123 | goto out_del_cursor; | ||
124 | *blocks_trimmed += flen; | ||
125 | |||
126 | next_extent: | ||
127 | error = xfs_btree_decrement(cur, 0, &i); | ||
128 | if (error) | ||
129 | goto out_del_cursor; | ||
130 | } | ||
131 | |||
132 | out_del_cursor: | ||
133 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
134 | xfs_buf_relse(agbp); | ||
135 | out_put_perag: | ||
136 | xfs_perag_put(pag); | ||
137 | return error; | ||
138 | } | ||
139 | |||
140 | int | ||
141 | xfs_ioc_trim( | ||
142 | struct xfs_mount *mp, | ||
143 | struct fstrim_range __user *urange) | ||
144 | { | ||
145 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; | ||
146 | unsigned int granularity = q->limits.discard_granularity; | ||
147 | struct fstrim_range range; | ||
148 | xfs_fsblock_t start, len, minlen; | ||
149 | xfs_agnumber_t start_agno, end_agno, agno; | ||
150 | __uint64_t blocks_trimmed = 0; | ||
151 | int error, last_error = 0; | ||
152 | |||
153 | if (!capable(CAP_SYS_ADMIN)) | ||
154 | return -XFS_ERROR(EPERM); | ||
155 | if (copy_from_user(&range, urange, sizeof(range))) | ||
156 | return -XFS_ERROR(EFAULT); | ||
157 | |||
158 | /* | ||
159 | * Truncating down the len isn't actually quite correct, but using | ||
160 | * XFS_B_TO_FSB would mean we trivially get overflows for values | ||
161 | * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default | ||
162 | * used by the fstrim application. In the end it really doesn't | ||
163 | * matter as trimming blocks is an advisory interface. | ||
164 | */ | ||
165 | start = XFS_B_TO_FSBT(mp, range.start); | ||
166 | len = XFS_B_TO_FSBT(mp, range.len); | ||
167 | minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); | ||
168 | |||
169 | start_agno = XFS_FSB_TO_AGNO(mp, start); | ||
170 | if (start_agno >= mp->m_sb.sb_agcount) | ||
171 | return -XFS_ERROR(EINVAL); | ||
172 | |||
173 | end_agno = XFS_FSB_TO_AGNO(mp, start + len); | ||
174 | if (end_agno >= mp->m_sb.sb_agcount) | ||
175 | end_agno = mp->m_sb.sb_agcount - 1; | ||
176 | |||
177 | for (agno = start_agno; agno <= end_agno; agno++) { | ||
178 | error = -xfs_trim_extents(mp, agno, start, len, minlen, | ||
179 | &blocks_trimmed); | ||
180 | if (error) | ||
181 | last_error = error; | ||
182 | } | ||
183 | |||
184 | if (last_error) | ||
185 | return last_error; | ||
186 | |||
187 | range.len = XFS_FSB_TO_B(mp, blocks_trimmed); | ||
188 | if (copy_to_user(urange, &range, sizeof(range))) | ||
189 | return -XFS_ERROR(EFAULT); | ||
190 | return 0; | ||
191 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h new file mode 100644 index 000000000000..e82b6dd3e127 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef XFS_DISCARD_H | ||
2 | #define XFS_DISCARD_H 1 | ||
3 | |||
4 | struct fstrim_range; | ||
5 | |||
6 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); | ||
7 | |||
8 | #endif /* XFS_DISCARD_H */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ba8ad422a165..ef51eb43e137 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -41,6 +41,40 @@ | |||
41 | static const struct vm_operations_struct xfs_file_vm_ops; | 41 | static const struct vm_operations_struct xfs_file_vm_ops; |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Locking primitives for read and write IO paths to ensure we consistently use | ||
45 | * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. | ||
46 | */ | ||
47 | static inline void | ||
48 | xfs_rw_ilock( | ||
49 | struct xfs_inode *ip, | ||
50 | int type) | ||
51 | { | ||
52 | if (type & XFS_IOLOCK_EXCL) | ||
53 | mutex_lock(&VFS_I(ip)->i_mutex); | ||
54 | xfs_ilock(ip, type); | ||
55 | } | ||
56 | |||
57 | static inline void | ||
58 | xfs_rw_iunlock( | ||
59 | struct xfs_inode *ip, | ||
60 | int type) | ||
61 | { | ||
62 | xfs_iunlock(ip, type); | ||
63 | if (type & XFS_IOLOCK_EXCL) | ||
64 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
65 | } | ||
66 | |||
67 | static inline void | ||
68 | xfs_rw_ilock_demote( | ||
69 | struct xfs_inode *ip, | ||
70 | int type) | ||
71 | { | ||
72 | xfs_ilock_demote(ip, type); | ||
73 | if (type & XFS_IOLOCK_EXCL) | ||
74 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
75 | } | ||
76 | |||
77 | /* | ||
44 | * xfs_iozero | 78 | * xfs_iozero |
45 | * | 79 | * |
46 | * xfs_iozero clears the specified range of buffer supplied, | 80 | * xfs_iozero clears the specified range of buffer supplied, |
@@ -262,22 +296,21 @@ xfs_file_aio_read( | |||
262 | if (XFS_FORCED_SHUTDOWN(mp)) | 296 | if (XFS_FORCED_SHUTDOWN(mp)) |
263 | return -EIO; | 297 | return -EIO; |
264 | 298 | ||
265 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
266 | mutex_lock(&inode->i_mutex); | ||
267 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
268 | |||
269 | if (unlikely(ioflags & IO_ISDIRECT)) { | 299 | if (unlikely(ioflags & IO_ISDIRECT)) { |
300 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | ||
301 | |||
270 | if (inode->i_mapping->nrpages) { | 302 | if (inode->i_mapping->nrpages) { |
271 | ret = -xfs_flushinval_pages(ip, | 303 | ret = -xfs_flushinval_pages(ip, |
272 | (iocb->ki_pos & PAGE_CACHE_MASK), | 304 | (iocb->ki_pos & PAGE_CACHE_MASK), |
273 | -1, FI_REMAPF_LOCKED); | 305 | -1, FI_REMAPF_LOCKED); |
306 | if (ret) { | ||
307 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | ||
308 | return ret; | ||
309 | } | ||
274 | } | 310 | } |
275 | mutex_unlock(&inode->i_mutex); | 311 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
276 | if (ret) { | 312 | } else |
277 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 313 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
278 | return ret; | ||
279 | } | ||
280 | } | ||
281 | 314 | ||
282 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 315 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); |
283 | 316 | ||
@@ -285,7 +318,7 @@ xfs_file_aio_read( | |||
285 | if (ret > 0) | 318 | if (ret > 0) |
286 | XFS_STATS_ADD(xs_read_bytes, ret); | 319 | XFS_STATS_ADD(xs_read_bytes, ret); |
287 | 320 | ||
288 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 321 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
289 | return ret; | 322 | return ret; |
290 | } | 323 | } |
291 | 324 | ||
@@ -309,7 +342,7 @@ xfs_file_splice_read( | |||
309 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 342 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
310 | return -EIO; | 343 | return -EIO; |
311 | 344 | ||
312 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 345 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
313 | 346 | ||
314 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); | 347 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); |
315 | 348 | ||
@@ -317,10 +350,61 @@ xfs_file_splice_read( | |||
317 | if (ret > 0) | 350 | if (ret > 0) |
318 | XFS_STATS_ADD(xs_read_bytes, ret); | 351 | XFS_STATS_ADD(xs_read_bytes, ret); |
319 | 352 | ||
320 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 353 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
321 | return ret; | 354 | return ret; |
322 | } | 355 | } |
323 | 356 | ||
357 | STATIC void | ||
358 | xfs_aio_write_isize_update( | ||
359 | struct inode *inode, | ||
360 | loff_t *ppos, | ||
361 | ssize_t bytes_written) | ||
362 | { | ||
363 | struct xfs_inode *ip = XFS_I(inode); | ||
364 | xfs_fsize_t isize = i_size_read(inode); | ||
365 | |||
366 | if (bytes_written > 0) | ||
367 | XFS_STATS_ADD(xs_write_bytes, bytes_written); | ||
368 | |||
369 | if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && | ||
370 | *ppos > isize)) | ||
371 | *ppos = isize; | ||
372 | |||
373 | if (*ppos > ip->i_size) { | ||
374 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
375 | if (*ppos > ip->i_size) | ||
376 | ip->i_size = *ppos; | ||
377 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | ||
383 | * part of the I/O may have been written to disk before the error occurred. In | ||
384 | * this case the on-disk file size may have been adjusted beyond the in-memory | ||
385 | * file size and now needs to be truncated back. | ||
386 | */ | ||
387 | STATIC void | ||
388 | xfs_aio_write_newsize_update( | ||
389 | struct xfs_inode *ip) | ||
390 | { | ||
391 | if (ip->i_new_size) { | ||
392 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
393 | ip->i_new_size = 0; | ||
394 | if (ip->i_d.di_size > ip->i_size) | ||
395 | ip->i_d.di_size = ip->i_size; | ||
396 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
397 | } | ||
398 | } | ||
399 | |||
400 | /* | ||
401 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | ||
402 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | ||
403 | * could cause lock inversions between the aio_write path and the splice path | ||
404 | * if someone is doing concurrent splice(2) based writes and write(2) based | ||
405 | * writes to the same inode. The only real way to fix this is to re-implement | ||
406 | * the generic code here with correct locking orders. | ||
407 | */ | ||
324 | STATIC ssize_t | 408 | STATIC ssize_t |
325 | xfs_file_splice_write( | 409 | xfs_file_splice_write( |
326 | struct pipe_inode_info *pipe, | 410 | struct pipe_inode_info *pipe, |
@@ -331,7 +415,7 @@ xfs_file_splice_write( | |||
331 | { | 415 | { |
332 | struct inode *inode = outfilp->f_mapping->host; | 416 | struct inode *inode = outfilp->f_mapping->host; |
333 | struct xfs_inode *ip = XFS_I(inode); | 417 | struct xfs_inode *ip = XFS_I(inode); |
334 | xfs_fsize_t isize, new_size; | 418 | xfs_fsize_t new_size; |
335 | int ioflags = 0; | 419 | int ioflags = 0; |
336 | ssize_t ret; | 420 | ssize_t ret; |
337 | 421 | ||
@@ -355,27 +439,9 @@ xfs_file_splice_write( | |||
355 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | 439 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); |
356 | 440 | ||
357 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 441 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
358 | if (ret > 0) | ||
359 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
360 | |||
361 | isize = i_size_read(inode); | ||
362 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) | ||
363 | *ppos = isize; | ||
364 | 442 | ||
365 | if (*ppos > ip->i_size) { | 443 | xfs_aio_write_isize_update(inode, ppos, ret); |
366 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 444 | xfs_aio_write_newsize_update(ip); |
367 | if (*ppos > ip->i_size) | ||
368 | ip->i_size = *ppos; | ||
369 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
370 | } | ||
371 | |||
372 | if (ip->i_new_size) { | ||
373 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
374 | ip->i_new_size = 0; | ||
375 | if (ip->i_d.di_size > ip->i_size) | ||
376 | ip->i_d.di_size = ip->i_size; | ||
377 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
378 | } | ||
379 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 445 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
380 | return ret; | 446 | return ret; |
381 | } | 447 | } |
@@ -562,245 +628,258 @@ out_lock: | |||
562 | return error; | 628 | return error; |
563 | } | 629 | } |
564 | 630 | ||
631 | /* | ||
632 | * Common pre-write limit and setup checks. | ||
633 | * | ||
634 | * Returns with iolock held according to @iolock. | ||
635 | */ | ||
565 | STATIC ssize_t | 636 | STATIC ssize_t |
566 | xfs_file_aio_write( | 637 | xfs_file_aio_write_checks( |
567 | struct kiocb *iocb, | 638 | struct file *file, |
568 | const struct iovec *iovp, | 639 | loff_t *pos, |
569 | unsigned long nr_segs, | 640 | size_t *count, |
570 | loff_t pos) | 641 | int *iolock) |
571 | { | 642 | { |
572 | struct file *file = iocb->ki_filp; | 643 | struct inode *inode = file->f_mapping->host; |
573 | struct address_space *mapping = file->f_mapping; | ||
574 | struct inode *inode = mapping->host; | ||
575 | struct xfs_inode *ip = XFS_I(inode); | 644 | struct xfs_inode *ip = XFS_I(inode); |
576 | struct xfs_mount *mp = ip->i_mount; | 645 | xfs_fsize_t new_size; |
577 | ssize_t ret = 0, error = 0; | 646 | int error = 0; |
578 | int ioflags = 0; | ||
579 | xfs_fsize_t isize, new_size; | ||
580 | int iolock; | ||
581 | size_t ocount = 0, count; | ||
582 | int need_i_mutex; | ||
583 | 647 | ||
584 | XFS_STATS_INC(xs_write_calls); | 648 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
649 | if (error) { | ||
650 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | ||
651 | *iolock = 0; | ||
652 | return error; | ||
653 | } | ||
585 | 654 | ||
586 | BUG_ON(iocb->ki_pos != pos); | 655 | new_size = *pos + *count; |
656 | if (new_size > ip->i_size) | ||
657 | ip->i_new_size = new_size; | ||
587 | 658 | ||
588 | if (unlikely(file->f_flags & O_DIRECT)) | 659 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) |
589 | ioflags |= IO_ISDIRECT; | 660 | file_update_time(file); |
590 | if (file->f_mode & FMODE_NOCMTIME) | ||
591 | ioflags |= IO_INVIS; | ||
592 | 661 | ||
593 | error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); | 662 | /* |
663 | * If the offset is beyond the size of the file, we need to zero any | ||
664 | * blocks that fall between the existing EOF and the start of this | ||
665 | * write. | ||
666 | */ | ||
667 | if (*pos > ip->i_size) | ||
668 | error = -xfs_zero_eof(ip, *pos, ip->i_size); | ||
669 | |||
670 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
594 | if (error) | 671 | if (error) |
595 | return error; | 672 | return error; |
596 | 673 | ||
597 | count = ocount; | 674 | /* |
598 | if (count == 0) | 675 | * If we're writing the file then make sure to clear the setuid and |
599 | return 0; | 676 | * setgid bits if the process is not being run by root. This keeps |
600 | 677 | * people from modifying setuid and setgid binaries. | |
601 | xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); | 678 | */ |
602 | 679 | return file_remove_suid(file); | |
603 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
604 | return -EIO; | ||
605 | |||
606 | relock: | ||
607 | if (ioflags & IO_ISDIRECT) { | ||
608 | iolock = XFS_IOLOCK_SHARED; | ||
609 | need_i_mutex = 0; | ||
610 | } else { | ||
611 | iolock = XFS_IOLOCK_EXCL; | ||
612 | need_i_mutex = 1; | ||
613 | mutex_lock(&inode->i_mutex); | ||
614 | } | ||
615 | 680 | ||
616 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 681 | } |
617 | 682 | ||
618 | start: | 683 | /* |
619 | error = -generic_write_checks(file, &pos, &count, | 684 | * xfs_file_dio_aio_write - handle direct IO writes |
620 | S_ISBLK(inode->i_mode)); | 685 | * |
621 | if (error) { | 686 | * Lock the inode appropriately to prepare for and issue a direct IO write. |
622 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 687 | * By separating it from the buffered write path we remove all the tricky to |
623 | goto out_unlock_mutex; | 688 | * follow locking changes and looping. |
689 | * | ||
690 | * If there are cached pages or we're extending the file, we need IOLOCK_EXCL | ||
691 | * until we're sure the bytes at the new EOF have been zeroed and/or the cached | ||
692 | * pages are flushed out. | ||
693 | * | ||
694 | * In most cases the direct IO writes will be done holding IOLOCK_SHARED | ||
695 | * allowing them to be done in parallel with reads and other direct IO writes. | ||
696 | * However, if the IO is not aligned to filesystem blocks, the direct IO layer | ||
697 | * needs to do sub-block zeroing and that requires serialisation against other | ||
698 | * direct IOs to the same block. In this case we need to serialise the | ||
699 | * submission of the unaligned IOs so that we don't get racing block zeroing in | ||
700 | * the dio layer. To avoid the problem with aio, we also need to wait for | ||
701 | * outstanding IOs to complete so that unwritten extent conversion is completed | ||
702 | * before we try to map the overlapping block. This is currently implemented by | ||
703 | * hitting it with a big hammer (i.e. xfs_ioend_wait()). | ||
704 | * | ||
705 | * Returns with locks held indicated by @iolock and errors indicated by | ||
706 | * negative return values. | ||
707 | */ | ||
708 | STATIC ssize_t | ||
709 | xfs_file_dio_aio_write( | ||
710 | struct kiocb *iocb, | ||
711 | const struct iovec *iovp, | ||
712 | unsigned long nr_segs, | ||
713 | loff_t pos, | ||
714 | size_t ocount, | ||
715 | int *iolock) | ||
716 | { | ||
717 | struct file *file = iocb->ki_filp; | ||
718 | struct address_space *mapping = file->f_mapping; | ||
719 | struct inode *inode = mapping->host; | ||
720 | struct xfs_inode *ip = XFS_I(inode); | ||
721 | struct xfs_mount *mp = ip->i_mount; | ||
722 | ssize_t ret = 0; | ||
723 | size_t count = ocount; | ||
724 | int unaligned_io = 0; | ||
725 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | ||
726 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
727 | |||
728 | *iolock = 0; | ||
729 | if ((pos & target->bt_smask) || (count & target->bt_smask)) | ||
730 | return -XFS_ERROR(EINVAL); | ||
731 | |||
732 | if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) | ||
733 | unaligned_io = 1; | ||
734 | |||
735 | if (unaligned_io || mapping->nrpages || pos > ip->i_size) | ||
736 | *iolock = XFS_IOLOCK_EXCL; | ||
737 | else | ||
738 | *iolock = XFS_IOLOCK_SHARED; | ||
739 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | ||
740 | |||
741 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); | ||
742 | if (ret) | ||
743 | return ret; | ||
744 | |||
745 | if (mapping->nrpages) { | ||
746 | WARN_ON(*iolock != XFS_IOLOCK_EXCL); | ||
747 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | ||
748 | FI_REMAPF_LOCKED); | ||
749 | if (ret) | ||
750 | return ret; | ||
624 | } | 751 | } |
625 | 752 | ||
626 | if (ioflags & IO_ISDIRECT) { | 753 | /* |
627 | xfs_buftarg_t *target = | 754 | * If we are doing unaligned IO, wait for all other IO to drain, |
628 | XFS_IS_REALTIME_INODE(ip) ? | 755 | * otherwise demote the lock if we had to flush cached pages |
629 | mp->m_rtdev_targp : mp->m_ddev_targp; | 756 | */ |
630 | 757 | if (unaligned_io) | |
631 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { | 758 | xfs_ioend_wait(ip); |
632 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 759 | else if (*iolock == XFS_IOLOCK_EXCL) { |
633 | return XFS_ERROR(-EINVAL); | 760 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
634 | } | 761 | *iolock = XFS_IOLOCK_SHARED; |
635 | |||
636 | if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { | ||
637 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | ||
638 | iolock = XFS_IOLOCK_EXCL; | ||
639 | need_i_mutex = 1; | ||
640 | mutex_lock(&inode->i_mutex); | ||
641 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | ||
642 | goto start; | ||
643 | } | ||
644 | } | 762 | } |
645 | 763 | ||
646 | new_size = pos + count; | 764 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
647 | if (new_size > ip->i_size) | 765 | ret = generic_file_direct_write(iocb, iovp, |
648 | ip->i_new_size = new_size; | 766 | &nr_segs, pos, &iocb->ki_pos, count, ocount); |
649 | 767 | ||
650 | if (likely(!(ioflags & IO_INVIS))) | 768 | /* No fallback to buffered IO on errors for XFS. */ |
651 | file_update_time(file); | 769 | ASSERT(ret < 0 || ret == count); |
770 | return ret; | ||
771 | } | ||
652 | 772 | ||
653 | /* | 773 | STATIC ssize_t |
654 | * If the offset is beyond the size of the file, we have a couple | 774 | xfs_file_buffered_aio_write( |
655 | * of things to do. First, if there is already space allocated | 775 | struct kiocb *iocb, |
656 | * we need to either create holes or zero the disk or ... | 776 | const struct iovec *iovp, |
657 | * | 777 | unsigned long nr_segs, |
658 | * If there is a page where the previous size lands, we need | 778 | loff_t pos, |
659 | * to zero it out up to the new size. | 779 | size_t ocount, |
660 | */ | 780 | int *iolock) |
781 | { | ||
782 | struct file *file = iocb->ki_filp; | ||
783 | struct address_space *mapping = file->f_mapping; | ||
784 | struct inode *inode = mapping->host; | ||
785 | struct xfs_inode *ip = XFS_I(inode); | ||
786 | ssize_t ret; | ||
787 | int enospc = 0; | ||
788 | size_t count = ocount; | ||
661 | 789 | ||
662 | if (pos > ip->i_size) { | 790 | *iolock = XFS_IOLOCK_EXCL; |
663 | error = xfs_zero_eof(ip, pos, ip->i_size); | 791 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); |
664 | if (error) { | ||
665 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
666 | goto out_unlock_internal; | ||
667 | } | ||
668 | } | ||
669 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
670 | 792 | ||
671 | /* | 793 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); |
672 | * If we're writing the file then make sure to clear the | 794 | if (ret) |
673 | * setuid and setgid bits if the process is not being run | 795 | return ret; |
674 | * by root. This keeps people from modifying setuid and | ||
675 | * setgid binaries. | ||
676 | */ | ||
677 | error = -file_remove_suid(file); | ||
678 | if (unlikely(error)) | ||
679 | goto out_unlock_internal; | ||
680 | 796 | ||
681 | /* We can write back this queue in page reclaim */ | 797 | /* We can write back this queue in page reclaim */ |
682 | current->backing_dev_info = mapping->backing_dev_info; | 798 | current->backing_dev_info = mapping->backing_dev_info; |
683 | 799 | ||
684 | if ((ioflags & IO_ISDIRECT)) { | 800 | write_retry: |
685 | if (mapping->nrpages) { | 801 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); |
686 | WARN_ON(need_i_mutex == 0); | 802 | ret = generic_file_buffered_write(iocb, iovp, nr_segs, |
687 | error = xfs_flushinval_pages(ip, | 803 | pos, &iocb->ki_pos, count, ret); |
688 | (pos & PAGE_CACHE_MASK), | 804 | /* |
689 | -1, FI_REMAPF_LOCKED); | 805 | * if we just got an ENOSPC, flush the inode now we aren't holding any |
690 | if (error) | 806 | * page locks and retry *once* |
691 | goto out_unlock_internal; | 807 | */ |
692 | } | 808 | if (ret == -ENOSPC && !enospc) { |
693 | 809 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | |
694 | if (need_i_mutex) { | 810 | if (ret) |
695 | /* demote the lock now the cached pages are gone */ | 811 | return ret; |
696 | xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); | 812 | enospc = 1; |
697 | mutex_unlock(&inode->i_mutex); | 813 | goto write_retry; |
814 | } | ||
815 | current->backing_dev_info = NULL; | ||
816 | return ret; | ||
817 | } | ||
698 | 818 | ||
699 | iolock = XFS_IOLOCK_SHARED; | 819 | STATIC ssize_t |
700 | need_i_mutex = 0; | 820 | xfs_file_aio_write( |
701 | } | 821 | struct kiocb *iocb, |
822 | const struct iovec *iovp, | ||
823 | unsigned long nr_segs, | ||
824 | loff_t pos) | ||
825 | { | ||
826 | struct file *file = iocb->ki_filp; | ||
827 | struct address_space *mapping = file->f_mapping; | ||
828 | struct inode *inode = mapping->host; | ||
829 | struct xfs_inode *ip = XFS_I(inode); | ||
830 | ssize_t ret; | ||
831 | int iolock; | ||
832 | size_t ocount = 0; | ||
702 | 833 | ||
703 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); | 834 | XFS_STATS_INC(xs_write_calls); |
704 | ret = generic_file_direct_write(iocb, iovp, | ||
705 | &nr_segs, pos, &iocb->ki_pos, count, ocount); | ||
706 | 835 | ||
707 | /* | 836 | BUG_ON(iocb->ki_pos != pos); |
708 | * direct-io write to a hole: fall through to buffered I/O | ||
709 | * for completing the rest of the request. | ||
710 | */ | ||
711 | if (ret >= 0 && ret != count) { | ||
712 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
713 | 837 | ||
714 | pos += ret; | 838 | ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); |
715 | count -= ret; | 839 | if (ret) |
840 | return ret; | ||
716 | 841 | ||
717 | ioflags &= ~IO_ISDIRECT; | 842 | if (ocount == 0) |
718 | xfs_iunlock(ip, iolock); | 843 | return 0; |
719 | goto relock; | ||
720 | } | ||
721 | } else { | ||
722 | int enospc = 0; | ||
723 | ssize_t ret2 = 0; | ||
724 | 844 | ||
725 | write_retry: | 845 | xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); |
726 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); | ||
727 | ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
728 | pos, &iocb->ki_pos, count, ret); | ||
729 | /* | ||
730 | * if we just got an ENOSPC, flush the inode now we | ||
731 | * aren't holding any page locks and retry *once* | ||
732 | */ | ||
733 | if (ret2 == -ENOSPC && !enospc) { | ||
734 | error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | ||
735 | if (error) | ||
736 | goto out_unlock_internal; | ||
737 | enospc = 1; | ||
738 | goto write_retry; | ||
739 | } | ||
740 | ret = ret2; | ||
741 | } | ||
742 | 846 | ||
743 | current->backing_dev_info = NULL; | 847 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
848 | return -EIO; | ||
744 | 849 | ||
745 | isize = i_size_read(inode); | 850 | if (unlikely(file->f_flags & O_DIRECT)) |
746 | if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) | 851 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, |
747 | iocb->ki_pos = isize; | 852 | ocount, &iolock); |
853 | else | ||
854 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | ||
855 | ocount, &iolock); | ||
748 | 856 | ||
749 | if (iocb->ki_pos > ip->i_size) { | 857 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); |
750 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
751 | if (iocb->ki_pos > ip->i_size) | ||
752 | ip->i_size = iocb->ki_pos; | ||
753 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
754 | } | ||
755 | 858 | ||
756 | error = -ret; | ||
757 | if (ret <= 0) | 859 | if (ret <= 0) |
758 | goto out_unlock_internal; | 860 | goto out_unlock; |
759 | |||
760 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
761 | 861 | ||
762 | /* Handle various SYNC-type writes */ | 862 | /* Handle various SYNC-type writes */ |
763 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 863 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
764 | loff_t end = pos + ret - 1; | 864 | loff_t end = pos + ret - 1; |
765 | int error2; | 865 | int error, error2; |
766 | |||
767 | xfs_iunlock(ip, iolock); | ||
768 | if (need_i_mutex) | ||
769 | mutex_unlock(&inode->i_mutex); | ||
770 | 866 | ||
771 | error2 = filemap_write_and_wait_range(mapping, pos, end); | 867 | xfs_rw_iunlock(ip, iolock); |
772 | if (!error) | 868 | error = filemap_write_and_wait_range(mapping, pos, end); |
773 | error = error2; | 869 | xfs_rw_ilock(ip, iolock); |
774 | if (need_i_mutex) | ||
775 | mutex_lock(&inode->i_mutex); | ||
776 | xfs_ilock(ip, iolock); | ||
777 | 870 | ||
778 | error2 = -xfs_file_fsync(file, | 871 | error2 = -xfs_file_fsync(file, |
779 | (file->f_flags & __O_SYNC) ? 0 : 1); | 872 | (file->f_flags & __O_SYNC) ? 0 : 1); |
780 | if (!error) | 873 | if (error) |
781 | error = error2; | 874 | ret = error; |
875 | else if (error2) | ||
876 | ret = error2; | ||
782 | } | 877 | } |
783 | 878 | ||
784 | out_unlock_internal: | 879 | out_unlock: |
785 | if (ip->i_new_size) { | 880 | xfs_aio_write_newsize_update(ip); |
786 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 881 | xfs_rw_iunlock(ip, iolock); |
787 | ip->i_new_size = 0; | 882 | return ret; |
788 | /* | ||
789 | * If this was a direct or synchronous I/O that failed (such | ||
790 | * as ENOSPC) then part of the I/O may have been written to | ||
791 | * disk before the error occured. In this case the on-disk | ||
792 | * file size may have been adjusted beyond the in-memory file | ||
793 | * size and now needs to be truncated back. | ||
794 | */ | ||
795 | if (ip->i_d.di_size > ip->i_size) | ||
796 | ip->i_d.di_size = ip->i_size; | ||
797 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
798 | } | ||
799 | xfs_iunlock(ip, iolock); | ||
800 | out_unlock_mutex: | ||
801 | if (need_i_mutex) | ||
802 | mutex_unlock(&inode->i_mutex); | ||
803 | return -error; | ||
804 | } | 883 | } |
805 | 884 | ||
806 | STATIC int | 885 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index ad442d9e392e..b06ede1d0bed 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_dfrag.h" | 39 | #include "xfs_dfrag.h" |
40 | #include "xfs_fsops.h" | 40 | #include "xfs_fsops.h" |
41 | #include "xfs_vnodeops.h" | 41 | #include "xfs_vnodeops.h" |
42 | #include "xfs_discard.h" | ||
42 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
43 | #include "xfs_inode_item.h" | 44 | #include "xfs_inode_item.h" |
44 | #include "xfs_export.h" | 45 | #include "xfs_export.h" |
@@ -1294,6 +1295,8 @@ xfs_file_ioctl( | |||
1294 | trace_xfs_file_ioctl(ip); | 1295 | trace_xfs_file_ioctl(ip); |
1295 | 1296 | ||
1296 | switch (cmd) { | 1297 | switch (cmd) { |
1298 | case FITRIM: | ||
1299 | return xfs_ioc_trim(mp, arg); | ||
1297 | case XFS_IOC_ALLOCSP: | 1300 | case XFS_IOC_ALLOCSP: |
1298 | case XFS_IOC_FREESP: | 1301 | case XFS_IOC_FREESP: |
1299 | case XFS_IOC_RESVSP: | 1302 | case XFS_IOC_RESVSP: |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bd07f7339366..9731898083ae 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1414,7 +1414,7 @@ xfs_fs_freeze( | |||
1414 | 1414 | ||
1415 | xfs_save_resvblks(mp); | 1415 | xfs_save_resvblks(mp); |
1416 | xfs_quiesce_attr(mp); | 1416 | xfs_quiesce_attr(mp); |
1417 | return -xfs_fs_log_dummy(mp, SYNC_WAIT); | 1417 | return -xfs_fs_log_dummy(mp); |
1418 | } | 1418 | } |
1419 | 1419 | ||
1420 | STATIC int | 1420 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a02480de9759..e22f0057d21f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -362,7 +362,7 @@ xfs_quiesce_data( | |||
362 | 362 | ||
363 | /* mark the log as covered if needed */ | 363 | /* mark the log as covered if needed */ |
364 | if (xfs_log_need_covered(mp)) | 364 | if (xfs_log_need_covered(mp)) |
365 | error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); | 365 | error2 = xfs_fs_log_dummy(mp); |
366 | 366 | ||
367 | /* flush data-only devices */ | 367 | /* flush data-only devices */ |
368 | if (mp->m_rtdev_targp) | 368 | if (mp->m_rtdev_targp) |
@@ -503,13 +503,14 @@ xfs_sync_worker( | |||
503 | int error; | 503 | int error; |
504 | 504 | ||
505 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | 505 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { |
506 | xfs_log_force(mp, 0); | ||
507 | xfs_reclaim_inodes(mp, 0); | ||
508 | /* dgc: errors ignored here */ | 506 | /* dgc: errors ignored here */ |
509 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
510 | if (mp->m_super->s_frozen == SB_UNFROZEN && | 507 | if (mp->m_super->s_frozen == SB_UNFROZEN && |
511 | xfs_log_need_covered(mp)) | 508 | xfs_log_need_covered(mp)) |
512 | error = xfs_fs_log_dummy(mp, 0); | 509 | error = xfs_fs_log_dummy(mp); |
510 | else | ||
511 | xfs_log_force(mp, 0); | ||
512 | xfs_reclaim_inodes(mp, 0); | ||
513 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
513 | } | 514 | } |
514 | mp->m_sync_seq++; | 515 | mp->m_sync_seq++; |
515 | wake_up(&mp->m_wait_single_sync_task); | 516 | wake_up(&mp->m_wait_single_sync_task); |
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7bb5092d6ae4..ee3cee097e7e 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include <linux/sysctl.h> | 19 | #include <linux/sysctl.h> |
20 | #include <linux/proc_fs.h> | 20 | #include <linux/proc_fs.h> |
21 | #include "xfs_error.h" | ||
21 | 22 | ||
22 | static struct ctl_table_header *xfs_table_header; | 23 | static struct ctl_table_header *xfs_table_header; |
23 | 24 | ||
@@ -51,6 +52,26 @@ xfs_stats_clear_proc_handler( | |||
51 | 52 | ||
52 | return ret; | 53 | return ret; |
53 | } | 54 | } |
55 | |||
56 | STATIC int | ||
57 | xfs_panic_mask_proc_handler( | ||
58 | ctl_table *ctl, | ||
59 | int write, | ||
60 | void __user *buffer, | ||
61 | size_t *lenp, | ||
62 | loff_t *ppos) | ||
63 | { | ||
64 | int ret, *valp = ctl->data; | ||
65 | |||
66 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); | ||
67 | if (!ret && write) { | ||
68 | xfs_panic_mask = *valp; | ||
69 | #ifdef DEBUG | ||
70 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); | ||
71 | #endif | ||
72 | } | ||
73 | return ret; | ||
74 | } | ||
54 | #endif /* CONFIG_PROC_FS */ | 75 | #endif /* CONFIG_PROC_FS */ |
55 | 76 | ||
56 | static ctl_table xfs_table[] = { | 77 | static ctl_table xfs_table[] = { |
@@ -77,7 +98,7 @@ static ctl_table xfs_table[] = { | |||
77 | .data = &xfs_params.panic_mask.val, | 98 | .data = &xfs_params.panic_mask.val, |
78 | .maxlen = sizeof(int), | 99 | .maxlen = sizeof(int), |
79 | .mode = 0644, | 100 | .mode = 0644, |
80 | .proc_handler = proc_dointvec_minmax, | 101 | .proc_handler = xfs_panic_mask_proc_handler, |
81 | .extra1 = &xfs_params.panic_mask.min, | 102 | .extra1 = &xfs_params.panic_mask.min, |
82 | .extra2 = &xfs_params.panic_mask.max | 103 | .extra2 = &xfs_params.panic_mask.max |
83 | }, | 104 | }, |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 647af2a2e7aa..2d0bcb479075 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1759,6 +1759,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); | |||
1759 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); | 1759 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); |
1760 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); | 1760 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); |
1761 | 1761 | ||
1762 | DECLARE_EVENT_CLASS(xfs_discard_class, | ||
1763 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
1764 | xfs_agblock_t agbno, xfs_extlen_t len), | ||
1765 | TP_ARGS(mp, agno, agbno, len), | ||
1766 | TP_STRUCT__entry( | ||
1767 | __field(dev_t, dev) | ||
1768 | __field(xfs_agnumber_t, agno) | ||
1769 | __field(xfs_agblock_t, agbno) | ||
1770 | __field(xfs_extlen_t, len) | ||
1771 | ), | ||
1772 | TP_fast_assign( | ||
1773 | __entry->dev = mp->m_super->s_dev; | ||
1774 | __entry->agno = agno; | ||
1775 | __entry->agbno = agbno; | ||
1776 | __entry->len = len; | ||
1777 | ), | ||
1778 | TP_printk("dev %d:%d agno %u agbno %u len %u\n", | ||
1779 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1780 | __entry->agno, | ||
1781 | __entry->agbno, | ||
1782 | __entry->len) | ||
1783 | ) | ||
1784 | |||
1785 | #define DEFINE_DISCARD_EVENT(name) \ | ||
1786 | DEFINE_EVENT(xfs_discard_class, name, \ | ||
1787 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | ||
1788 | xfs_agblock_t agbno, xfs_extlen_t len), \ | ||
1789 | TP_ARGS(mp, agno, agbno, len)) | ||
1790 | DEFINE_DISCARD_EVENT(xfs_discard_extent); | ||
1791 | DEFINE_DISCARD_EVENT(xfs_discard_toosmall); | ||
1792 | DEFINE_DISCARD_EVENT(xfs_discard_exclude); | ||
1793 | DEFINE_DISCARD_EVENT(xfs_discard_busy); | ||
1794 | |||
1762 | #endif /* _TRACE_XFS_H */ | 1795 | #endif /* _TRACE_XFS_H */ |
1763 | 1796 | ||
1764 | #undef TRACE_INCLUDE_PATH | 1797 | #undef TRACE_INCLUDE_PATH |