aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-03-23 12:19:22 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-03-23 12:19:22 -0400
commit 49d99a2f9c4d033cc3965958a1397b1fad573dd3 (patch)
tree cda1849d49d40d2f25773e86605c55bc6745cf1f /fs
parent 1c3ddfe5ab886c4dc0443535e95ad8e41c41d0e5 (diff)
parent f074211f6041305b645669464343d504f4e6a290 (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
Pull XFS updates from Ben Myers:
 "Scalability improvements for dquots, log grant code cleanups, plus
  bugfixes and cleanups large and small"

Fix up various trivial conflicts that were due to some of the earlier
patches already having been integrated into v3.3 as bugfixes, and then
there were development patches on top of those. Easily merged by just
taking the newer version from the pulled branch.

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (45 commits)
  xfs: fallback to vmalloc for large buffers in xfs_getbmap
  xfs: fallback to vmalloc for large buffers in xfs_attrmulti_attr_get
  xfs: remove remaining scraps of struct xfs_iomap
  xfs: fix inode lookup race
  xfs: clean up minor sparse warnings
  xfs: remove the global xfs_Gqm structure
  xfs: remove the per-filesystem list of dquots
  xfs: use per-filesystem radix trees for dquot lookup
  xfs: per-filesystem dquot LRU lists
  xfs: use common code for quota statistics
  xfs: reimplement fdatasync support
  xfs: split in-core and on-disk inode log item fields
  xfs: make xfs_inode_item_size idempotent
  xfs: log timestamp updates
  xfs: log file size updates at I/O completion time
  xfs: log file size updates as part of unwritten extent conversion
  xfs: do not require an ioend for new EOF calculation
  xfs: use per-filesystem I/O completion workqueues
  quota: make Q_XQUOTASYNC a noop
  xfs: include reservations in quota reporting
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/quota/quota.c 3
-rw-r--r-- fs/xfs/Makefile 3
-rw-r--r-- fs/xfs/xfs_aops.c 183
-rw-r--r-- fs/xfs/xfs_aops.h 4
-rw-r--r-- fs/xfs/xfs_bmap.c 13
-rw-r--r-- fs/xfs/xfs_buf.c 17
-rw-r--r-- fs/xfs/xfs_dfrag.c 24
-rw-r--r-- fs/xfs/xfs_dir2_block.c 1
-rw-r--r-- fs/xfs/xfs_dquot.c 418
-rw-r--r-- fs/xfs/xfs_dquot.h 49
-rw-r--r-- fs/xfs/xfs_file.c 84
-rw-r--r-- fs/xfs/xfs_iget.c 41
-rw-r--r-- fs/xfs/xfs_inode.c 94
-rw-r--r-- fs/xfs/xfs_inode.h 23
-rw-r--r-- fs/xfs/xfs_inode_item.c 297
-rw-r--r-- fs/xfs/xfs_inode_item.h 16
-rw-r--r-- fs/xfs/xfs_ioctl.c 14
-rw-r--r-- fs/xfs/xfs_ioctl32.c 2
-rw-r--r-- fs/xfs/xfs_iomap.c 19
-rw-r--r-- fs/xfs/xfs_iops.c 71
-rw-r--r-- fs/xfs/xfs_itable.c 21
-rw-r--r-- fs/xfs/xfs_log.c 612
-rw-r--r-- fs/xfs/xfs_log.h 16
-rw-r--r-- fs/xfs/xfs_log_priv.h 28
-rw-r--r-- fs/xfs/xfs_log_recover.c 6
-rw-r--r-- fs/xfs/xfs_mount.c 8
-rw-r--r-- fs/xfs/xfs_mount.h 5
-rw-r--r-- fs/xfs/xfs_qm.c 628
-rw-r--r-- fs/xfs/xfs_qm.h 49
-rw-r--r-- fs/xfs/xfs_qm_bhv.c 42
-rw-r--r-- fs/xfs/xfs_qm_stats.c 105
-rw-r--r-- fs/xfs/xfs_qm_stats.h 53
-rw-r--r-- fs/xfs/xfs_qm_syscalls.c 130
-rw-r--r-- fs/xfs/xfs_quota.h 2
-rw-r--r-- fs/xfs/xfs_quota_priv.h 11
-rw-r--r-- fs/xfs/xfs_sb.h 1
-rw-r--r-- fs/xfs/xfs_stats.c 99
-rw-r--r-- fs/xfs/xfs_stats.h 10
-rw-r--r-- fs/xfs/xfs_super.c 164
-rw-r--r-- fs/xfs/xfs_super.h 8
-rw-r--r-- fs/xfs/xfs_sync.c 46
-rw-r--r-- fs/xfs/xfs_sync.h 2
-rw-r--r-- fs/xfs/xfs_trace.h 28
-rw-r--r-- fs/xfs/xfs_trans.c 31
-rw-r--r-- fs/xfs/xfs_trans_ail.c 83
-rw-r--r-- fs/xfs/xfs_trans_buf.c 25
-rw-r--r-- fs/xfs/xfs_trans_dquot.c 21
-rw-r--r-- fs/xfs/xfs_trans_inode.c 8
-rw-r--r-- fs/xfs/xfs_trans_priv.h 3
-rw-r--r-- fs/xfs/xfs_vnode.h 1
-rw-r--r-- fs/xfs/xfs_vnodeops.h 3
51 files changed, 1375 insertions, 2250 deletions
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index fc2c4388d126..9a391204ca27 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -282,10 +282,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
282 case Q_XGETQUOTA: 282 case Q_XGETQUOTA:
283 return quota_getxquota(sb, type, id, addr); 283 return quota_getxquota(sb, type, id, addr);
284 case Q_XQUOTASYNC: 284 case Q_XQUOTASYNC:
285 /* caller already holds s_umount */
286 if (sb->s_flags & MS_RDONLY) 285 if (sb->s_flags & MS_RDONLY)
287 return -EROFS; 286 return -EROFS;
288 writeback_inodes_sb(sb, WB_REASON_SYNC); 287 /* XFS quotas are fully coherent now, making this call a noop */
289 return 0; 288 return 0;
290 default: 289 default:
291 return -EINVAL; 290 return -EINVAL;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 427a4e82a588..0a9977983f92 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -96,9 +96,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
96 xfs_qm_bhv.o \ 96 xfs_qm_bhv.o \
97 xfs_qm.o \ 97 xfs_qm.o \
98 xfs_quotaops.o 98 xfs_quotaops.o
99ifeq ($(CONFIG_XFS_QUOTA),y)
100xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o
101endif
102xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o 99xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
103xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 100xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
104xfs-$(CONFIG_PROC_FS) += xfs_stats.o 101xfs-$(CONFIG_PROC_FS) += xfs_stats.o
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 74b9baf36ac3..0dbb9e70fe21 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -26,6 +26,7 @@
26#include "xfs_bmap_btree.h" 26#include "xfs_bmap_btree.h"
27#include "xfs_dinode.h" 27#include "xfs_dinode.h"
28#include "xfs_inode.h" 28#include "xfs_inode.h"
29#include "xfs_inode_item.h"
29#include "xfs_alloc.h" 30#include "xfs_alloc.h"
30#include "xfs_error.h" 31#include "xfs_error.h"
31#include "xfs_rw.h" 32#include "xfs_rw.h"
@@ -99,23 +100,6 @@ xfs_destroy_ioend(
99} 100}
100 101
101/* 102/*
102 * If the end of the current ioend is beyond the current EOF,
103 * return the new EOF value, otherwise zero.
104 */
105STATIC xfs_fsize_t
106xfs_ioend_new_eof(
107 xfs_ioend_t *ioend)
108{
109 xfs_inode_t *ip = XFS_I(ioend->io_inode);
110 xfs_fsize_t isize;
111 xfs_fsize_t bsize;
112
113 bsize = ioend->io_offset + ioend->io_size;
114 isize = MIN(i_size_read(VFS_I(ip)), bsize);
115 return isize > ip->i_d.di_size ? isize : 0;
116}
117
118/*
119 * Fast and loose check if this write could update the on-disk inode size. 103 * Fast and loose check if this write could update the on-disk inode size.
120 */ 104 */
121static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 105static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
@@ -124,32 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
124 XFS_I(ioend->io_inode)->i_d.di_size; 108 XFS_I(ioend->io_inode)->i_d.di_size;
125} 109}
126 110
111STATIC int
112xfs_setfilesize_trans_alloc(
113 struct xfs_ioend *ioend)
114{
115 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
116 struct xfs_trans *tp;
117 int error;
118
119 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
120
121 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
122 if (error) {
123 xfs_trans_cancel(tp, 0);
124 return error;
125 }
126
127 ioend->io_append_trans = tp;
128
129 /*
130 * We hand off the transaction to the completion thread now, so
131 * clear the flag here.
132 */
133 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
134 return 0;
135}
136
127/* 137/*
128 * Update on-disk file size now that data has been written to disk. 138 * Update on-disk file size now that data has been written to disk.
129 *
130 * This function does not block as blocking on the inode lock in IO completion
131 * can lead to IO completion order dependency deadlocks.. If it can't get the
132 * inode ilock it will return EAGAIN. Callers must handle this.
133 */ 139 */
134STATIC int 140STATIC int
135xfs_setfilesize( 141xfs_setfilesize(
136 xfs_ioend_t *ioend) 142 struct xfs_ioend *ioend)
137{ 143{
138 xfs_inode_t *ip = XFS_I(ioend->io_inode); 144 struct xfs_inode *ip = XFS_I(ioend->io_inode);
145 struct xfs_trans *tp = ioend->io_append_trans;
139 xfs_fsize_t isize; 146 xfs_fsize_t isize;
140 147
141 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) 148 /*
142 return EAGAIN; 149 * The transaction was allocated in the I/O submission thread,
150 * thus we need to mark ourselves as being in a transaction
151 * manually.
152 */
153 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
143 154
144 isize = xfs_ioend_new_eof(ioend); 155 xfs_ilock(ip, XFS_ILOCK_EXCL);
145 if (isize) { 156 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
146 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); 157 if (!isize) {
147 ip->i_d.di_size = isize; 158 xfs_iunlock(ip, XFS_ILOCK_EXCL);
148 xfs_mark_inode_dirty(ip); 159 xfs_trans_cancel(tp, 0);
160 return 0;
149 } 161 }
150 162
151 xfs_iunlock(ip, XFS_ILOCK_EXCL); 163 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
152 return 0; 164
165 ip->i_d.di_size = isize;
166 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
167 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
168
169 return xfs_trans_commit(tp, 0);
153} 170}
154 171
155/* 172/*
@@ -163,10 +180,12 @@ xfs_finish_ioend(
163 struct xfs_ioend *ioend) 180 struct xfs_ioend *ioend)
164{ 181{
165 if (atomic_dec_and_test(&ioend->io_remaining)) { 182 if (atomic_dec_and_test(&ioend->io_remaining)) {
183 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
184
166 if (ioend->io_type == IO_UNWRITTEN) 185 if (ioend->io_type == IO_UNWRITTEN)
167 queue_work(xfsconvertd_workqueue, &ioend->io_work); 186 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
168 else if (xfs_ioend_is_append(ioend)) 187 else if (ioend->io_append_trans)
169 queue_work(xfsdatad_workqueue, &ioend->io_work); 188 queue_work(mp->m_data_workqueue, &ioend->io_work);
170 else 189 else
171 xfs_destroy_ioend(ioend); 190 xfs_destroy_ioend(ioend);
172 } 191 }
@@ -195,35 +214,36 @@ xfs_end_io(
195 * range to normal written extents after the data I/O has finished. 214 * range to normal written extents after the data I/O has finished.
196 */ 215 */
197 if (ioend->io_type == IO_UNWRITTEN) { 216 if (ioend->io_type == IO_UNWRITTEN) {
217 /*
218 * For buffered I/O we never preallocate a transaction when
219 * doing the unwritten extent conversion, but for direct I/O
220 * we do not know if we are converting an unwritten extent
221 * or not at the point where we preallocate the transaction.
222 */
223 if (ioend->io_append_trans) {
224 ASSERT(ioend->io_isdirect);
225
226 current_set_flags_nested(
227 &ioend->io_append_trans->t_pflags, PF_FSTRANS);
228 xfs_trans_cancel(ioend->io_append_trans, 0);
229 }
230
198 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 231 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
199 ioend->io_size); 232 ioend->io_size);
200 if (error) { 233 if (error) {
201 ioend->io_error = -error; 234 ioend->io_error = -error;
202 goto done; 235 goto done;
203 } 236 }
237 } else if (ioend->io_append_trans) {
238 error = xfs_setfilesize(ioend);
239 if (error)
240 ioend->io_error = -error;
241 } else {
242 ASSERT(!xfs_ioend_is_append(ioend));
204 } 243 }
205 244
206 /*
207 * We might have to update the on-disk file size after extending
208 * writes.
209 */
210 error = xfs_setfilesize(ioend);
211 ASSERT(!error || error == EAGAIN);
212
213done: 245done:
214 /* 246 xfs_destroy_ioend(ioend);
215 * If we didn't complete processing of the ioend, requeue it to the
216 * tail of the workqueue for another attempt later. Otherwise destroy
217 * it.
218 */
219 if (error == EAGAIN) {
220 atomic_inc(&ioend->io_remaining);
221 xfs_finish_ioend(ioend);
222 /* ensure we don't spin on blocked ioends */
223 delay(1);
224 } else {
225 xfs_destroy_ioend(ioend);
226 }
227} 247}
228 248
229/* 249/*
@@ -259,6 +279,7 @@ xfs_alloc_ioend(
259 */ 279 */
260 atomic_set(&ioend->io_remaining, 1); 280 atomic_set(&ioend->io_remaining, 1);
261 ioend->io_isasync = 0; 281 ioend->io_isasync = 0;
282 ioend->io_isdirect = 0;
262 ioend->io_error = 0; 283 ioend->io_error = 0;
263 ioend->io_list = NULL; 284 ioend->io_list = NULL;
264 ioend->io_type = type; 285 ioend->io_type = type;
@@ -269,6 +290,7 @@ xfs_alloc_ioend(
269 ioend->io_size = 0; 290 ioend->io_size = 0;
270 ioend->io_iocb = NULL; 291 ioend->io_iocb = NULL;
271 ioend->io_result = 0; 292 ioend->io_result = 0;
293 ioend->io_append_trans = NULL;
272 294
273 INIT_WORK(&ioend->io_work, xfs_end_io); 295 INIT_WORK(&ioend->io_work, xfs_end_io);
274 return ioend; 296 return ioend;
@@ -379,14 +401,6 @@ xfs_submit_ioend_bio(
379 atomic_inc(&ioend->io_remaining); 401 atomic_inc(&ioend->io_remaining);
380 bio->bi_private = ioend; 402 bio->bi_private = ioend;
381 bio->bi_end_io = xfs_end_bio; 403 bio->bi_end_io = xfs_end_bio;
382
383 /*
384 * If the I/O is beyond EOF we mark the inode dirty immediately
385 * but don't update the inode size until I/O completion.
386 */
387 if (xfs_ioend_new_eof(ioend))
388 xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
389
390 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); 404 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
391} 405}
392 406
@@ -1033,8 +1047,20 @@ xfs_vm_writepage(
1033 wbc, end_index); 1047 wbc, end_index);
1034 } 1048 }
1035 1049
1036 if (iohead) 1050 if (iohead) {
1051 /*
1052 * Reserve log space if we might write beyond the on-disk
1053 * inode size.
1054 */
1055 if (ioend->io_type != IO_UNWRITTEN &&
1056 xfs_ioend_is_append(ioend)) {
1057 err = xfs_setfilesize_trans_alloc(ioend);
1058 if (err)
1059 goto error;
1060 }
1061
1037 xfs_submit_ioend(wbc, iohead); 1062 xfs_submit_ioend(wbc, iohead);
1063 }
1038 1064
1039 return 0; 1065 return 0;
1040 1066
@@ -1314,17 +1340,32 @@ xfs_vm_direct_IO(
1314{ 1340{
1315 struct inode *inode = iocb->ki_filp->f_mapping->host; 1341 struct inode *inode = iocb->ki_filp->f_mapping->host;
1316 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1342 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1343 struct xfs_ioend *ioend = NULL;
1317 ssize_t ret; 1344 ssize_t ret;
1318 1345
1319 if (rw & WRITE) { 1346 if (rw & WRITE) {
1320 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); 1347 size_t size = iov_length(iov, nr_segs);
1348
1349 /*
1350 * We need to preallocate a transaction for a size update
1351 * here. In the case that this write both updates the size
1352 * and converts at least one unwritten extent we will cancel
1353 * the still clean transaction after the I/O has finished.
1354 */
1355 iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
1356 if (offset + size > XFS_I(inode)->i_d.di_size) {
1357 ret = xfs_setfilesize_trans_alloc(ioend);
1358 if (ret)
1359 goto out_destroy_ioend;
1360 ioend->io_isdirect = 1;
1361 }
1321 1362
1322 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1363 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1323 offset, nr_segs, 1364 offset, nr_segs,
1324 xfs_get_blocks_direct, 1365 xfs_get_blocks_direct,
1325 xfs_end_io_direct_write, NULL, 0); 1366 xfs_end_io_direct_write, NULL, 0);
1326 if (ret != -EIOCBQUEUED && iocb->private) 1367 if (ret != -EIOCBQUEUED && iocb->private)
1327 xfs_destroy_ioend(iocb->private); 1368 goto out_trans_cancel;
1328 } else { 1369 } else {
1329 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1370 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1330 offset, nr_segs, 1371 offset, nr_segs,
@@ -1333,6 +1374,16 @@ xfs_vm_direct_IO(
1333 } 1374 }
1334 1375
1335 return ret; 1376 return ret;
1377
1378out_trans_cancel:
1379 if (ioend->io_append_trans) {
1380 current_set_flags_nested(&ioend->io_append_trans->t_pflags,
1381 PF_FSTRANS);
1382 xfs_trans_cancel(ioend->io_append_trans, 0);
1383 }
1384out_destroy_ioend:
1385 xfs_destroy_ioend(ioend);
1386 return ret;
1336} 1387}
1337 1388
1338STATIC void 1389STATIC void
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 116dd5c37034..84eafbcb0d9d 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,8 +18,6 @@
18#ifndef __XFS_AOPS_H__ 18#ifndef __XFS_AOPS_H__
19#define __XFS_AOPS_H__ 19#define __XFS_AOPS_H__
20 20
21extern struct workqueue_struct *xfsdatad_workqueue;
22extern struct workqueue_struct *xfsconvertd_workqueue;
23extern mempool_t *xfs_ioend_pool; 21extern mempool_t *xfs_ioend_pool;
24 22
25/* 23/*
@@ -48,12 +46,14 @@ typedef struct xfs_ioend {
48 int io_error; /* I/O error code */ 46 int io_error; /* I/O error code */
49 atomic_t io_remaining; /* hold count */ 47 atomic_t io_remaining; /* hold count */
50 unsigned int io_isasync : 1; /* needs aio_complete */ 48 unsigned int io_isasync : 1; /* needs aio_complete */
49 unsigned int io_isdirect : 1;/* direct I/O */
51 struct inode *io_inode; /* file being written to */ 50 struct inode *io_inode; /* file being written to */
52 struct buffer_head *io_buffer_head;/* buffer linked list head */ 51 struct buffer_head *io_buffer_head;/* buffer linked list head */
53 struct buffer_head *io_buffer_tail;/* buffer linked list tail */ 52 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
54 size_t io_size; /* size of the extent */ 53 size_t io_size; /* size of the extent */
55 xfs_off_t io_offset; /* offset in the file */ 54 xfs_off_t io_offset; /* offset in the file */
56 struct work_struct io_work; /* xfsdatad work queue */ 55 struct work_struct io_work; /* xfsdatad work queue */
56 struct xfs_trans *io_append_trans;/* xact. for size update */
57 struct kiocb *io_iocb; 57 struct kiocb *io_iocb;
58 int io_result; 58 int io_result;
59} xfs_ioend_t; 59} xfs_ioend_t;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 188ef2fbd628..3548c6f75593 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5536,8 +5536,12 @@ xfs_getbmap(
5536 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) 5536 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
5537 return XFS_ERROR(ENOMEM); 5537 return XFS_ERROR(ENOMEM);
5538 out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); 5538 out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
5539 if (!out) 5539 if (!out) {
5540 return XFS_ERROR(ENOMEM); 5540 out = kmem_zalloc_large(bmv->bmv_count *
5541 sizeof(struct getbmapx));
5542 if (!out)
5543 return XFS_ERROR(ENOMEM);
5544 }
5541 5545
5542 xfs_ilock(ip, XFS_IOLOCK_SHARED); 5546 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5543 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { 5547 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -5661,7 +5665,10 @@ xfs_getbmap(
5661 break; 5665 break;
5662 } 5666 }
5663 5667
5664 kmem_free(out); 5668 if (is_vmalloc_addr(out))
5669 kmem_free_large(out);
5670 else
5671 kmem_free(out);
5665 return error; 5672 return error;
5666} 5673}
5667 5674
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4dff85c7d7eb..6819b5163e33 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -45,8 +45,6 @@ static kmem_zone_t *xfs_buf_zone;
45STATIC int xfsbufd(void *); 45STATIC int xfsbufd(void *);
46 46
47static struct workqueue_struct *xfslogd_workqueue; 47static struct workqueue_struct *xfslogd_workqueue;
48struct workqueue_struct *xfsdatad_workqueue;
49struct workqueue_struct *xfsconvertd_workqueue;
50 48
51#ifdef XFS_BUF_LOCK_TRACKING 49#ifdef XFS_BUF_LOCK_TRACKING
52# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) 50# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
@@ -1793,21 +1791,8 @@ xfs_buf_init(void)
1793 if (!xfslogd_workqueue) 1791 if (!xfslogd_workqueue)
1794 goto out_free_buf_zone; 1792 goto out_free_buf_zone;
1795 1793
1796 xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
1797 if (!xfsdatad_workqueue)
1798 goto out_destroy_xfslogd_workqueue;
1799
1800 xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
1801 WQ_MEM_RECLAIM, 1);
1802 if (!xfsconvertd_workqueue)
1803 goto out_destroy_xfsdatad_workqueue;
1804
1805 return 0; 1794 return 0;
1806 1795
1807 out_destroy_xfsdatad_workqueue:
1808 destroy_workqueue(xfsdatad_workqueue);
1809 out_destroy_xfslogd_workqueue:
1810 destroy_workqueue(xfslogd_workqueue);
1811 out_free_buf_zone: 1796 out_free_buf_zone:
1812 kmem_zone_destroy(xfs_buf_zone); 1797 kmem_zone_destroy(xfs_buf_zone);
1813 out: 1798 out:
@@ -1817,8 +1802,6 @@ xfs_buf_init(void)
1817void 1802void
1818xfs_buf_terminate(void) 1803xfs_buf_terminate(void)
1819{ 1804{
1820 destroy_workqueue(xfsconvertd_workqueue);
1821 destroy_workqueue(xfsdatad_workqueue);
1822 destroy_workqueue(xfslogd_workqueue); 1805 destroy_workqueue(xfslogd_workqueue);
1823 kmem_zone_destroy(xfs_buf_zone); 1806 kmem_zone_destroy(xfs_buf_zone);
1824} 1807}
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index dd974a55c77d..1137bbc5eccb 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -215,7 +215,7 @@ xfs_swap_extents(
215 xfs_trans_t *tp; 215 xfs_trans_t *tp;
216 xfs_bstat_t *sbp = &sxp->sx_stat; 216 xfs_bstat_t *sbp = &sxp->sx_stat;
217 xfs_ifork_t *tempifp, *ifp, *tifp; 217 xfs_ifork_t *tempifp, *ifp, *tifp;
218 int ilf_fields, tilf_fields; 218 int src_log_flags, target_log_flags;
219 int error = 0; 219 int error = 0;
220 int aforkblks = 0; 220 int aforkblks = 0;
221 int taforkblks = 0; 221 int taforkblks = 0;
@@ -385,9 +385,8 @@ xfs_swap_extents(
385 tip->i_delayed_blks = ip->i_delayed_blks; 385 tip->i_delayed_blks = ip->i_delayed_blks;
386 ip->i_delayed_blks = 0; 386 ip->i_delayed_blks = 0;
387 387
388 ilf_fields = XFS_ILOG_CORE; 388 src_log_flags = XFS_ILOG_CORE;
389 389 switch (ip->i_d.di_format) {
390 switch(ip->i_d.di_format) {
391 case XFS_DINODE_FMT_EXTENTS: 390 case XFS_DINODE_FMT_EXTENTS:
392 /* If the extents fit in the inode, fix the 391 /* If the extents fit in the inode, fix the
393 * pointer. Otherwise it's already NULL or 392 * pointer. Otherwise it's already NULL or
@@ -397,16 +396,15 @@ xfs_swap_extents(
397 ifp->if_u1.if_extents = 396 ifp->if_u1.if_extents =
398 ifp->if_u2.if_inline_ext; 397 ifp->if_u2.if_inline_ext;
399 } 398 }
400 ilf_fields |= XFS_ILOG_DEXT; 399 src_log_flags |= XFS_ILOG_DEXT;
401 break; 400 break;
402 case XFS_DINODE_FMT_BTREE: 401 case XFS_DINODE_FMT_BTREE:
403 ilf_fields |= XFS_ILOG_DBROOT; 402 src_log_flags |= XFS_ILOG_DBROOT;
404 break; 403 break;
405 } 404 }
406 405
407 tilf_fields = XFS_ILOG_CORE; 406 target_log_flags = XFS_ILOG_CORE;
408 407 switch (tip->i_d.di_format) {
409 switch(tip->i_d.di_format) {
410 case XFS_DINODE_FMT_EXTENTS: 408 case XFS_DINODE_FMT_EXTENTS:
411 /* If the extents fit in the inode, fix the 409 /* If the extents fit in the inode, fix the
412 * pointer. Otherwise it's already NULL or 410 * pointer. Otherwise it's already NULL or
@@ -416,10 +414,10 @@ xfs_swap_extents(
416 tifp->if_u1.if_extents = 414 tifp->if_u1.if_extents =
417 tifp->if_u2.if_inline_ext; 415 tifp->if_u2.if_inline_ext;
418 } 416 }
419 tilf_fields |= XFS_ILOG_DEXT; 417 target_log_flags |= XFS_ILOG_DEXT;
420 break; 418 break;
421 case XFS_DINODE_FMT_BTREE: 419 case XFS_DINODE_FMT_BTREE:
422 tilf_fields |= XFS_ILOG_DBROOT; 420 target_log_flags |= XFS_ILOG_DBROOT;
423 break; 421 break;
424 } 422 }
425 423
@@ -427,8 +425,8 @@ xfs_swap_extents(
427 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 425 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
428 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 426 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
429 427
430 xfs_trans_log_inode(tp, ip, ilf_fields); 428 xfs_trans_log_inode(tp, ip, src_log_flags);
431 xfs_trans_log_inode(tp, tip, tilf_fields); 429 xfs_trans_log_inode(tp, tip, target_log_flags);
432 430
433 /* 431 /*
434 * If this is a synchronous mount, make sure that the 432 * If this is a synchronous mount, make sure that the
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 9245e029b8ea..d3b63aefd01d 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -29,6 +29,7 @@
29#include "xfs_dinode.h" 29#include "xfs_dinode.h"
30#include "xfs_inode.h" 30#include "xfs_inode.h"
31#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
32#include "xfs_dir2.h"
32#include "xfs_dir2_format.h" 33#include "xfs_dir2_format.h"
33#include "xfs_dir2_priv.h" 34#include "xfs_dir2_priv.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 53db20ee3e77..4be16a0cbe5a 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -43,11 +43,10 @@
43 * Lock order: 43 * Lock order:
44 * 44 *
45 * ip->i_lock 45 * ip->i_lock
46 * qh->qh_lock 46 * qi->qi_tree_lock
47 * qi->qi_dqlist_lock 47 * dquot->q_qlock (xfs_dqlock() and friends)
48 * dquot->q_qlock (xfs_dqlock() and friends) 48 * dquot->q_flush (xfs_dqflock() and friends)
49 * dquot->q_flush (xfs_dqflock() and friends) 49 * qi->qi_lru_lock
50 * xfs_Gqm->qm_dqfrlist_lock
51 * 50 *
52 * If two dquots need to be locked the order is user before group/project, 51 * If two dquots need to be locked the order is user before group/project,
53 * otherwise by the lowest id first, see xfs_dqlock2. 52 * otherwise by the lowest id first, see xfs_dqlock2.
@@ -60,6 +59,9 @@ int xfs_dqreq_num;
60int xfs_dqerror_mod = 33; 59int xfs_dqerror_mod = 33;
61#endif 60#endif
62 61
62struct kmem_zone *xfs_qm_dqtrxzone;
63static struct kmem_zone *xfs_qm_dqzone;
64
63static struct lock_class_key xfs_dquot_other_class; 65static struct lock_class_key xfs_dquot_other_class;
64 66
65/* 67/*
@@ -69,12 +71,12 @@ void
69xfs_qm_dqdestroy( 71xfs_qm_dqdestroy(
70 xfs_dquot_t *dqp) 72 xfs_dquot_t *dqp)
71{ 73{
72 ASSERT(list_empty(&dqp->q_freelist)); 74 ASSERT(list_empty(&dqp->q_lru));
73 75
74 mutex_destroy(&dqp->q_qlock); 76 mutex_destroy(&dqp->q_qlock);
75 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); 77 kmem_zone_free(xfs_qm_dqzone, dqp);
76 78
77 atomic_dec(&xfs_Gqm->qm_totaldquots); 79 XFS_STATS_DEC(xs_qm_dquot);
78} 80}
79 81
80/* 82/*
@@ -282,7 +284,7 @@ xfs_qm_dqalloc(
282 * Return if this type of quotas is turned off while we didn't 284 * Return if this type of quotas is turned off while we didn't
283 * have an inode lock 285 * have an inode lock
284 */ 286 */
285 if (XFS_IS_THIS_QUOTA_OFF(dqp)) { 287 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
286 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 288 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
287 return (ESRCH); 289 return (ESRCH);
288 } 290 }
@@ -384,7 +386,7 @@ xfs_qm_dqtobp(
384 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; 386 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
385 387
386 xfs_ilock(quotip, XFS_ILOCK_SHARED); 388 xfs_ilock(quotip, XFS_ILOCK_SHARED);
387 if (XFS_IS_THIS_QUOTA_OFF(dqp)) { 389 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
388 /* 390 /*
389 * Return if this type of quotas is turned off while we 391 * Return if this type of quotas is turned off while we
390 * didn't have the quota inode lock. 392 * didn't have the quota inode lock.
@@ -492,12 +494,12 @@ xfs_qm_dqread(
492 int cancelflags = 0; 494 int cancelflags = 0;
493 495
494 496
495 dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); 497 dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
496 498
497 dqp->dq_flags = type; 499 dqp->dq_flags = type;
498 dqp->q_core.d_id = cpu_to_be32(id); 500 dqp->q_core.d_id = cpu_to_be32(id);
499 dqp->q_mount = mp; 501 dqp->q_mount = mp;
500 INIT_LIST_HEAD(&dqp->q_freelist); 502 INIT_LIST_HEAD(&dqp->q_lru);
501 mutex_init(&dqp->q_qlock); 503 mutex_init(&dqp->q_qlock);
502 init_waitqueue_head(&dqp->q_pinwait); 504 init_waitqueue_head(&dqp->q_pinwait);
503 505
@@ -516,7 +518,7 @@ xfs_qm_dqread(
516 if (!(type & XFS_DQ_USER)) 518 if (!(type & XFS_DQ_USER))
517 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); 519 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
518 520
519 atomic_inc(&xfs_Gqm->qm_totaldquots); 521 XFS_STATS_INC(xs_qm_dquot);
520 522
521 trace_xfs_dqread(dqp); 523 trace_xfs_dqread(dqp);
522 524
@@ -602,60 +604,6 @@ error0:
602} 604}
603 605
604/* 606/*
605 * Lookup a dquot in the incore dquot hashtable. We keep two separate
606 * hashtables for user and group dquots; and, these are global tables
607 * inside the XQM, not per-filesystem tables.
608 * The hash chain must be locked by caller, and it is left locked
609 * on return. Returning dquot is locked.
610 */
611STATIC int
612xfs_qm_dqlookup(
613 xfs_mount_t *mp,
614 xfs_dqid_t id,
615 xfs_dqhash_t *qh,
616 xfs_dquot_t **O_dqpp)
617{
618 xfs_dquot_t *dqp;
619
620 ASSERT(mutex_is_locked(&qh->qh_lock));
621
622 /*
623 * Traverse the hashchain looking for a match
624 */
625 list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
626 /*
627 * We already have the hashlock. We don't need the
628 * dqlock to look at the id field of the dquot, since the
629 * id can't be modified without the hashlock anyway.
630 */
631 if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
632 continue;
633
634 trace_xfs_dqlookup_found(dqp);
635
636 xfs_dqlock(dqp);
637 if (dqp->dq_flags & XFS_DQ_FREEING) {
638 *O_dqpp = NULL;
639 xfs_dqunlock(dqp);
640 return -1;
641 }
642
643 dqp->q_nrefs++;
644
645 /*
646 * move the dquot to the front of the hashchain
647 */
648 list_move(&dqp->q_hashlist, &qh->qh_list);
649 trace_xfs_dqlookup_done(dqp);
650 *O_dqpp = dqp;
651 return 0;
652 }
653
654 *O_dqpp = NULL;
655 return 1;
656}
657
658/*
659 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a 607 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
660 * a locked dquot, doing an allocation (if requested) as needed. 608 * a locked dquot, doing an allocation (if requested) as needed.
661 * When both an inode and an id are given, the inode's id takes precedence. 609 * When both an inode and an id are given, the inode's id takes precedence.
@@ -672,10 +620,10 @@ xfs_qm_dqget(
672 uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ 620 uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
673 xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ 621 xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
674{ 622{
675 xfs_dquot_t *dqp; 623 struct xfs_quotainfo *qi = mp->m_quotainfo;
676 xfs_dqhash_t *h; 624 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
677 uint version; 625 struct xfs_dquot *dqp;
678 int error; 626 int error;
679 627
680 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 628 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
681 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || 629 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
@@ -683,7 +631,6 @@ xfs_qm_dqget(
683 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { 631 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
684 return (ESRCH); 632 return (ESRCH);
685 } 633 }
686 h = XFS_DQ_HASH(mp, id, type);
687 634
688#ifdef DEBUG 635#ifdef DEBUG
689 if (xfs_do_dqerror) { 636 if (xfs_do_dqerror) {
@@ -699,42 +646,33 @@ xfs_qm_dqget(
699 type == XFS_DQ_GROUP); 646 type == XFS_DQ_GROUP);
700 if (ip) { 647 if (ip) {
701 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 648 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
702 if (type == XFS_DQ_USER) 649 ASSERT(xfs_inode_dquot(ip, type) == NULL);
703 ASSERT(ip->i_udquot == NULL);
704 else
705 ASSERT(ip->i_gdquot == NULL);
706 } 650 }
707#endif 651#endif
708 652
709restart: 653restart:
710 mutex_lock(&h->qh_lock); 654 mutex_lock(&qi->qi_tree_lock);
655 dqp = radix_tree_lookup(tree, id);
656 if (dqp) {
657 xfs_dqlock(dqp);
658 if (dqp->dq_flags & XFS_DQ_FREEING) {
659 xfs_dqunlock(dqp);
660 mutex_unlock(&qi->qi_tree_lock);
661 trace_xfs_dqget_freeing(dqp);
662 delay(1);
663 goto restart;
664 }
711 665
712 /* 666 dqp->q_nrefs++;
713 * Look in the cache (hashtable). 667 mutex_unlock(&qi->qi_tree_lock);
714 * The chain is kept locked during lookup. 668
715 */ 669 trace_xfs_dqget_hit(dqp);
716 switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) { 670 XFS_STATS_INC(xs_qm_dqcachehits);
717 case -1: 671 *O_dqpp = dqp;
718 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); 672 return 0;
719 mutex_unlock(&h->qh_lock);
720 delay(1);
721 goto restart;
722 case 0:
723 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
724 /*
725 * The dquot was found, moved to the front of the chain,
726 * taken off the freelist if it was on it, and locked
727 * at this point. Just unlock the hashchain and return.
728 */
729 ASSERT(*O_dqpp);
730 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
731 mutex_unlock(&h->qh_lock);
732 trace_xfs_dqget_hit(*O_dqpp);
733 return 0; /* success */
734 default:
735 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
736 break;
737 } 673 }
674 mutex_unlock(&qi->qi_tree_lock);
675 XFS_STATS_INC(xs_qm_dqcachemisses);
738 676
739 /* 677 /*
740 * Dquot cache miss. We don't want to keep the inode lock across 678 * Dquot cache miss. We don't want to keep the inode lock across
@@ -745,12 +683,6 @@ restart:
745 */ 683 */
746 if (ip) 684 if (ip)
747 xfs_iunlock(ip, XFS_ILOCK_EXCL); 685 xfs_iunlock(ip, XFS_ILOCK_EXCL);
748 /*
749 * Save the hashchain version stamp, and unlock the chain, so that
750 * we don't keep the lock across a disk read
751 */
752 version = h->qh_version;
753 mutex_unlock(&h->qh_lock);
754 686
755 error = xfs_qm_dqread(mp, id, type, flags, &dqp); 687 error = xfs_qm_dqread(mp, id, type, flags, &dqp);
756 688
@@ -760,97 +692,53 @@ restart:
760 if (error) 692 if (error)
761 return error; 693 return error;
762 694
763 /*
764 * Dquot lock comes after hashlock in the lock ordering
765 */
766 if (ip) { 695 if (ip) {
767 /* 696 /*
768 * A dquot could be attached to this inode by now, since 697 * A dquot could be attached to this inode by now, since
769 * we had dropped the ilock. 698 * we had dropped the ilock.
770 */ 699 */
771 if (type == XFS_DQ_USER) { 700 if (xfs_this_quota_on(mp, type)) {
772 if (!XFS_IS_UQUOTA_ON(mp)) { 701 struct xfs_dquot *dqp1;
773 /* inode stays locked on return */ 702
774 xfs_qm_dqdestroy(dqp); 703 dqp1 = xfs_inode_dquot(ip, type);
775 return XFS_ERROR(ESRCH); 704 if (dqp1) {
776 }
777 if (ip->i_udquot) {
778 xfs_qm_dqdestroy(dqp); 705 xfs_qm_dqdestroy(dqp);
779 dqp = ip->i_udquot; 706 dqp = dqp1;
780 xfs_dqlock(dqp); 707 xfs_dqlock(dqp);
781 goto dqret; 708 goto dqret;
782 } 709 }
783 } else { 710 } else {
784 if (!XFS_IS_OQUOTA_ON(mp)) { 711 /* inode stays locked on return */
785 /* inode stays locked on return */ 712 xfs_qm_dqdestroy(dqp);
786 xfs_qm_dqdestroy(dqp); 713 return XFS_ERROR(ESRCH);
787 return XFS_ERROR(ESRCH);
788 }
789 if (ip->i_gdquot) {
790 xfs_qm_dqdestroy(dqp);
791 dqp = ip->i_gdquot;
792 xfs_dqlock(dqp);
793 goto dqret;
794 }
795 } 714 }
796 } 715 }
797 716
798 /* 717 mutex_lock(&qi->qi_tree_lock);
799 * Hashlock comes after ilock in lock order 718 error = -radix_tree_insert(tree, id, dqp);
800 */ 719 if (unlikely(error)) {
801 mutex_lock(&h->qh_lock); 720 WARN_ON(error != EEXIST);
802 if (version != h->qh_version) { 721
803 xfs_dquot_t *tmpdqp;
804 /* 722 /*
805 * Now, see if somebody else put the dquot in the 723 * Duplicate found. Just throw away the new dquot and start
806 * hashtable before us. This can happen because we didn't 724 * over.
807 * keep the hashchain lock. We don't have to worry about
808 * lock order between the two dquots here since dqp isn't
809 * on any findable lists yet.
810 */ 725 */
811 switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) { 726 mutex_unlock(&qi->qi_tree_lock);
812 case 0: 727 trace_xfs_dqget_dup(dqp);
813 case -1: 728 xfs_qm_dqdestroy(dqp);
814 /* 729 XFS_STATS_INC(xs_qm_dquot_dups);
815 * Duplicate found, either in cache or on its way out. 730 goto restart;
816 * Just throw away the new dquot and start over.
817 */
818 if (tmpdqp)
819 xfs_qm_dqput(tmpdqp);
820 mutex_unlock(&h->qh_lock);
821 xfs_qm_dqdestroy(dqp);
822 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
823 goto restart;
824 default:
825 break;
826 }
827 } 731 }
828 732
829 /* 733 /*
830 * Put the dquot at the beginning of the hash-chain and mp's list
831 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
832 */
833 ASSERT(mutex_is_locked(&h->qh_lock));
834 dqp->q_hash = h;
835 list_add(&dqp->q_hashlist, &h->qh_list);
836 h->qh_version++;
837
838 /*
839 * Attach this dquot to this filesystem's list of all dquots,
840 * kept inside the mount structure in m_quotainfo field
841 */
842 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
843
844 /*
845 * We return a locked dquot to the caller, with a reference taken 734 * We return a locked dquot to the caller, with a reference taken
846 */ 735 */
847 xfs_dqlock(dqp); 736 xfs_dqlock(dqp);
848 dqp->q_nrefs = 1; 737 dqp->q_nrefs = 1;
849 738
850 list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); 739 qi->qi_dquots++;
851 mp->m_quotainfo->qi_dquots++; 740 mutex_unlock(&qi->qi_tree_lock);
852 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 741
853 mutex_unlock(&h->qh_lock);
854 dqret: 742 dqret:
855 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 743 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
856 trace_xfs_dqget_miss(dqp); 744 trace_xfs_dqget_miss(dqp);
@@ -859,37 +747,22 @@ restart:
859} 747}
860 748
861 749
862/* 750STATIC void
863 * Release a reference to the dquot (decrement ref-count) 751xfs_qm_dqput_final(
864 * and unlock it. If there is a group quota attached to this
865 * dquot, carefully release that too without tripping over
866 * deadlocks'n'stuff.
867 */
868void
869xfs_qm_dqput(
870 struct xfs_dquot *dqp) 752 struct xfs_dquot *dqp)
871{ 753{
754 struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
872 struct xfs_dquot *gdqp; 755 struct xfs_dquot *gdqp;
873 756
874 ASSERT(dqp->q_nrefs > 0);
875 ASSERT(XFS_DQ_IS_LOCKED(dqp));
876
877 trace_xfs_dqput(dqp);
878
879recurse:
880 if (--dqp->q_nrefs > 0) {
881 xfs_dqunlock(dqp);
882 return;
883 }
884
885 trace_xfs_dqput_free(dqp); 757 trace_xfs_dqput_free(dqp);
886 758
887 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 759 mutex_lock(&qi->qi_lru_lock);
888 if (list_empty(&dqp->q_freelist)) { 760 if (list_empty(&dqp->q_lru)) {
889 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); 761 list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
890 xfs_Gqm->qm_dqfrlist_cnt++; 762 qi->qi_lru_count++;
763 XFS_STATS_INC(xs_qm_dquot_unused);
891 } 764 }
892 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 765 mutex_unlock(&qi->qi_lru_lock);
893 766
894 /* 767 /*
895 * If we just added a udquot to the freelist, then we want to release 768 * If we just added a udquot to the freelist, then we want to release
@@ -906,10 +779,29 @@ recurse:
906 /* 779 /*
907 * If we had a group quota hint, release it now. 780 * If we had a group quota hint, release it now.
908 */ 781 */
909 if (gdqp) { 782 if (gdqp)
910 dqp = gdqp; 783 xfs_qm_dqput(gdqp);
911 goto recurse; 784}
912 } 785
786/*
787 * Release a reference to the dquot (decrement ref-count) and unlock it.
788 *
789 * If there is a group quota attached to this dquot, carefully release that
790 * too without tripping over deadlocks'n'stuff.
791 */
792void
793xfs_qm_dqput(
794 struct xfs_dquot *dqp)
795{
796 ASSERT(dqp->q_nrefs > 0);
797 ASSERT(XFS_DQ_IS_LOCKED(dqp));
798
799 trace_xfs_dqput(dqp);
800
801 if (--dqp->q_nrefs > 0)
802 xfs_dqunlock(dqp);
803 else
804 xfs_qm_dqput_final(dqp);
913} 805}
914 806
915/* 807/*
@@ -1091,17 +983,6 @@ xfs_qm_dqflush(
1091 983
1092} 984}
1093 985
1094void
1095xfs_dqunlock(
1096 xfs_dquot_t *dqp)
1097{
1098 xfs_dqunlock_nonotify(dqp);
1099 if (dqp->q_logitem.qli_dquot == dqp) {
1100 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
1101 &dqp->q_logitem.qli_item);
1102 }
1103}
1104
1105/* 986/*
1106 * Lock two xfs_dquot structures. 987 * Lock two xfs_dquot structures.
1107 * 988 *
@@ -1131,85 +1012,6 @@ xfs_dqlock2(
1131} 1012}
1132 1013
1133/* 1014/*
1134 * Take a dquot out of the mount's dqlist as well as the hashlist. This is
1135 * called via unmount as well as quotaoff, and the purge will always succeed.
1136 */
1137void
1138xfs_qm_dqpurge(
1139 struct xfs_dquot *dqp)
1140{
1141 struct xfs_mount *mp = dqp->q_mount;
1142 struct xfs_dqhash *qh = dqp->q_hash;
1143
1144 xfs_dqlock(dqp);
1145
1146 /*
1147 * If we're turning off quotas, we have to make sure that, for
1148 * example, we don't delete quota disk blocks while dquots are
1149 * in the process of getting written to those disk blocks.
1150 * This dquot might well be on AIL, and we can't leave it there
1151 * if we're turning off quotas. Basically, we need this flush
1152 * lock, and are willing to block on it.
1153 */
1154 if (!xfs_dqflock_nowait(dqp)) {
1155 /*
1156 * Block on the flush lock after nudging dquot buffer,
1157 * if it is incore.
1158 */
1159 xfs_dqflock_pushbuf_wait(dqp);
1160 }
1161
1162 /*
1163 * If we are turning this type of quotas off, we don't care
1164 * about the dirty metadata sitting in this dquot. OTOH, if
1165 * we're unmounting, we do care, so we flush it and wait.
1166 */
1167 if (XFS_DQ_IS_DIRTY(dqp)) {
1168 int error;
1169
1170 /*
1171 * We don't care about getting disk errors here. We need
1172 * to purge this dquot anyway, so we go ahead regardless.
1173 */
1174 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1175 if (error)
1176 xfs_warn(mp, "%s: dquot %p flush failed",
1177 __func__, dqp);
1178 xfs_dqflock(dqp);
1179 }
1180
1181 ASSERT(atomic_read(&dqp->q_pincount) == 0);
1182 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1183 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1184
1185 xfs_dqfunlock(dqp);
1186 xfs_dqunlock(dqp);
1187
1188 mutex_lock(&qh->qh_lock);
1189 list_del_init(&dqp->q_hashlist);
1190 qh->qh_version++;
1191 mutex_unlock(&qh->qh_lock);
1192
1193 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1194 list_del_init(&dqp->q_mplist);
1195 mp->m_quotainfo->qi_dqreclaims++;
1196 mp->m_quotainfo->qi_dquots--;
1197 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1198
1199 /*
1200 * We move dquots to the freelist as soon as their reference count
1201 * hits zero, so it really should be on the freelist here.
1202 */
1203 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1204 ASSERT(!list_empty(&dqp->q_freelist));
1205 list_del_init(&dqp->q_freelist);
1206 xfs_Gqm->qm_dqfrlist_cnt--;
1207 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1208
1209 xfs_qm_dqdestroy(dqp);
1210}
1211
1212/*
1213 * Give the buffer a little push if it is incore and 1015 * Give the buffer a little push if it is incore and
1214 * wait on the flush lock. 1016 * wait on the flush lock.
1215 */ 1017 */
@@ -1241,3 +1043,31 @@ xfs_dqflock_pushbuf_wait(
1241out_lock: 1043out_lock:
1242 xfs_dqflock(dqp); 1044 xfs_dqflock(dqp);
1243} 1045}
1046
1047int __init
1048xfs_qm_init(void)
1049{
1050 xfs_qm_dqzone =
1051 kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
1052 if (!xfs_qm_dqzone)
1053 goto out;
1054
1055 xfs_qm_dqtrxzone =
1056 kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
1057 if (!xfs_qm_dqtrxzone)
1058 goto out_free_dqzone;
1059
1060 return 0;
1061
1062out_free_dqzone:
1063 kmem_zone_destroy(xfs_qm_dqzone);
1064out:
1065 return -ENOMEM;
1066}
1067
1068void __exit
1069xfs_qm_exit(void)
1070{
1071 kmem_zone_destroy(xfs_qm_dqtrxzone);
1072 kmem_zone_destroy(xfs_qm_dqzone);
1073}
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index a1d91d8f1802..ef9190bd8b30 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -29,16 +29,6 @@
29 * when quotas are off. 29 * when quotas are off.
30 */ 30 */
31 31
32/*
33 * The hash chain headers (hash buckets)
34 */
35typedef struct xfs_dqhash {
36 struct list_head qh_list;
37 struct mutex qh_lock;
38 uint qh_version; /* ever increasing version */
39 uint qh_nelems; /* number of dquots on the list */
40} xfs_dqhash_t;
41
42struct xfs_mount; 32struct xfs_mount;
43struct xfs_trans; 33struct xfs_trans;
44 34
@@ -47,10 +37,7 @@ struct xfs_trans;
47 */ 37 */
48typedef struct xfs_dquot { 38typedef struct xfs_dquot {
49 uint dq_flags; /* various flags (XFS_DQ_*) */ 39 uint dq_flags; /* various flags (XFS_DQ_*) */
50 struct list_head q_freelist; /* global free list of dquots */ 40 struct list_head q_lru; /* global free list of dquots */
51 struct list_head q_mplist; /* mount's list of dquots */
52 struct list_head q_hashlist; /* global hash list of dquots */
53 xfs_dqhash_t *q_hash; /* the hashchain header */
54 struct xfs_mount*q_mount; /* filesystem this relates to */ 41 struct xfs_mount*q_mount; /* filesystem this relates to */
55 struct xfs_trans*q_transp; /* trans this belongs to currently */ 42 struct xfs_trans*q_transp; /* trans this belongs to currently */
56 uint q_nrefs; /* # active refs from inodes */ 43 uint q_nrefs; /* # active refs from inodes */
@@ -110,11 +97,37 @@ static inline void xfs_dqlock(struct xfs_dquot *dqp)
110 mutex_lock(&dqp->q_qlock); 97 mutex_lock(&dqp->q_qlock);
111} 98}
112 99
113static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp) 100static inline void xfs_dqunlock(struct xfs_dquot *dqp)
114{ 101{
115 mutex_unlock(&dqp->q_qlock); 102 mutex_unlock(&dqp->q_qlock);
116} 103}
117 104
105static inline int xfs_this_quota_on(struct xfs_mount *mp, int type)
106{
107 switch (type & XFS_DQ_ALLTYPES) {
108 case XFS_DQ_USER:
109 return XFS_IS_UQUOTA_ON(mp);
110 case XFS_DQ_GROUP:
111 case XFS_DQ_PROJ:
112 return XFS_IS_OQUOTA_ON(mp);
113 default:
114 return 0;
115 }
116}
117
118static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
119{
120 switch (type & XFS_DQ_ALLTYPES) {
121 case XFS_DQ_USER:
122 return ip->i_udquot;
123 case XFS_DQ_GROUP:
124 case XFS_DQ_PROJ:
125 return ip->i_gdquot;
126 default:
127 return NULL;
128 }
129}
130
118#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 131#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
119#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 132#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
120#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 133#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -125,15 +138,10 @@ static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
125 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ 138 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
126 XFS_DQ_TO_QINF(dqp)->qi_gquotaip) 139 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
127 140
128#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
129 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
130 (XFS_IS_OQUOTA_ON((d)->q_mount))))
131
132extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, 141extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
133 uint, struct xfs_dquot **); 142 uint, struct xfs_dquot **);
134extern void xfs_qm_dqdestroy(xfs_dquot_t *); 143extern void xfs_qm_dqdestroy(xfs_dquot_t *);
135extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 144extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
136extern void xfs_qm_dqpurge(xfs_dquot_t *);
137extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 145extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
138extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 146extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
139 xfs_disk_dquot_t *); 147 xfs_disk_dquot_t *);
@@ -144,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
144extern void xfs_qm_dqput(xfs_dquot_t *); 152extern void xfs_qm_dqput(xfs_dquot_t *);
145 153
146extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); 154extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
147extern void xfs_dqunlock(struct xfs_dquot *);
148extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); 155extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
149 156
150static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) 157static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7e5bc872f2b4..54a67dd9ac0a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -163,7 +163,6 @@ xfs_file_fsync(
163 struct inode *inode = file->f_mapping->host; 163 struct inode *inode = file->f_mapping->host;
164 struct xfs_inode *ip = XFS_I(inode); 164 struct xfs_inode *ip = XFS_I(inode);
165 struct xfs_mount *mp = ip->i_mount; 165 struct xfs_mount *mp = ip->i_mount;
166 struct xfs_trans *tp;
167 int error = 0; 166 int error = 0;
168 int log_flushed = 0; 167 int log_flushed = 0;
169 xfs_lsn_t lsn = 0; 168 xfs_lsn_t lsn = 0;
@@ -194,75 +193,18 @@ xfs_file_fsync(
194 } 193 }
195 194
196 /* 195 /*
197 * We always need to make sure that the required inode state is safe on 196 * All metadata updates are logged, which means that we just have
198 * disk. The inode might be clean but we still might need to force the 197 * to flush the log up to the latest LSN that touched the inode.
199 * log because of committed transactions that haven't hit the disk yet.
200 * Likewise, there could be unflushed non-transactional changes to the
201 * inode core that have to go to disk and this requires us to issue
202 * a synchronous transaction to capture these changes correctly.
203 *
204 * This code relies on the assumption that if the i_update_core field
205 * of the inode is clear and the inode is unpinned then it is clean
206 * and no action is required.
207 */ 198 */
208 xfs_ilock(ip, XFS_ILOCK_SHARED); 199 xfs_ilock(ip, XFS_ILOCK_SHARED);
209 200 if (xfs_ipincount(ip)) {
210 /* 201 if (!datasync ||
211 * First check if the VFS inode is marked dirty. All the dirtying 202 (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
212 * of non-transactional updates do not go through mark_inode_dirty*,
213 * which allows us to distinguish between pure timestamp updates
214 * and i_size updates which need to be caught for fdatasync.
215 * After that also check for the dirty state in the XFS inode, which
216 * might gets cleared when the inode gets written out via the AIL
217 * or xfs_iflush_cluster.
218 */
219 if (((inode->i_state & I_DIRTY_DATASYNC) ||
220 ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
221 ip->i_update_core) {
222 /*
223 * Kick off a transaction to log the inode core to get the
224 * updates. The sync transaction will also force the log.
225 */
226 xfs_iunlock(ip, XFS_ILOCK_SHARED);
227 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
228 error = xfs_trans_reserve(tp, 0,
229 XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
230 if (error) {
231 xfs_trans_cancel(tp, 0);
232 return -error;
233 }
234 xfs_ilock(ip, XFS_ILOCK_EXCL);
235
236 /*
237 * Note - it's possible that we might have pushed ourselves out
238 * of the way during trans_reserve which would flush the inode.
239 * But there's no guarantee that the inode buffer has actually
240 * gone out yet (it's delwri). Plus the buffer could be pinned
241 * anyway if it's part of an inode in another recent
242 * transaction. So we play it safe and fire off the
243 * transaction anyway.
244 */
245 xfs_trans_ijoin(tp, ip, 0);
246 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
247 error = xfs_trans_commit(tp, 0);
248
249 lsn = ip->i_itemp->ili_last_lsn;
250 xfs_iunlock(ip, XFS_ILOCK_EXCL);
251 } else {
252 /*
253 * Timestamps/size haven't changed since last inode flush or
254 * inode transaction commit. That means either nothing got
255 * written or a transaction committed which caught the updates.
256 * If the latter happened and the transaction hasn't hit the
257 * disk yet, the inode will be still be pinned. If it is,
258 * force the log.
259 */
260 if (xfs_ipincount(ip))
261 lsn = ip->i_itemp->ili_last_lsn; 203 lsn = ip->i_itemp->ili_last_lsn;
262 xfs_iunlock(ip, XFS_ILOCK_SHARED);
263 } 204 }
205 xfs_iunlock(ip, XFS_ILOCK_SHARED);
264 206
265 if (!error && lsn) 207 if (lsn)
266 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 208 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
267 209
268 /* 210 /*
@@ -659,9 +601,6 @@ restart:
659 return error; 601 return error;
660 } 602 }
661 603
662 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
663 file_update_time(file);
664
665 /* 604 /*
666 * If the offset is beyond the size of the file, we need to zero any 605 * If the offset is beyond the size of the file, we need to zero any
667 * blocks that fall between the existing EOF and the start of this 606 * blocks that fall between the existing EOF and the start of this
@@ -685,6 +624,15 @@ restart:
685 return error; 624 return error;
686 625
687 /* 626 /*
627 * Updating the timestamps will grab the ilock again from
628 * xfs_fs_dirty_inode, so we have to call it after dropping the
629 * lock above. Eventually we should look into a way to avoid
630 * the pointless lock roundtrip.
631 */
632 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
633 file_update_time(file);
634
635 /*
688 * If we're writing the file then make sure to clear the setuid and 636 * If we're writing the file then make sure to clear the setuid and
689 * setgid bits if the process is not being run by root. This keeps 637 * setgid bits if the process is not being run by root. This keeps
690 * people from modifying setuid and setgid binaries. 638 * people from modifying setuid and setgid binaries.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8c3e46394d48..a98cb4524e6c 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,6 @@ xfs_inode_alloc(
91 ip->i_afp = NULL; 91 ip->i_afp = NULL;
92 memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); 92 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
93 ip->i_flags = 0; 93 ip->i_flags = 0;
94 ip->i_update_core = 0;
95 ip->i_delayed_blks = 0; 94 ip->i_delayed_blks = 0;
96 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 95 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
97 96
@@ -350,9 +349,20 @@ xfs_iget_cache_miss(
350 BUG(); 349 BUG();
351 } 350 }
352 351
353 spin_lock(&pag->pag_ici_lock); 352 /*
353 * These values must be set before inserting the inode into the radix
354 * tree as the moment it is inserted a concurrent lookup (allowed by the
355 * RCU locking mechanism) can find it and that lookup must see that this
356 * is an inode currently under construction (i.e. that XFS_INEW is set).
357 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
358 * memory barrier that ensures this detection works correctly at lookup
359 * time.
360 */
361 ip->i_udquot = ip->i_gdquot = NULL;
362 xfs_iflags_set(ip, XFS_INEW);
354 363
355 /* insert the new inode */ 364 /* insert the new inode */
365 spin_lock(&pag->pag_ici_lock);
356 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 366 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
357 if (unlikely(error)) { 367 if (unlikely(error)) {
358 WARN_ON(error != -EEXIST); 368 WARN_ON(error != -EEXIST);
@@ -360,11 +370,6 @@ xfs_iget_cache_miss(
360 error = EAGAIN; 370 error = EAGAIN;
361 goto out_preload_end; 371 goto out_preload_end;
362 } 372 }
363
364 /* These values _must_ be set before releasing the radix tree lock! */
365 ip->i_udquot = ip->i_gdquot = NULL;
366 xfs_iflags_set(ip, XFS_INEW);
367
368 spin_unlock(&pag->pag_ici_lock); 373 spin_unlock(&pag->pag_ici_lock);
369 radix_tree_preload_end(); 374 radix_tree_preload_end();
370 375
@@ -418,6 +423,15 @@ xfs_iget(
418 xfs_perag_t *pag; 423 xfs_perag_t *pag;
419 xfs_agino_t agino; 424 xfs_agino_t agino;
420 425
426 /*
427 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
428 * doesn't get freed while it's being referenced during a
429 * radix tree traversal here. It assumes this function
430 * acquires only the ILOCK (and therefore it has no need to
431 * involve the IOLOCK in this synchronization).
432 */
433 ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
434
421 /* reject inode numbers outside existing AGs */ 435 /* reject inode numbers outside existing AGs */
422 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 436 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
423 return EINVAL; 437 return EINVAL;
@@ -642,8 +656,7 @@ xfs_iunlock(
642 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 656 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
643 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 657 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
644 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 658 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
645 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY | 659 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
646 XFS_LOCK_DEP_MASK)) == 0);
647 ASSERT(lock_flags != 0); 660 ASSERT(lock_flags != 0);
648 661
649 if (lock_flags & XFS_IOLOCK_EXCL) 662 if (lock_flags & XFS_IOLOCK_EXCL)
@@ -656,16 +669,6 @@ xfs_iunlock(
656 else if (lock_flags & XFS_ILOCK_SHARED) 669 else if (lock_flags & XFS_ILOCK_SHARED)
657 mrunlock_shared(&ip->i_lock); 670 mrunlock_shared(&ip->i_lock);
658 671
659 if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
660 !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
661 /*
662 * Let the AIL know that this item has been unlocked in case
663 * it is in the AIL and anyone is waiting on it. Don't do
664 * this if the caller has asked us not to.
665 */
666 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
667 (xfs_log_item_t*)(ip->i_itemp));
668 }
669 trace_xfs_iunlock(ip, lock_flags, _RET_IP_); 672 trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
670} 673}
671 674
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b21022499c2e..bc46c0a133d3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1656,14 +1656,13 @@ retry:
1656 iip = ip->i_itemp; 1656 iip = ip->i_itemp;
1657 if (!iip || xfs_inode_clean(ip)) { 1657 if (!iip || xfs_inode_clean(ip)) {
1658 ASSERT(ip != free_ip); 1658 ASSERT(ip != free_ip);
1659 ip->i_update_core = 0;
1660 xfs_ifunlock(ip); 1659 xfs_ifunlock(ip);
1661 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1660 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1662 continue; 1661 continue;
1663 } 1662 }
1664 1663
1665 iip->ili_last_fields = iip->ili_format.ilf_fields; 1664 iip->ili_last_fields = iip->ili_fields;
1666 iip->ili_format.ilf_fields = 0; 1665 iip->ili_fields = 0;
1667 iip->ili_logged = 1; 1666 iip->ili_logged = 1;
1668 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 1667 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
1669 &iip->ili_item.li_lsn); 1668 &iip->ili_item.li_lsn);
@@ -2177,7 +2176,7 @@ xfs_iflush_fork(
2177 mp = ip->i_mount; 2176 mp = ip->i_mount;
2178 switch (XFS_IFORK_FORMAT(ip, whichfork)) { 2177 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
2179 case XFS_DINODE_FMT_LOCAL: 2178 case XFS_DINODE_FMT_LOCAL:
2180 if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 2179 if ((iip->ili_fields & dataflag[whichfork]) &&
2181 (ifp->if_bytes > 0)) { 2180 (ifp->if_bytes > 0)) {
2182 ASSERT(ifp->if_u1.if_data != NULL); 2181 ASSERT(ifp->if_u1.if_data != NULL);
2183 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2182 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
@@ -2187,8 +2186,8 @@ xfs_iflush_fork(
2187 2186
2188 case XFS_DINODE_FMT_EXTENTS: 2187 case XFS_DINODE_FMT_EXTENTS:
2189 ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 2188 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
2190 !(iip->ili_format.ilf_fields & extflag[whichfork])); 2189 !(iip->ili_fields & extflag[whichfork]));
2191 if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 2190 if ((iip->ili_fields & extflag[whichfork]) &&
2192 (ifp->if_bytes > 0)) { 2191 (ifp->if_bytes > 0)) {
2193 ASSERT(xfs_iext_get_ext(ifp, 0)); 2192 ASSERT(xfs_iext_get_ext(ifp, 0));
2194 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 2193 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
@@ -2198,7 +2197,7 @@ xfs_iflush_fork(
2198 break; 2197 break;
2199 2198
2200 case XFS_DINODE_FMT_BTREE: 2199 case XFS_DINODE_FMT_BTREE:
2201 if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 2200 if ((iip->ili_fields & brootflag[whichfork]) &&
2202 (ifp->if_broot_bytes > 0)) { 2201 (ifp->if_broot_bytes > 0)) {
2203 ASSERT(ifp->if_broot != NULL); 2202 ASSERT(ifp->if_broot != NULL);
2204 ASSERT(ifp->if_broot_bytes <= 2203 ASSERT(ifp->if_broot_bytes <=
@@ -2211,14 +2210,14 @@ xfs_iflush_fork(
2211 break; 2210 break;
2212 2211
2213 case XFS_DINODE_FMT_DEV: 2212 case XFS_DINODE_FMT_DEV:
2214 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 2213 if (iip->ili_fields & XFS_ILOG_DEV) {
2215 ASSERT(whichfork == XFS_DATA_FORK); 2214 ASSERT(whichfork == XFS_DATA_FORK);
2216 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 2215 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
2217 } 2216 }
2218 break; 2217 break;
2219 2218
2220 case XFS_DINODE_FMT_UUID: 2219 case XFS_DINODE_FMT_UUID:
2221 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 2220 if (iip->ili_fields & XFS_ILOG_UUID) {
2222 ASSERT(whichfork == XFS_DATA_FORK); 2221 ASSERT(whichfork == XFS_DATA_FORK);
2223 memcpy(XFS_DFORK_DPTR(dip), 2222 memcpy(XFS_DFORK_DPTR(dip),
2224 &ip->i_df.if_u2.if_uuid, 2223 &ip->i_df.if_u2.if_uuid,
@@ -2451,9 +2450,8 @@ xfs_iflush(
2451 * to disk, because the log record didn't make it to disk! 2450 * to disk, because the log record didn't make it to disk!
2452 */ 2451 */
2453 if (XFS_FORCED_SHUTDOWN(mp)) { 2452 if (XFS_FORCED_SHUTDOWN(mp)) {
2454 ip->i_update_core = 0;
2455 if (iip) 2453 if (iip)
2456 iip->ili_format.ilf_fields = 0; 2454 iip->ili_fields = 0;
2457 xfs_ifunlock(ip); 2455 xfs_ifunlock(ip);
2458 return XFS_ERROR(EIO); 2456 return XFS_ERROR(EIO);
2459 } 2457 }
@@ -2533,26 +2531,6 @@ xfs_iflush_int(
2533 /* set *dip = inode's place in the buffer */ 2531 /* set *dip = inode's place in the buffer */
2534 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2532 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
2535 2533
2536 /*
2537 * Clear i_update_core before copying out the data.
2538 * This is for coordination with our timestamp updates
2539 * that don't hold the inode lock. They will always
2540 * update the timestamps BEFORE setting i_update_core,
2541 * so if we clear i_update_core after they set it we
2542 * are guaranteed to see their updates to the timestamps.
2543 * I believe that this depends on strongly ordered memory
2544 * semantics, but we have that. We use the SYNCHRONIZE
2545 * macro to make sure that the compiler does not reorder
2546 * the i_update_core access below the data copy below.
2547 */
2548 ip->i_update_core = 0;
2549 SYNCHRONIZE();
2550
2551 /*
2552 * Make sure to get the latest timestamps from the Linux inode.
2553 */
2554 xfs_synchronize_times(ip);
2555
2556 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 2534 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
2557 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 2535 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
2558 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 2536 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
@@ -2663,36 +2641,33 @@ xfs_iflush_int(
2663 xfs_inobp_check(mp, bp); 2641 xfs_inobp_check(mp, bp);
2664 2642
2665 /* 2643 /*
2666 * We've recorded everything logged in the inode, so we'd 2644 * We've recorded everything logged in the inode, so we'd like to clear
2667 * like to clear the ilf_fields bits so we don't log and 2645 * the ili_fields bits so we don't log and flush things unnecessarily.
2668 * flush things unnecessarily. However, we can't stop 2646 * However, we can't stop logging all this information until the data
2669 * logging all this information until the data we've copied 2647 * we've copied into the disk buffer is written to disk. If we did we
2670 * into the disk buffer is written to disk. If we did we might 2648 * might overwrite the copy of the inode in the log with all the data
2671 * overwrite the copy of the inode in the log with all the 2649 * after re-logging only part of it, and in the face of a crash we
2672 * data after re-logging only part of it, and in the face of 2650 * wouldn't have all the data we need to recover.
2673 * a crash we wouldn't have all the data we need to recover.
2674 * 2651 *
2675 * What we do is move the bits to the ili_last_fields field. 2652 * What we do is move the bits to the ili_last_fields field. When
2676 * When logging the inode, these bits are moved back to the 2653 * logging the inode, these bits are moved back to the ili_fields field.
2677 * ilf_fields field. In the xfs_iflush_done() routine we 2654 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
2678 * clear ili_last_fields, since we know that the information 2655 * know that the information those bits represent is permanently on
2679 * those bits represent is permanently on disk. As long as 2656 * disk. As long as the flush completes before the inode is logged
2680 * the flush completes before the inode is logged again, then 2657 * again, then both ili_fields and ili_last_fields will be cleared.
2681 * both ilf_fields and ili_last_fields will be cleared.
2682 * 2658 *
2683 * We can play with the ilf_fields bits here, because the inode 2659 * We can play with the ili_fields bits here, because the inode lock
2684 * lock must be held exclusively in order to set bits there 2660 * must be held exclusively in order to set bits there and the flush
2685 * and the flush lock protects the ili_last_fields bits. 2661 * lock protects the ili_last_fields bits. Set ili_logged so the flush
2686 * Set ili_logged so the flush done 2662 * done routine can tell whether or not to look in the AIL. Also, store
2687 * routine can tell whether or not to look in the AIL. 2663 * the current LSN of the inode so that we can tell whether the item has
2688 * Also, store the current LSN of the inode so that we can tell 2664 * moved in the AIL from xfs_iflush_done(). In order to read the lsn we
2689 * whether the item has moved in the AIL from xfs_iflush_done(). 2665 * need the AIL lock, because it is a 64 bit value that cannot be read
2690 * In order to read the lsn we need the AIL lock, because 2666 * atomically.
2691 * it is a 64 bit value that cannot be read atomically.
2692 */ 2667 */
2693 if (iip != NULL && iip->ili_format.ilf_fields != 0) { 2668 if (iip != NULL && iip->ili_fields != 0) {
2694 iip->ili_last_fields = iip->ili_format.ilf_fields; 2669 iip->ili_last_fields = iip->ili_fields;
2695 iip->ili_format.ilf_fields = 0; 2670 iip->ili_fields = 0;
2696 iip->ili_logged = 1; 2671 iip->ili_logged = 1;
2697 2672
2698 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2673 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
@@ -2711,8 +2686,7 @@ xfs_iflush_int(
2711 } else { 2686 } else {
2712 /* 2687 /*
2713 * We're flushing an inode which is not in the AIL and has 2688 * We're flushing an inode which is not in the AIL and has
2714 * not been logged but has i_update_core set. For this 2689 * not been logged. For this case we can immediately drop
2715 * case we can use a B_DELWRI flush and immediately drop
2716 * the inode flush lock because we can avoid the whole 2690 * the inode flush lock because we can avoid the whole
2717 * AIL state thing. It's OK to drop the flush lock now, 2691 * AIL state thing. It's OK to drop the flush lock now,
2718 * because we've already locked the buffer and to do anything 2692 * because we've already locked the buffer and to do anything
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 2f27b7454085..f123dbe6d42a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -241,7 +241,6 @@ typedef struct xfs_inode {
241 spinlock_t i_flags_lock; /* inode i_flags lock */ 241 spinlock_t i_flags_lock; /* inode i_flags lock */
242 /* Miscellaneous state. */ 242 /* Miscellaneous state. */
243 unsigned long i_flags; /* see defined flags below */ 243 unsigned long i_flags; /* see defined flags below */
244 unsigned char i_update_core; /* timestamps/size is dirty */
245 unsigned int i_delayed_blks; /* count of delay alloc blks */ 244 unsigned int i_delayed_blks; /* count of delay alloc blks */
246 245
247 xfs_icdinode_t i_d; /* most of ondisk inode */ 246 xfs_icdinode_t i_d; /* most of ondisk inode */
@@ -275,6 +274,20 @@ static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
275} 274}
276 275
277/* 276/*
277 * If this I/O goes past the on-disk inode size, update it unless it would
278 * be past the current in-core inode size.
279 */
280static inline xfs_fsize_t
281xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
282{
283 xfs_fsize_t i_size = i_size_read(VFS_I(ip));
284
285 if (new_size > i_size)
286 new_size = i_size;
287 return new_size > ip->i_d.di_size ? new_size : 0;
288}
289
290/*
278 * i_flags helper functions 291 * i_flags helper functions
279 */ 292 */
280static inline void 293static inline void
@@ -422,7 +435,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
422#define XFS_IOLOCK_SHARED (1<<1) 435#define XFS_IOLOCK_SHARED (1<<1)
423#define XFS_ILOCK_EXCL (1<<2) 436#define XFS_ILOCK_EXCL (1<<2)
424#define XFS_ILOCK_SHARED (1<<3) 437#define XFS_ILOCK_SHARED (1<<3)
425#define XFS_IUNLOCK_NONOTIFY (1<<4)
426 438
427#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 439#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
428 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 440 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
@@ -431,8 +443,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
431 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ 443 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
432 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ 444 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
433 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ 445 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
434 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ 446 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }
435 { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" }
436 447
437 448
438/* 449/*
@@ -522,10 +533,6 @@ void xfs_promote_inode(struct xfs_inode *);
522void xfs_lock_inodes(xfs_inode_t **, int, uint); 533void xfs_lock_inodes(xfs_inode_t **, int, uint);
523void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 534void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
524 535
525void xfs_synchronize_times(xfs_inode_t *);
526void xfs_mark_inode_dirty(xfs_inode_t *);
527void xfs_mark_inode_dirty_sync(xfs_inode_t *);
528
529#define IHOLD(ip) \ 536#define IHOLD(ip) \
530do { \ 537do { \
531 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 538 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 91d71dcd4852..05d924efceaf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -57,77 +57,28 @@ xfs_inode_item_size(
57 struct xfs_inode *ip = iip->ili_inode; 57 struct xfs_inode *ip = iip->ili_inode;
58 uint nvecs = 2; 58 uint nvecs = 2;
59 59
60 /*
61 * Only log the data/extents/b-tree root if there is something
62 * left to log.
63 */
64 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
65
66 switch (ip->i_d.di_format) { 60 switch (ip->i_d.di_format) {
67 case XFS_DINODE_FMT_EXTENTS: 61 case XFS_DINODE_FMT_EXTENTS:
68 iip->ili_format.ilf_fields &= 62 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
69 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 63 ip->i_d.di_nextents > 0 &&
70 XFS_ILOG_DEV | XFS_ILOG_UUID); 64 ip->i_df.if_bytes > 0)
71 if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
72 (ip->i_d.di_nextents > 0) &&
73 (ip->i_df.if_bytes > 0)) {
74 ASSERT(ip->i_df.if_u1.if_extents != NULL);
75 nvecs++; 65 nvecs++;
76 } else {
77 iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
78 }
79 break; 66 break;
80 67
81 case XFS_DINODE_FMT_BTREE: 68 case XFS_DINODE_FMT_BTREE:
82 iip->ili_format.ilf_fields &= 69 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
83 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | 70 ip->i_df.if_broot_bytes > 0)
84 XFS_ILOG_DEV | XFS_ILOG_UUID);
85 if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
86 (ip->i_df.if_broot_bytes > 0)) {
87 ASSERT(ip->i_df.if_broot != NULL);
88 nvecs++; 71 nvecs++;
89 } else {
90 ASSERT(!(iip->ili_format.ilf_fields &
91 XFS_ILOG_DBROOT));
92#ifdef XFS_TRANS_DEBUG
93 if (iip->ili_root_size > 0) {
94 ASSERT(iip->ili_root_size ==
95 ip->i_df.if_broot_bytes);
96 ASSERT(memcmp(iip->ili_orig_root,
97 ip->i_df.if_broot,
98 iip->ili_root_size) == 0);
99 } else {
100 ASSERT(ip->i_df.if_broot_bytes == 0);
101 }
102#endif
103 iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
104 }
105 break; 72 break;
106 73
107 case XFS_DINODE_FMT_LOCAL: 74 case XFS_DINODE_FMT_LOCAL:
108 iip->ili_format.ilf_fields &= 75 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
109 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | 76 ip->i_df.if_bytes > 0)
110 XFS_ILOG_DEV | XFS_ILOG_UUID);
111 if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
112 (ip->i_df.if_bytes > 0)) {
113 ASSERT(ip->i_df.if_u1.if_data != NULL);
114 ASSERT(ip->i_d.di_size > 0);
115 nvecs++; 77 nvecs++;
116 } else {
117 iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
118 }
119 break; 78 break;
120 79
121 case XFS_DINODE_FMT_DEV: 80 case XFS_DINODE_FMT_DEV:
122 iip->ili_format.ilf_fields &=
123 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
124 XFS_ILOG_DEXT | XFS_ILOG_UUID);
125 break;
126
127 case XFS_DINODE_FMT_UUID: 81 case XFS_DINODE_FMT_UUID:
128 iip->ili_format.ilf_fields &=
129 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
130 XFS_ILOG_DEXT | XFS_ILOG_DEV);
131 break; 82 break;
132 83
133 default: 84 default:
@@ -135,56 +86,31 @@ xfs_inode_item_size(
135 break; 86 break;
136 } 87 }
137 88
138 /* 89 if (!XFS_IFORK_Q(ip))
139 * If there are no attributes associated with this file,
140 * then there cannot be anything more to log.
141 * Clear all attribute-related log flags.
142 */
143 if (!XFS_IFORK_Q(ip)) {
144 iip->ili_format.ilf_fields &=
145 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
146 return nvecs; 90 return nvecs;
147 } 91
148 92
149 /* 93 /*
150 * Log any necessary attribute data. 94 * Log any necessary attribute data.
151 */ 95 */
152 switch (ip->i_d.di_aformat) { 96 switch (ip->i_d.di_aformat) {
153 case XFS_DINODE_FMT_EXTENTS: 97 case XFS_DINODE_FMT_EXTENTS:
154 iip->ili_format.ilf_fields &= 98 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
155 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); 99 ip->i_d.di_anextents > 0 &&
156 if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && 100 ip->i_afp->if_bytes > 0)
157 (ip->i_d.di_anextents > 0) &&
158 (ip->i_afp->if_bytes > 0)) {
159 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
160 nvecs++; 101 nvecs++;
161 } else {
162 iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
163 }
164 break; 102 break;
165 103
166 case XFS_DINODE_FMT_BTREE: 104 case XFS_DINODE_FMT_BTREE:
167 iip->ili_format.ilf_fields &= 105 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
168 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); 106 ip->i_afp->if_broot_bytes > 0)
169 if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
170 (ip->i_afp->if_broot_bytes > 0)) {
171 ASSERT(ip->i_afp->if_broot != NULL);
172 nvecs++; 107 nvecs++;
173 } else {
174 iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
175 }
176 break; 108 break;
177 109
178 case XFS_DINODE_FMT_LOCAL: 110 case XFS_DINODE_FMT_LOCAL:
179 iip->ili_format.ilf_fields &= 111 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
180 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); 112 ip->i_afp->if_bytes > 0)
181 if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
182 (ip->i_afp->if_bytes > 0)) {
183 ASSERT(ip->i_afp->if_u1.if_data != NULL);
184 nvecs++; 113 nvecs++;
185 } else {
186 iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
187 }
188 break; 114 break;
189 115
190 default: 116 default:
@@ -254,48 +180,11 @@ xfs_inode_item_format(
254 vecp++; 180 vecp++;
255 nvecs = 1; 181 nvecs = 1;
256 182
257 /*
258 * Clear i_update_core if the timestamps (or any other
259 * non-transactional modification) need flushing/logging
260 * and we're about to log them with the rest of the core.
261 *
262 * This is the same logic as xfs_iflush() but this code can't
263 * run at the same time as xfs_iflush because we're in commit
264 * processing here and so we have the inode lock held in
265 * exclusive mode. Although it doesn't really matter
266 * for the timestamps if both routines were to grab the
267 * timestamps or not. That would be ok.
268 *
269 * We clear i_update_core before copying out the data.
270 * This is for coordination with our timestamp updates
271 * that don't hold the inode lock. They will always
272 * update the timestamps BEFORE setting i_update_core,
273 * so if we clear i_update_core after they set it we
274 * are guaranteed to see their updates to the timestamps
275 * either here. Likewise, if they set it after we clear it
276 * here, we'll see it either on the next commit of this
277 * inode or the next time the inode gets flushed via
278 * xfs_iflush(). This depends on strongly ordered memory
279 * semantics, but we have that. We use the SYNCHRONIZE
280 * macro to make sure that the compiler does not reorder
281 * the i_update_core access below the data copy below.
282 */
283 if (ip->i_update_core) {
284 ip->i_update_core = 0;
285 SYNCHRONIZE();
286 }
287
288 /*
289 * Make sure to get the latest timestamps from the Linux inode.
290 */
291 xfs_synchronize_times(ip);
292
293 vecp->i_addr = &ip->i_d; 183 vecp->i_addr = &ip->i_d;
294 vecp->i_len = sizeof(struct xfs_icdinode); 184 vecp->i_len = sizeof(struct xfs_icdinode);
295 vecp->i_type = XLOG_REG_TYPE_ICORE; 185 vecp->i_type = XLOG_REG_TYPE_ICORE;
296 vecp++; 186 vecp++;
297 nvecs++; 187 nvecs++;
298 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
299 188
300 /* 189 /*
301 * If this is really an old format inode, then we need to 190 * If this is really an old format inode, then we need to
@@ -328,16 +217,17 @@ xfs_inode_item_format(
328 217
329 switch (ip->i_d.di_format) { 218 switch (ip->i_d.di_format) {
330 case XFS_DINODE_FMT_EXTENTS: 219 case XFS_DINODE_FMT_EXTENTS:
331 ASSERT(!(iip->ili_format.ilf_fields & 220 iip->ili_fields &=
332 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 221 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
333 XFS_ILOG_DEV | XFS_ILOG_UUID))); 222 XFS_ILOG_DEV | XFS_ILOG_UUID);
334 if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { 223
335 ASSERT(ip->i_df.if_bytes > 0); 224 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
225 ip->i_d.di_nextents > 0 &&
226 ip->i_df.if_bytes > 0) {
336 ASSERT(ip->i_df.if_u1.if_extents != NULL); 227 ASSERT(ip->i_df.if_u1.if_extents != NULL);
337 ASSERT(ip->i_d.di_nextents > 0); 228 ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
338 ASSERT(iip->ili_extents_buf == NULL); 229 ASSERT(iip->ili_extents_buf == NULL);
339 ASSERT((ip->i_df.if_bytes / 230
340 (uint)sizeof(xfs_bmbt_rec_t)) > 0);
341#ifdef XFS_NATIVE_HOST 231#ifdef XFS_NATIVE_HOST
342 if (ip->i_d.di_nextents == ip->i_df.if_bytes / 232 if (ip->i_d.di_nextents == ip->i_df.if_bytes /
343 (uint)sizeof(xfs_bmbt_rec_t)) { 233 (uint)sizeof(xfs_bmbt_rec_t)) {
@@ -359,15 +249,18 @@ xfs_inode_item_format(
359 iip->ili_format.ilf_dsize = vecp->i_len; 249 iip->ili_format.ilf_dsize = vecp->i_len;
360 vecp++; 250 vecp++;
361 nvecs++; 251 nvecs++;
252 } else {
253 iip->ili_fields &= ~XFS_ILOG_DEXT;
362 } 254 }
363 break; 255 break;
364 256
365 case XFS_DINODE_FMT_BTREE: 257 case XFS_DINODE_FMT_BTREE:
366 ASSERT(!(iip->ili_format.ilf_fields & 258 iip->ili_fields &=
367 (XFS_ILOG_DDATA | XFS_ILOG_DEXT | 259 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
368 XFS_ILOG_DEV | XFS_ILOG_UUID))); 260 XFS_ILOG_DEV | XFS_ILOG_UUID);
369 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { 261
370 ASSERT(ip->i_df.if_broot_bytes > 0); 262 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
263 ip->i_df.if_broot_bytes > 0) {
371 ASSERT(ip->i_df.if_broot != NULL); 264 ASSERT(ip->i_df.if_broot != NULL);
372 vecp->i_addr = ip->i_df.if_broot; 265 vecp->i_addr = ip->i_df.if_broot;
373 vecp->i_len = ip->i_df.if_broot_bytes; 266 vecp->i_len = ip->i_df.if_broot_bytes;
@@ -375,15 +268,30 @@ xfs_inode_item_format(
375 vecp++; 268 vecp++;
376 nvecs++; 269 nvecs++;
377 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 270 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
271 } else {
272 ASSERT(!(iip->ili_fields &
273 XFS_ILOG_DBROOT));
274#ifdef XFS_TRANS_DEBUG
275 if (iip->ili_root_size > 0) {
276 ASSERT(iip->ili_root_size ==
277 ip->i_df.if_broot_bytes);
278 ASSERT(memcmp(iip->ili_orig_root,
279 ip->i_df.if_broot,
280 iip->ili_root_size) == 0);
281 } else {
282 ASSERT(ip->i_df.if_broot_bytes == 0);
283 }
284#endif
285 iip->ili_fields &= ~XFS_ILOG_DBROOT;
378 } 286 }
379 break; 287 break;
380 288
381 case XFS_DINODE_FMT_LOCAL: 289 case XFS_DINODE_FMT_LOCAL:
382 ASSERT(!(iip->ili_format.ilf_fields & 290 iip->ili_fields &=
383 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 291 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
384 XFS_ILOG_DEV | XFS_ILOG_UUID))); 292 XFS_ILOG_DEV | XFS_ILOG_UUID);
385 if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { 293 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
386 ASSERT(ip->i_df.if_bytes > 0); 294 ip->i_df.if_bytes > 0) {
387 ASSERT(ip->i_df.if_u1.if_data != NULL); 295 ASSERT(ip->i_df.if_u1.if_data != NULL);
388 ASSERT(ip->i_d.di_size > 0); 296 ASSERT(ip->i_d.di_size > 0);
389 297
@@ -401,24 +309,26 @@ xfs_inode_item_format(
401 vecp++; 309 vecp++;
402 nvecs++; 310 nvecs++;
403 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 311 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
312 } else {
313 iip->ili_fields &= ~XFS_ILOG_DDATA;
404 } 314 }
405 break; 315 break;
406 316
407 case XFS_DINODE_FMT_DEV: 317 case XFS_DINODE_FMT_DEV:
408 ASSERT(!(iip->ili_format.ilf_fields & 318 iip->ili_fields &=
409 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 319 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
410 XFS_ILOG_DDATA | XFS_ILOG_UUID))); 320 XFS_ILOG_DEXT | XFS_ILOG_UUID);
411 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 321 if (iip->ili_fields & XFS_ILOG_DEV) {
412 iip->ili_format.ilf_u.ilfu_rdev = 322 iip->ili_format.ilf_u.ilfu_rdev =
413 ip->i_df.if_u2.if_rdev; 323 ip->i_df.if_u2.if_rdev;
414 } 324 }
415 break; 325 break;
416 326
417 case XFS_DINODE_FMT_UUID: 327 case XFS_DINODE_FMT_UUID:
418 ASSERT(!(iip->ili_format.ilf_fields & 328 iip->ili_fields &=
419 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 329 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
420 XFS_ILOG_DDATA | XFS_ILOG_DEV))); 330 XFS_ILOG_DEXT | XFS_ILOG_DEV);
421 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 331 if (iip->ili_fields & XFS_ILOG_UUID) {
422 iip->ili_format.ilf_u.ilfu_uuid = 332 iip->ili_format.ilf_u.ilfu_uuid =
423 ip->i_df.if_u2.if_uuid; 333 ip->i_df.if_u2.if_uuid;
424 } 334 }
@@ -430,31 +340,25 @@ xfs_inode_item_format(
430 } 340 }
431 341
432 /* 342 /*
433 * If there are no attributes associated with the file, 343 * If there are no attributes associated with the file, then we're done.
434 * then we're done.
435 * Assert that no attribute-related log flags are set.
436 */ 344 */
437 if (!XFS_IFORK_Q(ip)) { 345 if (!XFS_IFORK_Q(ip)) {
438 iip->ili_format.ilf_size = nvecs; 346 iip->ili_fields &=
439 ASSERT(!(iip->ili_format.ilf_fields & 347 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
440 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 348 goto out;
441 return;
442 } 349 }
443 350
444 switch (ip->i_d.di_aformat) { 351 switch (ip->i_d.di_aformat) {
445 case XFS_DINODE_FMT_EXTENTS: 352 case XFS_DINODE_FMT_EXTENTS:
446 ASSERT(!(iip->ili_format.ilf_fields & 353 iip->ili_fields &=
447 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); 354 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
448 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { 355
449#ifdef DEBUG 356 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
450 int nrecs = ip->i_afp->if_bytes / 357 ip->i_d.di_anextents > 0 &&
451 (uint)sizeof(xfs_bmbt_rec_t); 358 ip->i_afp->if_bytes > 0) {
452 ASSERT(nrecs > 0); 359 ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
453 ASSERT(nrecs == ip->i_d.di_anextents); 360 ip->i_d.di_anextents);
454 ASSERT(ip->i_afp->if_bytes > 0);
455 ASSERT(ip->i_afp->if_u1.if_extents != NULL); 361 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
456 ASSERT(ip->i_d.di_anextents > 0);
457#endif
458#ifdef XFS_NATIVE_HOST 362#ifdef XFS_NATIVE_HOST
459 /* 363 /*
460 * There are not delayed allocation extents 364 * There are not delayed allocation extents
@@ -471,29 +375,36 @@ xfs_inode_item_format(
471 iip->ili_format.ilf_asize = vecp->i_len; 375 iip->ili_format.ilf_asize = vecp->i_len;
472 vecp++; 376 vecp++;
473 nvecs++; 377 nvecs++;
378 } else {
379 iip->ili_fields &= ~XFS_ILOG_AEXT;
474 } 380 }
475 break; 381 break;
476 382
477 case XFS_DINODE_FMT_BTREE: 383 case XFS_DINODE_FMT_BTREE:
478 ASSERT(!(iip->ili_format.ilf_fields & 384 iip->ili_fields &=
479 (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); 385 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
480 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { 386
481 ASSERT(ip->i_afp->if_broot_bytes > 0); 387 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
388 ip->i_afp->if_broot_bytes > 0) {
482 ASSERT(ip->i_afp->if_broot != NULL); 389 ASSERT(ip->i_afp->if_broot != NULL);
390
483 vecp->i_addr = ip->i_afp->if_broot; 391 vecp->i_addr = ip->i_afp->if_broot;
484 vecp->i_len = ip->i_afp->if_broot_bytes; 392 vecp->i_len = ip->i_afp->if_broot_bytes;
485 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; 393 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
486 vecp++; 394 vecp++;
487 nvecs++; 395 nvecs++;
488 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 396 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
397 } else {
398 iip->ili_fields &= ~XFS_ILOG_ABROOT;
489 } 399 }
490 break; 400 break;
491 401
492 case XFS_DINODE_FMT_LOCAL: 402 case XFS_DINODE_FMT_LOCAL:
493 ASSERT(!(iip->ili_format.ilf_fields & 403 iip->ili_fields &=
494 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 404 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
495 if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { 405
496 ASSERT(ip->i_afp->if_bytes > 0); 406 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
407 ip->i_afp->if_bytes > 0) {
497 ASSERT(ip->i_afp->if_u1.if_data != NULL); 408 ASSERT(ip->i_afp->if_u1.if_data != NULL);
498 409
499 vecp->i_addr = ip->i_afp->if_u1.if_data; 410 vecp->i_addr = ip->i_afp->if_u1.if_data;
@@ -510,6 +421,8 @@ xfs_inode_item_format(
510 vecp++; 421 vecp++;
511 nvecs++; 422 nvecs++;
512 iip->ili_format.ilf_asize = (unsigned)data_bytes; 423 iip->ili_format.ilf_asize = (unsigned)data_bytes;
424 } else {
425 iip->ili_fields &= ~XFS_ILOG_ADATA;
513 } 426 }
514 break; 427 break;
515 428
@@ -518,6 +431,15 @@ xfs_inode_item_format(
518 break; 431 break;
519 } 432 }
520 433
434out:
435 /*
436 * Now update the log format that goes out to disk from the in-core
437 * values. We always write the inode core to make the arithmetic
438 * games in recovery easier, which isn't a big deal as just about any
439 * transaction would dirty it anyway.
440 */
441 iip->ili_format.ilf_fields = XFS_ILOG_CORE |
442 (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
521 iip->ili_format.ilf_size = nvecs; 443 iip->ili_format.ilf_size = nvecs;
522} 444}
523 445
@@ -596,17 +518,13 @@ xfs_inode_item_trylock(
596 /* Stale items should force out the iclog */ 518 /* Stale items should force out the iclog */
597 if (ip->i_flags & XFS_ISTALE) { 519 if (ip->i_flags & XFS_ISTALE) {
598 xfs_ifunlock(ip); 520 xfs_ifunlock(ip);
599 /* 521 xfs_iunlock(ip, XFS_ILOCK_SHARED);
600 * we hold the AIL lock - notify the unlock routine of this
601 * so it doesn't try to get the lock again.
602 */
603 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
604 return XFS_ITEM_PINNED; 522 return XFS_ITEM_PINNED;
605 } 523 }
606 524
607#ifdef DEBUG 525#ifdef DEBUG
608 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 526 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
609 ASSERT(iip->ili_format.ilf_fields != 0); 527 ASSERT(iip->ili_fields != 0);
610 ASSERT(iip->ili_logged == 0); 528 ASSERT(iip->ili_logged == 0);
611 ASSERT(lip->li_flags & XFS_LI_IN_AIL); 529 ASSERT(lip->li_flags & XFS_LI_IN_AIL);
612 } 530 }
@@ -638,7 +556,7 @@ xfs_inode_item_unlock(
638 if (iip->ili_extents_buf != NULL) { 556 if (iip->ili_extents_buf != NULL) {
639 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); 557 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
640 ASSERT(ip->i_d.di_nextents > 0); 558 ASSERT(ip->i_d.di_nextents > 0);
641 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); 559 ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
642 ASSERT(ip->i_df.if_bytes > 0); 560 ASSERT(ip->i_df.if_bytes > 0);
643 kmem_free(iip->ili_extents_buf); 561 kmem_free(iip->ili_extents_buf);
644 iip->ili_extents_buf = NULL; 562 iip->ili_extents_buf = NULL;
@@ -646,7 +564,7 @@ xfs_inode_item_unlock(
646 if (iip->ili_aextents_buf != NULL) { 564 if (iip->ili_aextents_buf != NULL) {
647 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); 565 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
648 ASSERT(ip->i_d.di_anextents > 0); 566 ASSERT(ip->i_d.di_anextents > 0);
649 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); 567 ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
650 ASSERT(ip->i_afp->if_bytes > 0); 568 ASSERT(ip->i_afp->if_bytes > 0);
651 kmem_free(iip->ili_aextents_buf); 569 kmem_free(iip->ili_aextents_buf);
652 iip->ili_aextents_buf = NULL; 570 iip->ili_aextents_buf = NULL;
@@ -761,8 +679,7 @@ xfs_inode_item_push(
761 * lock without sleeping, then there must not have been 679 * lock without sleeping, then there must not have been
762 * anyone in the process of flushing the inode. 680 * anyone in the process of flushing the inode.
763 */ 681 */
764 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || 682 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0);
765 iip->ili_format.ilf_fields != 0);
766 683
767 /* 684 /*
768 * Push the inode to its backing buffer. This will not remove the 685 * Push the inode to its backing buffer. This will not remove the
@@ -985,7 +902,7 @@ xfs_iflush_abort(
985 * Clear the inode logging fields so no more flushes are 902 * Clear the inode logging fields so no more flushes are
986 * attempted. 903 * attempted.
987 */ 904 */
988 iip->ili_format.ilf_fields = 0; 905 iip->ili_fields = 0;
989 } 906 }
990 /* 907 /*
991 * Release the inode's flush lock since we're done with it. 908 * Release the inode's flush lock since we're done with it.
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index d3dee61e6d91..41d61c3b7a36 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -86,6 +86,15 @@ typedef struct xfs_inode_log_format_64 {
86#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ 86#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */
87#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ 87#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */
88 88
89
90/*
91 * The timestamps are dirty, but not necessarily anything else in the inode
92 * core. Unlike the other fields above this one must never make it to disk
93 * in the ilf_fields of the inode_log_format, but is purely store in-memory in
94 * ili_fields in the inode_log_item.
95 */
96#define XFS_ILOG_TIMESTAMP 0x4000
97
89#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ 98#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
90 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ 99 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
91 XFS_ILOG_UUID | XFS_ILOG_ADATA | \ 100 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
@@ -101,7 +110,7 @@ typedef struct xfs_inode_log_format_64 {
101 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ 110 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
102 XFS_ILOG_DEV | XFS_ILOG_UUID | \ 111 XFS_ILOG_DEV | XFS_ILOG_UUID | \
103 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ 112 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
104 XFS_ILOG_ABROOT) 113 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
105 114
106static inline int xfs_ilog_fbroot(int w) 115static inline int xfs_ilog_fbroot(int w)
107{ 116{
@@ -134,6 +143,7 @@ typedef struct xfs_inode_log_item {
134 unsigned short ili_lock_flags; /* lock flags */ 143 unsigned short ili_lock_flags; /* lock flags */
135 unsigned short ili_logged; /* flushed logged data */ 144 unsigned short ili_logged; /* flushed logged data */
136 unsigned int ili_last_fields; /* fields when flushed */ 145 unsigned int ili_last_fields; /* fields when flushed */
146 unsigned int ili_fields; /* fields to be logged */
137 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged 147 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
138 data exts */ 148 data exts */
139 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
@@ -148,9 +158,7 @@ typedef struct xfs_inode_log_item {
148 158
149static inline int xfs_inode_clean(xfs_inode_t *ip) 159static inline int xfs_inode_clean(xfs_inode_t *ip)
150{ 160{
151 return (!ip->i_itemp || 161 return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL);
152 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
153 !ip->i_update_core;
154} 162}
155 163
156extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 164extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 76f3ca5cfc36..f588320dc4b9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -450,9 +450,12 @@ xfs_attrmulti_attr_get(
450 450
451 if (*len > XATTR_SIZE_MAX) 451 if (*len > XATTR_SIZE_MAX)
452 return EINVAL; 452 return EINVAL;
453 kbuf = kmalloc(*len, GFP_KERNEL); 453 kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL);
454 if (!kbuf) 454 if (!kbuf) {
455 return ENOMEM; 455 kbuf = kmem_zalloc_large(*len);
456 if (!kbuf)
457 return ENOMEM;
458 }
456 459
457 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); 460 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
458 if (error) 461 if (error)
@@ -462,7 +465,10 @@ xfs_attrmulti_attr_get(
462 error = EFAULT; 465 error = EFAULT;
463 466
464 out_kfree: 467 out_kfree:
465 kfree(kbuf); 468 if (is_vmalloc_addr(kbuf))
469 kmem_free_large(kbuf);
470 else
471 kmem_free(kbuf);
466 return error; 472 return error;
467} 473}
468 474
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index f9ccb7b7c043..a849a5473aff 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -293,7 +293,7 @@ xfs_compat_ioc_bulkstat(
293 int res; 293 int res;
294 294
295 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, 295 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
296 sizeof(compat_xfs_bstat_t), 0, &res); 296 sizeof(compat_xfs_bstat_t), NULL, &res);
297 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 297 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
298 error = xfs_bulkstat(mp, &inlast, &count, 298 error = xfs_bulkstat(mp, &inlast, &count,
299 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), 299 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 246c7d57c6f9..71a464503c43 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -31,6 +31,7 @@
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_dinode.h" 32#include "xfs_dinode.h"
33#include "xfs_inode.h" 33#include "xfs_inode.h"
34#include "xfs_inode_item.h"
34#include "xfs_btree.h" 35#include "xfs_btree.h"
35#include "xfs_bmap.h" 36#include "xfs_bmap.h"
36#include "xfs_rtalloc.h" 37#include "xfs_rtalloc.h"
@@ -645,6 +646,7 @@ xfs_iomap_write_unwritten(
645 xfs_trans_t *tp; 646 xfs_trans_t *tp;
646 xfs_bmbt_irec_t imap; 647 xfs_bmbt_irec_t imap;
647 xfs_bmap_free_t free_list; 648 xfs_bmap_free_t free_list;
649 xfs_fsize_t i_size;
648 uint resblks; 650 uint resblks;
649 int committed; 651 int committed;
650 int error; 652 int error;
@@ -705,7 +707,22 @@ xfs_iomap_write_unwritten(
705 if (error) 707 if (error)
706 goto error_on_bmapi_transaction; 708 goto error_on_bmapi_transaction;
707 709
708 error = xfs_bmap_finish(&(tp), &(free_list), &committed); 710 /*
711 * Log the updated inode size as we go. We have to be careful
712 * to only log it up to the actual write offset if it is
713 * halfway into a block.
714 */
715 i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
716 if (i_size > offset + count)
717 i_size = offset + count;
718
719 i_size = xfs_new_eof(ip, i_size);
720 if (i_size) {
721 ip->i_d.di_size = i_size;
722 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
723 }
724
725 error = xfs_bmap_finish(&tp, &free_list, &committed);
709 if (error) 726 if (error)
710 goto error_on_bmapi_transaction; 727 goto error_on_bmapi_transaction;
711 728
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ab302539e5b9..3011b879f850 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -50,65 +50,15 @@
50#include <linux/fiemap.h> 50#include <linux/fiemap.h>
51#include <linux/slab.h> 51#include <linux/slab.h>
52 52
53/* 53static int
54 * Bring the timestamps in the XFS inode uptodate. 54xfs_initxattrs(
55 * 55 struct inode *inode,
56 * Used before writing the inode to disk. 56 const struct xattr *xattr_array,
57 */ 57 void *fs_info)
58void
59xfs_synchronize_times(
60 xfs_inode_t *ip)
61{
62 struct inode *inode = VFS_I(ip);
63
64 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
65 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
66 ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
67 ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
68 ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
69 ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
70}
71
72/*
73 * If the linux inode is valid, mark it dirty, else mark the dirty state
74 * in the XFS inode to make sure we pick it up when reclaiming the inode.
75 */
76void
77xfs_mark_inode_dirty_sync(
78 xfs_inode_t *ip)
79{
80 struct inode *inode = VFS_I(ip);
81
82 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
83 mark_inode_dirty_sync(inode);
84 else {
85 barrier();
86 ip->i_update_core = 1;
87 }
88}
89
90void
91xfs_mark_inode_dirty(
92 xfs_inode_t *ip)
93{
94 struct inode *inode = VFS_I(ip);
95
96 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
97 mark_inode_dirty(inode);
98 else {
99 barrier();
100 ip->i_update_core = 1;
101 }
102
103}
104
105
106int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
107 void *fs_info)
108{ 58{
109 const struct xattr *xattr; 59 const struct xattr *xattr;
110 struct xfs_inode *ip = XFS_I(inode); 60 struct xfs_inode *ip = XFS_I(inode);
111 int error = 0; 61 int error = 0;
112 62
113 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 63 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
114 error = xfs_attr_set(ip, xattr->name, xattr->value, 64 error = xfs_attr_set(ip, xattr->name, xattr->value,
@@ -678,19 +628,16 @@ xfs_setattr_nonsize(
678 inode->i_atime = iattr->ia_atime; 628 inode->i_atime = iattr->ia_atime;
679 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 629 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
680 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 630 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
681 ip->i_update_core = 1;
682 } 631 }
683 if (mask & ATTR_CTIME) { 632 if (mask & ATTR_CTIME) {
684 inode->i_ctime = iattr->ia_ctime; 633 inode->i_ctime = iattr->ia_ctime;
685 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 634 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
686 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 635 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
687 ip->i_update_core = 1;
688 } 636 }
689 if (mask & ATTR_MTIME) { 637 if (mask & ATTR_MTIME) {
690 inode->i_mtime = iattr->ia_mtime; 638 inode->i_mtime = iattr->ia_mtime;
691 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 639 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
692 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 640 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
693 ip->i_update_core = 1;
694 } 641 }
695 642
696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 643 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -918,13 +865,11 @@ xfs_setattr_size(
918 inode->i_ctime = iattr->ia_ctime; 865 inode->i_ctime = iattr->ia_ctime;
919 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 866 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
920 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 867 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
921 ip->i_update_core = 1;
922 } 868 }
923 if (mask & ATTR_MTIME) { 869 if (mask & ATTR_MTIME) {
924 inode->i_mtime = iattr->ia_mtime; 870 inode->i_mtime = iattr->ia_mtime;
925 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 871 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
926 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 872 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
927 ip->i_update_core = 1;
928 } 873 }
929 874
930 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 875 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 751e94fe1f77..9720c54bbed0 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -62,7 +62,6 @@ xfs_bulkstat_one_int(
62{ 62{
63 struct xfs_icdinode *dic; /* dinode core info pointer */ 63 struct xfs_icdinode *dic; /* dinode core info pointer */
64 struct xfs_inode *ip; /* incore inode pointer */ 64 struct xfs_inode *ip; /* incore inode pointer */
65 struct inode *inode;
66 struct xfs_bstat *buf; /* return buffer */ 65 struct xfs_bstat *buf; /* return buffer */
67 int error = 0; /* error value */ 66 int error = 0; /* error value */
68 67
@@ -86,7 +85,6 @@ xfs_bulkstat_one_int(
86 ASSERT(ip->i_imap.im_blkno != 0); 85 ASSERT(ip->i_imap.im_blkno != 0);
87 86
88 dic = &ip->i_d; 87 dic = &ip->i_d;
89 inode = VFS_I(ip);
90 88
91 /* xfs_iget returns the following without needing 89 /* xfs_iget returns the following without needing
92 * further change. 90 * further change.
@@ -99,19 +97,12 @@ xfs_bulkstat_one_int(
99 buf->bs_uid = dic->di_uid; 97 buf->bs_uid = dic->di_uid;
100 buf->bs_gid = dic->di_gid; 98 buf->bs_gid = dic->di_gid;
101 buf->bs_size = dic->di_size; 99 buf->bs_size = dic->di_size;
102 100 buf->bs_atime.tv_sec = dic->di_atime.t_sec;
103 /* 101 buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
104 * We need to read the timestamps from the Linux inode because 102 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
105 * the VFS keeps writing directly into the inode structure instead 103 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
106 * of telling us about the updates. 104 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
107 */ 105 buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
108 buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
109 buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
110 buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
111 buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
112 buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
113 buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
114
115 buf->bs_xflags = xfs_ip2xflags(ip); 106 buf->bs_xflags = xfs_ip2xflags(ip);
116 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 107 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
117 buf->bs_extents = dic->di_nextents; 108 buf->bs_extents = dic->di_nextents;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e2cc3568c299..98a9cb5ffd17 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -67,15 +67,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t *log,
67 int eventual_size); 67 int eventual_size);
68STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 68STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
69 69
70/* local functions to manipulate grant head */
71STATIC int xlog_grant_log_space(xlog_t *log,
72 xlog_ticket_t *xtic);
73STATIC void xlog_grant_push_ail(struct log *log, 70STATIC void xlog_grant_push_ail(struct log *log,
74 int need_bytes); 71 int need_bytes);
75STATIC void xlog_regrant_reserve_log_space(xlog_t *log, 72STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
76 xlog_ticket_t *ticket); 73 xlog_ticket_t *ticket);
77STATIC int xlog_regrant_write_log_space(xlog_t *log,
78 xlog_ticket_t *ticket);
79STATIC void xlog_ungrant_log_space(xlog_t *log, 74STATIC void xlog_ungrant_log_space(xlog_t *log,
80 xlog_ticket_t *ticket); 75 xlog_ticket_t *ticket);
81 76
@@ -150,78 +145,93 @@ xlog_grant_add_space(
150 } while (head_val != old); 145 } while (head_val != old);
151} 146}
152 147
153STATIC bool 148STATIC void
154xlog_reserveq_wake( 149xlog_grant_head_init(
155 struct log *log, 150 struct xlog_grant_head *head)
156 int *free_bytes) 151{
152 xlog_assign_grant_head(&head->grant, 1, 0);
153 INIT_LIST_HEAD(&head->waiters);
154 spin_lock_init(&head->lock);
155}
156
157STATIC void
158xlog_grant_head_wake_all(
159 struct xlog_grant_head *head)
157{ 160{
158 struct xlog_ticket *tic; 161 struct xlog_ticket *tic;
159 int need_bytes;
160 162
161 list_for_each_entry(tic, &log->l_reserveq, t_queue) { 163 spin_lock(&head->lock);
164 list_for_each_entry(tic, &head->waiters, t_queue)
165 wake_up_process(tic->t_task);
166 spin_unlock(&head->lock);
167}
168
169static inline int
170xlog_ticket_reservation(
171 struct log *log,
172 struct xlog_grant_head *head,
173 struct xlog_ticket *tic)
174{
175 if (head == &log->l_write_head) {
176 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
177 return tic->t_unit_res;
178 } else {
162 if (tic->t_flags & XLOG_TIC_PERM_RESERV) 179 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
163 need_bytes = tic->t_unit_res * tic->t_cnt; 180 return tic->t_unit_res * tic->t_cnt;
164 else 181 else
165 need_bytes = tic->t_unit_res; 182 return tic->t_unit_res;
166
167 if (*free_bytes < need_bytes)
168 return false;
169 *free_bytes -= need_bytes;
170
171 trace_xfs_log_grant_wake_up(log, tic);
172 wake_up(&tic->t_wait);
173 } 183 }
174
175 return true;
176} 184}
177 185
178STATIC bool 186STATIC bool
179xlog_writeq_wake( 187xlog_grant_head_wake(
180 struct log *log, 188 struct log *log,
189 struct xlog_grant_head *head,
181 int *free_bytes) 190 int *free_bytes)
182{ 191{
183 struct xlog_ticket *tic; 192 struct xlog_ticket *tic;
184 int need_bytes; 193 int need_bytes;
185 194
186 list_for_each_entry(tic, &log->l_writeq, t_queue) { 195 list_for_each_entry(tic, &head->waiters, t_queue) {
187 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); 196 need_bytes = xlog_ticket_reservation(log, head, tic);
188
189 need_bytes = tic->t_unit_res;
190
191 if (*free_bytes < need_bytes) 197 if (*free_bytes < need_bytes)
192 return false; 198 return false;
193 *free_bytes -= need_bytes;
194 199
195 trace_xfs_log_regrant_write_wake_up(log, tic); 200 *free_bytes -= need_bytes;
196 wake_up(&tic->t_wait); 201 trace_xfs_log_grant_wake_up(log, tic);
202 wake_up_process(tic->t_task);
197 } 203 }
198 204
199 return true; 205 return true;
200} 206}
201 207
202STATIC int 208STATIC int
203xlog_reserveq_wait( 209xlog_grant_head_wait(
204 struct log *log, 210 struct log *log,
211 struct xlog_grant_head *head,
205 struct xlog_ticket *tic, 212 struct xlog_ticket *tic,
206 int need_bytes) 213 int need_bytes)
207{ 214{
208 list_add_tail(&tic->t_queue, &log->l_reserveq); 215 list_add_tail(&tic->t_queue, &head->waiters);
209 216
210 do { 217 do {
211 if (XLOG_FORCED_SHUTDOWN(log)) 218 if (XLOG_FORCED_SHUTDOWN(log))
212 goto shutdown; 219 goto shutdown;
213 xlog_grant_push_ail(log, need_bytes); 220 xlog_grant_push_ail(log, need_bytes);
214 221
222 __set_current_state(TASK_UNINTERRUPTIBLE);
223 spin_unlock(&head->lock);
224
215 XFS_STATS_INC(xs_sleep_logspace); 225 XFS_STATS_INC(xs_sleep_logspace);
216 trace_xfs_log_grant_sleep(log, tic);
217 226
218 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); 227 trace_xfs_log_grant_sleep(log, tic);
228 schedule();
219 trace_xfs_log_grant_wake(log, tic); 229 trace_xfs_log_grant_wake(log, tic);
220 230
221 spin_lock(&log->l_grant_reserve_lock); 231 spin_lock(&head->lock);
222 if (XLOG_FORCED_SHUTDOWN(log)) 232 if (XLOG_FORCED_SHUTDOWN(log))
223 goto shutdown; 233 goto shutdown;
224 } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes); 234 } while (xlog_space_left(log, &head->grant) < need_bytes);
225 235
226 list_del_init(&tic->t_queue); 236 list_del_init(&tic->t_queue);
227 return 0; 237 return 0;
@@ -230,35 +240,58 @@ shutdown:
230 return XFS_ERROR(EIO); 240 return XFS_ERROR(EIO);
231} 241}
232 242
243/*
244 * Atomically get the log space required for a log ticket.
245 *
246 * Once a ticket gets put onto head->waiters, it will only return after the
247 * needed reservation is satisfied.
248 *
249 * This function is structured so that it has a lock free fast path. This is
250 * necessary because every new transaction reservation will come through this
251 * path. Hence any lock will be globally hot if we take it unconditionally on
252 * every pass.
253 *
254 * As tickets are only ever moved on and off head->waiters under head->lock, we
255 * only need to take that lock if we are going to add the ticket to the queue
256 * and sleep. We can avoid taking the lock if the ticket was never added to
257 * head->waiters because the t_queue list head will be empty and we hold the
258 * only reference to it so it can safely be checked unlocked.
259 */
233STATIC int 260STATIC int
234xlog_writeq_wait( 261xlog_grant_head_check(
235 struct log *log, 262 struct log *log,
263 struct xlog_grant_head *head,
236 struct xlog_ticket *tic, 264 struct xlog_ticket *tic,
237 int need_bytes) 265 int *need_bytes)
238{ 266{
239 list_add_tail(&tic->t_queue, &log->l_writeq); 267 int free_bytes;
240 268 int error = 0;
241 do {
242 if (XLOG_FORCED_SHUTDOWN(log))
243 goto shutdown;
244 xlog_grant_push_ail(log, need_bytes);
245
246 XFS_STATS_INC(xs_sleep_logspace);
247 trace_xfs_log_regrant_write_sleep(log, tic);
248 269
249 xlog_wait(&tic->t_wait, &log->l_grant_write_lock); 270 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
250 trace_xfs_log_regrant_write_wake(log, tic);
251 271
252 spin_lock(&log->l_grant_write_lock); 272 /*
253 if (XLOG_FORCED_SHUTDOWN(log)) 273 * If there are other waiters on the queue then give them a chance at
254 goto shutdown; 274 * logspace before us. Wake up the first waiters, if we do not wake
255 } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes); 275 * up all the waiters then go to sleep waiting for more free space,
276 * otherwise try to get some space for this transaction.
277 */
278 *need_bytes = xlog_ticket_reservation(log, head, tic);
279 free_bytes = xlog_space_left(log, &head->grant);
280 if (!list_empty_careful(&head->waiters)) {
281 spin_lock(&head->lock);
282 if (!xlog_grant_head_wake(log, head, &free_bytes) ||
283 free_bytes < *need_bytes) {
284 error = xlog_grant_head_wait(log, head, tic,
285 *need_bytes);
286 }
287 spin_unlock(&head->lock);
288 } else if (free_bytes < *need_bytes) {
289 spin_lock(&head->lock);
290 error = xlog_grant_head_wait(log, head, tic, *need_bytes);
291 spin_unlock(&head->lock);
292 }
256 293
257 list_del_init(&tic->t_queue); 294 return error;
258 return 0;
259shutdown:
260 list_del_init(&tic->t_queue);
261 return XFS_ERROR(EIO);
262} 295}
263 296
264static void 297static void
@@ -286,6 +319,128 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
286} 319}
287 320
288/* 321/*
322 * Replenish the byte reservation required by moving the grant write head.
323 */
324int
325xfs_log_regrant(
326 struct xfs_mount *mp,
327 struct xlog_ticket *tic)
328{
329 struct log *log = mp->m_log;
330 int need_bytes;
331 int error = 0;
332
333 if (XLOG_FORCED_SHUTDOWN(log))
334 return XFS_ERROR(EIO);
335
336 XFS_STATS_INC(xs_try_logspace);
337
338 /*
339 * This is a new transaction on the ticket, so we need to change the
340 * transaction ID so that the next transaction has a different TID in
341 * the log. Just add one to the existing tid so that we can see chains
342 * of rolling transactions in the log easily.
343 */
344 tic->t_tid++;
345
346 xlog_grant_push_ail(log, tic->t_unit_res);
347
348 tic->t_curr_res = tic->t_unit_res;
349 xlog_tic_reset_res(tic);
350
351 if (tic->t_cnt > 0)
352 return 0;
353
354 trace_xfs_log_regrant(log, tic);
355
356 error = xlog_grant_head_check(log, &log->l_write_head, tic,
357 &need_bytes);
358 if (error)
359 goto out_error;
360
361 xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
362 trace_xfs_log_regrant_exit(log, tic);
363 xlog_verify_grant_tail(log);
364 return 0;
365
366out_error:
367 /*
368 * If we are failing, make sure the ticket doesn't have any current
369 * reservations. We don't want to add this back when the ticket/
370 * transaction gets cancelled.
371 */
372 tic->t_curr_res = 0;
373 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
374 return error;
375}
376
377/*
378 * Reserve log space and return a ticket corresponding the reservation.
379 *
380 * Each reservation is going to reserve extra space for a log record header.
381 * When writes happen to the on-disk log, we don't subtract the length of the
382 * log record header from any reservation. By wasting space in each
383 * reservation, we prevent over allocation problems.
384 */
385int
386xfs_log_reserve(
387 struct xfs_mount *mp,
388 int unit_bytes,
389 int cnt,
390 struct xlog_ticket **ticp,
391 __uint8_t client,
392 bool permanent,
393 uint t_type)
394{
395 struct log *log = mp->m_log;
396 struct xlog_ticket *tic;
397 int need_bytes;
398 int error = 0;
399
400 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
401
402 if (XLOG_FORCED_SHUTDOWN(log))
403 return XFS_ERROR(EIO);
404
405 XFS_STATS_INC(xs_try_logspace);
406
407 ASSERT(*ticp == NULL);
408 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
409 KM_SLEEP | KM_MAYFAIL);
410 if (!tic)
411 return XFS_ERROR(ENOMEM);
412
413 tic->t_trans_type = t_type;
414 *ticp = tic;
415
416 xlog_grant_push_ail(log, tic->t_unit_res * tic->t_cnt);
417
418 trace_xfs_log_reserve(log, tic);
419
420 error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
421 &need_bytes);
422 if (error)
423 goto out_error;
424
425 xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
426 xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
427 trace_xfs_log_reserve_exit(log, tic);
428 xlog_verify_grant_tail(log);
429 return 0;
430
431out_error:
432 /*
433 * If we are failing, make sure the ticket doesn't have any current
434 * reservations. We don't want to add this back when the ticket/
435 * transaction gets cancelled.
436 */
437 tic->t_curr_res = 0;
438 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
439 return error;
440}
441
442
443/*
289 * NOTES: 444 * NOTES:
290 * 445 *
291 * 1. currblock field gets updated at startup and after in-core logs 446 * 1. currblock field gets updated at startup and after in-core logs
@@ -395,88 +550,6 @@ xfs_log_release_iclog(
395} 550}
396 551
397/* 552/*
398 * 1. Reserve an amount of on-disk log space and return a ticket corresponding
399 * to the reservation.
400 * 2. Potentially, push buffers at tail of log to disk.
401 *
402 * Each reservation is going to reserve extra space for a log record header.
403 * When writes happen to the on-disk log, we don't subtract the length of the
404 * log record header from any reservation. By wasting space in each
405 * reservation, we prevent over allocation problems.
406 */
407int
408xfs_log_reserve(
409 struct xfs_mount *mp,
410 int unit_bytes,
411 int cnt,
412 struct xlog_ticket **ticket,
413 __uint8_t client,
414 uint flags,
415 uint t_type)
416{
417 struct log *log = mp->m_log;
418 struct xlog_ticket *internal_ticket;
419 int retval = 0;
420
421 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
422
423 if (XLOG_FORCED_SHUTDOWN(log))
424 return XFS_ERROR(EIO);
425
426 XFS_STATS_INC(xs_try_logspace);
427
428
429 if (*ticket != NULL) {
430 ASSERT(flags & XFS_LOG_PERM_RESERV);
431 internal_ticket = *ticket;
432
433 /*
434 * this is a new transaction on the ticket, so we need to
435 * change the transaction ID so that the next transaction has a
436 * different TID in the log. Just add one to the existing tid
437 * so that we can see chains of rolling transactions in the log
438 * easily.
439 */
440 internal_ticket->t_tid++;
441
442 trace_xfs_log_reserve(log, internal_ticket);
443
444 xlog_grant_push_ail(log, internal_ticket->t_unit_res);
445 retval = xlog_regrant_write_log_space(log, internal_ticket);
446 } else {
447 /* may sleep if need to allocate more tickets */
448 internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
449 client, flags,
450 KM_SLEEP|KM_MAYFAIL);
451 if (!internal_ticket)
452 return XFS_ERROR(ENOMEM);
453 internal_ticket->t_trans_type = t_type;
454 *ticket = internal_ticket;
455
456 trace_xfs_log_reserve(log, internal_ticket);
457
458 xlog_grant_push_ail(log,
459 (internal_ticket->t_unit_res *
460 internal_ticket->t_cnt));
461 retval = xlog_grant_log_space(log, internal_ticket);
462 }
463
464 if (unlikely(retval)) {
465 /*
466 * If we are failing, make sure the ticket doesn't have any
467 * current reservations. We don't want to add this back
468 * when the ticket/ transaction gets cancelled.
469 */
470 internal_ticket->t_curr_res = 0;
471 /* ungrant will give back unit_res * t_cnt. */
472 internal_ticket->t_cnt = 0;
473 }
474
475 return retval;
476}
477
478
479/*
480 * Mount a log filesystem 553 * Mount a log filesystem
481 * 554 *
482 * mp - ubiquitous xfs mount point structure 555 * mp - ubiquitous xfs mount point structure
@@ -760,64 +833,35 @@ xfs_log_item_init(
760 INIT_LIST_HEAD(&item->li_cil); 833 INIT_LIST_HEAD(&item->li_cil);
761} 834}
762 835
836/*
837 * Wake up processes waiting for log space after we have moved the log tail.
838 */
763void 839void
764xfs_log_move_tail(xfs_mount_t *mp, 840xfs_log_space_wake(
765 xfs_lsn_t tail_lsn) 841 struct xfs_mount *mp)
766{ 842{
767 xlog_ticket_t *tic; 843 struct log *log = mp->m_log;
768 xlog_t *log = mp->m_log; 844 int free_bytes;
769 int need_bytes, free_bytes;
770 845
771 if (XLOG_FORCED_SHUTDOWN(log)) 846 if (XLOG_FORCED_SHUTDOWN(log))
772 return; 847 return;
773 848
774 if (tail_lsn == 0) 849 if (!list_empty_careful(&log->l_write_head.waiters)) {
775 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 850 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
776
777 /* tail_lsn == 1 implies that we weren't passed a valid value. */
778 if (tail_lsn != 1)
779 atomic64_set(&log->l_tail_lsn, tail_lsn);
780
781 if (!list_empty_careful(&log->l_writeq)) {
782#ifdef DEBUG
783 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
784 panic("Recovery problem");
785#endif
786 spin_lock(&log->l_grant_write_lock);
787 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
788 list_for_each_entry(tic, &log->l_writeq, t_queue) {
789 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
790 851
791 if (free_bytes < tic->t_unit_res && tail_lsn != 1) 852 spin_lock(&log->l_write_head.lock);
792 break; 853 free_bytes = xlog_space_left(log, &log->l_write_head.grant);
793 tail_lsn = 0; 854 xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
794 free_bytes -= tic->t_unit_res; 855 spin_unlock(&log->l_write_head.lock);
795 trace_xfs_log_regrant_write_wake_up(log, tic);
796 wake_up(&tic->t_wait);
797 }
798 spin_unlock(&log->l_grant_write_lock);
799 } 856 }
800 857
801 if (!list_empty_careful(&log->l_reserveq)) { 858 if (!list_empty_careful(&log->l_reserve_head.waiters)) {
802#ifdef DEBUG 859 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
803 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 860
804 panic("Recovery problem"); 861 spin_lock(&log->l_reserve_head.lock);
805#endif 862 free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
806 spin_lock(&log->l_grant_reserve_lock); 863 xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
807 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); 864 spin_unlock(&log->l_reserve_head.lock);
808 list_for_each_entry(tic, &log->l_reserveq, t_queue) {
809 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
810 need_bytes = tic->t_unit_res*tic->t_cnt;
811 else
812 need_bytes = tic->t_unit_res;
813 if (free_bytes < need_bytes && tail_lsn != 1)
814 break;
815 tail_lsn = 0;
816 free_bytes -= need_bytes;
817 trace_xfs_log_grant_wake_up(log, tic);
818 wake_up(&tic->t_wait);
819 }
820 spin_unlock(&log->l_grant_reserve_lock);
821 } 865 }
822} 866}
823 867
@@ -867,21 +911,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
867 return needed; 911 return needed;
868} 912}
869 913
870/****************************************************************************** 914/*
871 *
872 * local routines
873 *
874 ******************************************************************************
875 */
876
877/* xfs_trans_tail_ail returns 0 when there is nothing in the list.
878 * The log manager must keep track of the last LR which was committed
879 * to disk. The lsn of this LR will become the new tail_lsn whenever
880 * xfs_trans_tail_ail returns 0. If we don't do this, we run into
881 * the situation where stuff could be written into the log but nothing
882 * was ever in the AIL when asked. Eventually, we panic since the
883 * tail hits the head.
884 *
885 * We may be holding the log iclog lock upon entering this routine. 915 * We may be holding the log iclog lock upon entering this routine.
886 */ 916 */
887xfs_lsn_t 917xfs_lsn_t
@@ -891,10 +921,17 @@ xlog_assign_tail_lsn(
891 xfs_lsn_t tail_lsn; 921 xfs_lsn_t tail_lsn;
892 struct log *log = mp->m_log; 922 struct log *log = mp->m_log;
893 923
924 /*
925 * To make sure we always have a valid LSN for the log tail we keep
926 * track of the last LSN which was committed in log->l_last_sync_lsn,
927 * and use that when the AIL was empty and xfs_ail_min_lsn returns 0.
928 *
929 * If the AIL has been emptied we also need to wake any process
930 * waiting for this condition.
931 */
894 tail_lsn = xfs_ail_min_lsn(mp->m_ail); 932 tail_lsn = xfs_ail_min_lsn(mp->m_ail);
895 if (!tail_lsn) 933 if (!tail_lsn)
896 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 934 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
897
898 atomic64_set(&log->l_tail_lsn, tail_lsn); 935 atomic64_set(&log->l_tail_lsn, tail_lsn);
899 return tail_lsn; 936 return tail_lsn;
900} 937}
@@ -1100,12 +1137,9 @@ xlog_alloc_log(xfs_mount_t *mp,
1100 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); 1137 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
1101 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); 1138 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
1102 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ 1139 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
1103 xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); 1140
1104 xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); 1141 xlog_grant_head_init(&log->l_reserve_head);
1105 INIT_LIST_HEAD(&log->l_reserveq); 1142 xlog_grant_head_init(&log->l_write_head);
1106 INIT_LIST_HEAD(&log->l_writeq);
1107 spin_lock_init(&log->l_grant_reserve_lock);
1108 spin_lock_init(&log->l_grant_write_lock);
1109 1143
1110 error = EFSCORRUPTED; 1144 error = EFSCORRUPTED;
1111 if (xfs_sb_version_hassector(&mp->m_sb)) { 1145 if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1280,7 +1314,7 @@ xlog_grant_push_ail(
1280 1314
1281 ASSERT(BTOBB(need_bytes) < log->l_logBBsize); 1315 ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
1282 1316
1283 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); 1317 free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
1284 free_blocks = BTOBBT(free_bytes); 1318 free_blocks = BTOBBT(free_bytes);
1285 1319
1286 /* 1320 /*
@@ -1412,8 +1446,8 @@ xlog_sync(xlog_t *log,
1412 roundoff < BBTOB(1))); 1446 roundoff < BBTOB(1)));
1413 1447
1414 /* move grant heads by roundoff in sync */ 1448 /* move grant heads by roundoff in sync */
1415 xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); 1449 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
1416 xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); 1450 xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
1417 1451
1418 /* put cycle number in every block */ 1452 /* put cycle number in every block */
1419 xlog_pack_data(log, iclog, roundoff); 1453 xlog_pack_data(log, iclog, roundoff);
@@ -2566,119 +2600,6 @@ restart:
2566 return 0; 2600 return 0;
2567} /* xlog_state_get_iclog_space */ 2601} /* xlog_state_get_iclog_space */
2568 2602
2569/*
2570 * Atomically get the log space required for a log ticket.
2571 *
2572 * Once a ticket gets put onto the reserveq, it will only return after the
2573 * needed reservation is satisfied.
2574 *
2575 * This function is structured so that it has a lock free fast path. This is
2576 * necessary because every new transaction reservation will come through this
2577 * path. Hence any lock will be globally hot if we take it unconditionally on
2578 * every pass.
2579 *
2580 * As tickets are only ever moved on and off the reserveq under the
2581 * l_grant_reserve_lock, we only need to take that lock if we are going to add
2582 * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
2583 * was never added to the reserveq because the t_queue list head will be empty
2584 * and we hold the only reference to it so it can safely be checked unlocked.
2585 */
2586STATIC int
2587xlog_grant_log_space(
2588 struct log *log,
2589 struct xlog_ticket *tic)
2590{
2591 int free_bytes, need_bytes;
2592 int error = 0;
2593
2594 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2595
2596 trace_xfs_log_grant_enter(log, tic);
2597
2598 /*
2599 * If there are other waiters on the queue then give them a chance at
2600 * logspace before us. Wake up the first waiters, if we do not wake
2601 * up all the waiters then go to sleep waiting for more free space,
2602 * otherwise try to get some space for this transaction.
2603 */
2604 need_bytes = tic->t_unit_res;
2605 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2606 need_bytes *= tic->t_ocnt;
2607 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2608 if (!list_empty_careful(&log->l_reserveq)) {
2609 spin_lock(&log->l_grant_reserve_lock);
2610 if (!xlog_reserveq_wake(log, &free_bytes) ||
2611 free_bytes < need_bytes)
2612 error = xlog_reserveq_wait(log, tic, need_bytes);
2613 spin_unlock(&log->l_grant_reserve_lock);
2614 } else if (free_bytes < need_bytes) {
2615 spin_lock(&log->l_grant_reserve_lock);
2616 error = xlog_reserveq_wait(log, tic, need_bytes);
2617 spin_unlock(&log->l_grant_reserve_lock);
2618 }
2619 if (error)
2620 return error;
2621
2622 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2623 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2624 trace_xfs_log_grant_exit(log, tic);
2625 xlog_verify_grant_tail(log);
2626 return 0;
2627}
2628
2629/*
2630 * Replenish the byte reservation required by moving the grant write head.
2631 *
2632 * Similar to xlog_grant_log_space, the function is structured to have a lock
2633 * free fast path.
2634 */
2635STATIC int
2636xlog_regrant_write_log_space(
2637 struct log *log,
2638 struct xlog_ticket *tic)
2639{
2640 int free_bytes, need_bytes;
2641 int error = 0;
2642
2643 tic->t_curr_res = tic->t_unit_res;
2644 xlog_tic_reset_res(tic);
2645
2646 if (tic->t_cnt > 0)
2647 return 0;
2648
2649 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2650
2651 trace_xfs_log_regrant_write_enter(log, tic);
2652
2653 /*
2654 * If there are other waiters on the queue then give them a chance at
2655 * logspace before us. Wake up the first waiters, if we do not wake
2656 * up all the waiters then go to sleep waiting for more free space,
2657 * otherwise try to get some space for this transaction.
2658 */
2659 need_bytes = tic->t_unit_res;
2660 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2661 if (!list_empty_careful(&log->l_writeq)) {
2662 spin_lock(&log->l_grant_write_lock);
2663 if (!xlog_writeq_wake(log, &free_bytes) ||
2664 free_bytes < need_bytes)
2665 error = xlog_writeq_wait(log, tic, need_bytes);
2666 spin_unlock(&log->l_grant_write_lock);
2667 } else if (free_bytes < need_bytes) {
2668 spin_lock(&log->l_grant_write_lock);
2669 error = xlog_writeq_wait(log, tic, need_bytes);
2670 spin_unlock(&log->l_grant_write_lock);
2671 }
2672
2673 if (error)
2674 return error;
2675
2676 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2677 trace_xfs_log_regrant_write_exit(log, tic);
2678 xlog_verify_grant_tail(log);
2679 return 0;
2680}
2681
2682/* The first cnt-1 times through here we don't need to 2603/* The first cnt-1 times through here we don't need to
2683 * move the grant write head because the permanent 2604 * move the grant write head because the permanent
2684 * reservation has reserved cnt times the unit amount. 2605 * reservation has reserved cnt times the unit amount.
@@ -2695,9 +2616,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2695 if (ticket->t_cnt > 0) 2616 if (ticket->t_cnt > 0)
2696 ticket->t_cnt--; 2617 ticket->t_cnt--;
2697 2618
2698 xlog_grant_sub_space(log, &log->l_grant_reserve_head, 2619 xlog_grant_sub_space(log, &log->l_reserve_head.grant,
2699 ticket->t_curr_res); 2620 ticket->t_curr_res);
2700 xlog_grant_sub_space(log, &log->l_grant_write_head, 2621 xlog_grant_sub_space(log, &log->l_write_head.grant,
2701 ticket->t_curr_res); 2622 ticket->t_curr_res);
2702 ticket->t_curr_res = ticket->t_unit_res; 2623 ticket->t_curr_res = ticket->t_unit_res;
2703 xlog_tic_reset_res(ticket); 2624 xlog_tic_reset_res(ticket);
@@ -2708,7 +2629,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2708 if (ticket->t_cnt > 0) 2629 if (ticket->t_cnt > 0)
2709 return; 2630 return;
2710 2631
2711 xlog_grant_add_space(log, &log->l_grant_reserve_head, 2632 xlog_grant_add_space(log, &log->l_reserve_head.grant,
2712 ticket->t_unit_res); 2633 ticket->t_unit_res);
2713 2634
2714 trace_xfs_log_regrant_reserve_exit(log, ticket); 2635 trace_xfs_log_regrant_reserve_exit(log, ticket);
@@ -2754,14 +2675,13 @@ xlog_ungrant_log_space(xlog_t *log,
2754 bytes += ticket->t_unit_res*ticket->t_cnt; 2675 bytes += ticket->t_unit_res*ticket->t_cnt;
2755 } 2676 }
2756 2677
2757 xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); 2678 xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
2758 xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); 2679 xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
2759 2680
2760 trace_xfs_log_ungrant_exit(log, ticket); 2681 trace_xfs_log_ungrant_exit(log, ticket);
2761 2682
2762 xfs_log_move_tail(log->l_mp, 1); 2683 xfs_log_space_wake(log->l_mp);
2763} /* xlog_ungrant_log_space */ 2684}
2764
2765 2685
2766/* 2686/*
2767 * Flush iclog to disk if this is the last reference to the given iclog and 2687 * Flush iclog to disk if this is the last reference to the given iclog and
@@ -3219,7 +3139,7 @@ xlog_ticket_alloc(
3219 int unit_bytes, 3139 int unit_bytes,
3220 int cnt, 3140 int cnt,
3221 char client, 3141 char client,
3222 uint xflags, 3142 bool permanent,
3223 int alloc_flags) 3143 int alloc_flags)
3224{ 3144{
3225 struct xlog_ticket *tic; 3145 struct xlog_ticket *tic;
@@ -3313,6 +3233,7 @@ xlog_ticket_alloc(
3313 } 3233 }
3314 3234
3315 atomic_set(&tic->t_ref, 1); 3235 atomic_set(&tic->t_ref, 1);
3236 tic->t_task = current;
3316 INIT_LIST_HEAD(&tic->t_queue); 3237 INIT_LIST_HEAD(&tic->t_queue);
3317 tic->t_unit_res = unit_bytes; 3238 tic->t_unit_res = unit_bytes;
3318 tic->t_curr_res = unit_bytes; 3239 tic->t_curr_res = unit_bytes;
@@ -3322,9 +3243,8 @@ xlog_ticket_alloc(
3322 tic->t_clientid = client; 3243 tic->t_clientid = client;
3323 tic->t_flags = XLOG_TIC_INITED; 3244 tic->t_flags = XLOG_TIC_INITED;
3324 tic->t_trans_type = 0; 3245 tic->t_trans_type = 0;
3325 if (xflags & XFS_LOG_PERM_RESERV) 3246 if (permanent)
3326 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3247 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3327 init_waitqueue_head(&tic->t_wait);
3328 3248
3329 xlog_tic_reset_res(tic); 3249 xlog_tic_reset_res(tic);
3330 3250
@@ -3380,7 +3300,7 @@ xlog_verify_grant_tail(
3380 int tail_cycle, tail_blocks; 3300 int tail_cycle, tail_blocks;
3381 int cycle, space; 3301 int cycle, space;
3382 3302
3383 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); 3303 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
3384 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); 3304 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3385 if (tail_cycle != cycle) { 3305 if (tail_cycle != cycle) {
3386 if (cycle - 1 != tail_cycle && 3306 if (cycle - 1 != tail_cycle &&
@@ -3582,7 +3502,6 @@ xfs_log_force_umount(
3582 struct xfs_mount *mp, 3502 struct xfs_mount *mp,
3583 int logerror) 3503 int logerror)
3584{ 3504{
3585 xlog_ticket_t *tic;
3586 xlog_t *log; 3505 xlog_t *log;
3587 int retval; 3506 int retval;
3588 3507
@@ -3650,15 +3569,8 @@ xfs_log_force_umount(
3650 * we don't enqueue anything once the SHUTDOWN flag is set, and this 3569 * we don't enqueue anything once the SHUTDOWN flag is set, and this
3651 * action is protected by the grant locks. 3570 * action is protected by the grant locks.
3652 */ 3571 */
3653 spin_lock(&log->l_grant_reserve_lock); 3572 xlog_grant_head_wake_all(&log->l_reserve_head);
3654 list_for_each_entry(tic, &log->l_reserveq, t_queue) 3573 xlog_grant_head_wake_all(&log->l_write_head);
3655 wake_up(&tic->t_wait);
3656 spin_unlock(&log->l_grant_reserve_lock);
3657
3658 spin_lock(&log->l_grant_write_lock);
3659 list_for_each_entry(tic, &log->l_writeq, t_queue)
3660 wake_up(&tic->t_wait);
3661 spin_unlock(&log->l_grant_write_lock);
3662 3574
3663 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3575 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3664 ASSERT(!logerror); 3576 ASSERT(!logerror);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 2aee3b22d29c..2c622bedb302 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -53,15 +53,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
53#define XFS_LOG_REL_PERM_RESERV 0x1 53#define XFS_LOG_REL_PERM_RESERV 0x1
54 54
55/* 55/*
56 * Flags to xfs_log_reserve()
57 *
58 * XFS_LOG_PERM_RESERV: Permanent reservation. When writes are
59 * performed against this type of reservation, the reservation
60 * is not decreased. Long running transactions should use this.
61 */
62#define XFS_LOG_PERM_RESERV 0x2
63
64/*
65 * Flags to xfs_log_force() 56 * Flags to xfs_log_force()
66 * 57 *
67 * XFS_LOG_SYNC: Synchronous force in-core log to disk 58 * XFS_LOG_SYNC: Synchronous force in-core log to disk
@@ -160,8 +151,8 @@ int xfs_log_mount(struct xfs_mount *mp,
160 xfs_daddr_t start_block, 151 xfs_daddr_t start_block,
161 int num_bblocks); 152 int num_bblocks);
162int xfs_log_mount_finish(struct xfs_mount *mp); 153int xfs_log_mount_finish(struct xfs_mount *mp);
163void xfs_log_move_tail(struct xfs_mount *mp, 154xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
164 xfs_lsn_t tail_lsn); 155void xfs_log_space_wake(struct xfs_mount *mp);
165int xfs_log_notify(struct xfs_mount *mp, 156int xfs_log_notify(struct xfs_mount *mp,
166 struct xlog_in_core *iclog, 157 struct xlog_in_core *iclog,
167 xfs_log_callback_t *callback_entry); 158 xfs_log_callback_t *callback_entry);
@@ -172,8 +163,9 @@ int xfs_log_reserve(struct xfs_mount *mp,
172 int count, 163 int count,
173 struct xlog_ticket **ticket, 164 struct xlog_ticket **ticket,
174 __uint8_t clientid, 165 __uint8_t clientid,
175 uint flags, 166 bool permanent,
176 uint t_type); 167 uint t_type);
168int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
177int xfs_log_unmount_write(struct xfs_mount *mp); 169int xfs_log_unmount_write(struct xfs_mount *mp);
178void xfs_log_unmount(struct xfs_mount *mp); 170void xfs_log_unmount(struct xfs_mount *mp);
179int xfs_log_force_umount(struct xfs_mount *mp, int logerror); 171int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 2d3b6a498d63..2152900b79d4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -239,8 +239,8 @@ typedef struct xlog_res {
239} xlog_res_t; 239} xlog_res_t;
240 240
241typedef struct xlog_ticket { 241typedef struct xlog_ticket {
242 wait_queue_head_t t_wait; /* ticket wait queue */
243 struct list_head t_queue; /* reserve/write queue */ 242 struct list_head t_queue; /* reserve/write queue */
243 struct task_struct *t_task; /* task that owns this ticket */
244 xlog_tid_t t_tid; /* transaction identifier : 4 */ 244 xlog_tid_t t_tid; /* transaction identifier : 4 */
245 atomic_t t_ref; /* ticket reference count : 4 */ 245 atomic_t t_ref; /* ticket reference count : 4 */
246 int t_curr_res; /* current reservation in bytes : 4 */ 246 int t_curr_res; /* current reservation in bytes : 4 */
@@ -470,6 +470,16 @@ struct xfs_cil {
470#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) 470#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
471 471
472/* 472/*
473 * ticket grant locks, queues and accounting have their own cachlines
474 * as these are quite hot and can be operated on concurrently.
475 */
476struct xlog_grant_head {
477 spinlock_t lock ____cacheline_aligned_in_smp;
478 struct list_head waiters;
479 atomic64_t grant;
480};
481
482/*
473 * The reservation head lsn is not made up of a cycle number and block number. 483 * The reservation head lsn is not made up of a cycle number and block number.
474 * Instead, it uses a cycle number and byte number. Logs don't expect to 484 * Instead, it uses a cycle number and byte number. Logs don't expect to
475 * overflow 31 bits worth of byte offset, so using a byte number will mean 485 * overflow 31 bits worth of byte offset, so using a byte number will mean
@@ -520,17 +530,8 @@ typedef struct log {
520 /* lsn of 1st LR with unflushed * buffers */ 530 /* lsn of 1st LR with unflushed * buffers */
521 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; 531 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
522 532
523 /* 533 struct xlog_grant_head l_reserve_head;
524 * ticket grant locks, queues and accounting have their own cachlines 534 struct xlog_grant_head l_write_head;
525 * as these are quite hot and can be operated on concurrently.
526 */
527 spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
528 struct list_head l_reserveq;
529 atomic64_t l_grant_reserve_head;
530
531 spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
532 struct list_head l_writeq;
533 atomic64_t l_grant_write_head;
534 535
535 /* The following field are used for debugging; need to hold icloglock */ 536 /* The following field are used for debugging; need to hold icloglock */
536#ifdef DEBUG 537#ifdef DEBUG
@@ -545,14 +546,13 @@ typedef struct log {
545#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 546#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
546 547
547/* common routines */ 548/* common routines */
548extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
549extern int xlog_recover(xlog_t *log); 549extern int xlog_recover(xlog_t *log);
550extern int xlog_recover_finish(xlog_t *log); 550extern int xlog_recover_finish(xlog_t *log);
551extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 551extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
552 552
553extern kmem_zone_t *xfs_log_ticket_zone; 553extern kmem_zone_t *xfs_log_ticket_zone;
554struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, 554struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
555 int count, char client, uint xflags, 555 int count, char client, bool permanent,
556 int alloc_flags); 556 int alloc_flags);
557 557
558 558
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0ed9ee77937c..7c75c7374d5a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -965,9 +965,9 @@ xlog_find_tail(
965 log->l_curr_cycle++; 965 log->l_curr_cycle++;
966 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 966 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
967 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 967 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
968 xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle, 968 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
969 BBTOB(log->l_curr_block)); 969 BBTOB(log->l_curr_block));
970 xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle, 970 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
971 BBTOB(log->l_curr_block)); 971 BBTOB(log->l_curr_block));
972 972
973 /* 973 /*
@@ -3695,7 +3695,7 @@ xlog_do_recover(
3695 3695
3696 /* Convert superblock from on-disk format */ 3696 /* Convert superblock from on-disk format */
3697 sbp = &log->l_mp->m_sb; 3697 sbp = &log->l_mp->m_sb;
3698 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 3698 xfs_sb_from_disk(log->l_mp, XFS_BUF_TO_SBP(bp));
3699 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 3699 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
3700 ASSERT(xfs_sb_good_version(sbp)); 3700 ASSERT(xfs_sb_good_version(sbp));
3701 xfs_buf_relse(bp); 3701 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d06afbc3540d..1ffead4b2296 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -158,7 +158,7 @@ xfs_uuid_mount(
158 158
159 out_duplicate: 159 out_duplicate:
160 mutex_unlock(&xfs_uuid_table_mutex); 160 mutex_unlock(&xfs_uuid_table_mutex);
161 xfs_warn(mp, "Filesystem has duplicate UUID - can't mount"); 161 xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
162 return XFS_ERROR(EINVAL); 162 return XFS_ERROR(EINVAL);
163} 163}
164 164
@@ -553,9 +553,11 @@ out_unwind:
553 553
554void 554void
555xfs_sb_from_disk( 555xfs_sb_from_disk(
556 xfs_sb_t *to, 556 struct xfs_mount *mp,
557 xfs_dsb_t *from) 557 xfs_dsb_t *from)
558{ 558{
559 struct xfs_sb *to = &mp->m_sb;
560
559 to->sb_magicnum = be32_to_cpu(from->sb_magicnum); 561 to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
560 to->sb_blocksize = be32_to_cpu(from->sb_blocksize); 562 to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
561 to->sb_dblocks = be64_to_cpu(from->sb_dblocks); 563 to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
@@ -693,7 +695,7 @@ reread:
693 * Initialize the mount structure from the superblock. 695 * Initialize the mount structure from the superblock.
694 * But first do some basic consistency checking. 696 * But first do some basic consistency checking.
695 */ 697 */
696 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); 698 xfs_sb_from_disk(mp, XFS_BUF_TO_SBP(bp));
697 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 699 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
698 if (error) { 700 if (error) {
699 if (loud) 701 if (loud)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 19f69e232509..9eba73887829 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -211,6 +211,9 @@ typedef struct xfs_mount {
211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
212 int64_t m_low_space[XFS_LOWSP_MAX]; 212 int64_t m_low_space[XFS_LOWSP_MAX];
213 /* low free space thresholds */ 213 /* low free space thresholds */
214
215 struct workqueue_struct *m_data_workqueue;
216 struct workqueue_struct *m_unwritten_workqueue;
214} xfs_mount_t; 217} xfs_mount_t;
215 218
216/* 219/*
@@ -395,7 +398,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
395extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 398extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
396extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, 399extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
397 xfs_agnumber_t *); 400 xfs_agnumber_t *);
398extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); 401extern void xfs_sb_from_disk(struct xfs_mount *, struct xfs_dsb *);
399extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); 402extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
400 403
401#endif /* __XFS_MOUNT_H__ */ 404#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index c436def733bf..55c6afedc879 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -48,194 +48,189 @@
48 * quota functionality, including maintaining the freelist and hash 48 * quota functionality, including maintaining the freelist and hash
49 * tables of dquots. 49 * tables of dquots.
50 */ 50 */
51struct mutex xfs_Gqm_lock;
52struct xfs_qm *xfs_Gqm;
53
54kmem_zone_t *qm_dqzone;
55kmem_zone_t *qm_dqtrxzone;
56
57STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
58STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
59
60STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 51STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
61STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 52STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
62STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); 53STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
63 54
64static struct shrinker xfs_qm_shaker = {
65 .shrink = xfs_qm_shake,
66 .seeks = DEFAULT_SEEKS,
67};
68
69/* 55/*
70 * Initialize the XQM structure. 56 * We use the batch lookup interface to iterate over the dquots as it
71 * Note that there is not one quota manager per file system. 57 * currently is the only interface into the radix tree code that allows
58 * fuzzy lookups instead of exact matches. Holding the lock over multiple
59 * operations is fine as all callers are used either during mount/umount
60 * or quotaoff.
72 */ 61 */
73STATIC struct xfs_qm * 62#define XFS_DQ_LOOKUP_BATCH 32
74xfs_Gqm_init(void) 63
64STATIC int
65xfs_qm_dquot_walk(
66 struct xfs_mount *mp,
67 int type,
68 int (*execute)(struct xfs_dquot *dqp))
75{ 69{
76 xfs_dqhash_t *udqhash, *gdqhash; 70 struct xfs_quotainfo *qi = mp->m_quotainfo;
77 xfs_qm_t *xqm; 71 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
78 size_t hsize; 72 uint32_t next_index;
79 uint i; 73 int last_error = 0;
74 int skipped;
75 int nr_found;
76
77restart:
78 skipped = 0;
79 next_index = 0;
80 nr_found = 0;
81
82 while (1) {
83 struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
84 int error = 0;
85 int i;
86
87 mutex_lock(&qi->qi_tree_lock);
88 nr_found = radix_tree_gang_lookup(tree, (void **)batch,
89 next_index, XFS_DQ_LOOKUP_BATCH);
90 if (!nr_found) {
91 mutex_unlock(&qi->qi_tree_lock);
92 break;
93 }
80 94
81 /* 95 for (i = 0; i < nr_found; i++) {
82 * Initialize the dquot hash tables. 96 struct xfs_dquot *dqp = batch[i];
83 */
84 udqhash = kmem_zalloc_greedy(&hsize,
85 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
86 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
87 if (!udqhash)
88 goto out;
89 97
90 gdqhash = kmem_zalloc_large(hsize); 98 next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
91 if (!gdqhash)
92 goto out_free_udqhash;
93 99
94 hsize /= sizeof(xfs_dqhash_t); 100 error = execute(batch[i]);
101 if (error == EAGAIN) {
102 skipped++;
103 continue;
104 }
105 if (error && last_error != EFSCORRUPTED)
106 last_error = error;
107 }
95 108
96 xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); 109 mutex_unlock(&qi->qi_tree_lock);
97 xqm->qm_dqhashmask = hsize - 1;
98 xqm->qm_usr_dqhtable = udqhash;
99 xqm->qm_grp_dqhtable = gdqhash;
100 ASSERT(xqm->qm_usr_dqhtable != NULL);
101 ASSERT(xqm->qm_grp_dqhtable != NULL);
102 110
103 for (i = 0; i < hsize; i++) { 111 /* bail out if the filesystem is corrupted. */
104 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); 112 if (last_error == EFSCORRUPTED) {
105 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); 113 skipped = 0;
114 break;
115 }
106 } 116 }
107 117
108 /* 118 if (skipped) {
109 * Freelist of all dquots of all file systems 119 delay(1);
110 */ 120 goto restart;
111 INIT_LIST_HEAD(&xqm->qm_dqfrlist); 121 }
112 xqm->qm_dqfrlist_cnt = 0;
113 mutex_init(&xqm->qm_dqfrlist_lock);
114
115 /*
116 * dquot zone. we register our own low-memory callback.
117 */
118 if (!qm_dqzone) {
119 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
120 "xfs_dquots");
121 qm_dqzone = xqm->qm_dqzone;
122 } else
123 xqm->qm_dqzone = qm_dqzone;
124
125 register_shrinker(&xfs_qm_shaker);
126
127 /*
128 * The t_dqinfo portion of transactions.
129 */
130 if (!qm_dqtrxzone) {
131 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
132 "xfs_dqtrx");
133 qm_dqtrxzone = xqm->qm_dqtrxzone;
134 } else
135 xqm->qm_dqtrxzone = qm_dqtrxzone;
136
137 atomic_set(&xqm->qm_totaldquots, 0);
138 xqm->qm_nrefs = 0;
139 return xqm;
140 122
141 out_free_udqhash: 123 return last_error;
142 kmem_free_large(udqhash);
143 out:
144 return NULL;
145} 124}
146 125
126
147/* 127/*
148 * Destroy the global quota manager when its reference count goes to zero. 128 * Purge a dquot from all tracking data structures and free it.
149 */ 129 */
150STATIC void 130STATIC int
151xfs_qm_destroy( 131xfs_qm_dqpurge(
152 struct xfs_qm *xqm) 132 struct xfs_dquot *dqp)
153{ 133{
154 int hsize, i; 134 struct xfs_mount *mp = dqp->q_mount;
135 struct xfs_quotainfo *qi = mp->m_quotainfo;
136 struct xfs_dquot *gdqp = NULL;
155 137
156 ASSERT(xqm != NULL); 138 xfs_dqlock(dqp);
157 ASSERT(xqm->qm_nrefs == 0); 139 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
140 xfs_dqunlock(dqp);
141 return EAGAIN;
142 }
158 143
159 unregister_shrinker(&xfs_qm_shaker); 144 /*
145 * If this quota has a group hint attached, prepare for releasing it
146 * now.
147 */
148 gdqp = dqp->q_gdquot;
149 if (gdqp) {
150 xfs_dqlock(gdqp);
151 dqp->q_gdquot = NULL;
152 }
160 153
161 mutex_lock(&xqm->qm_dqfrlist_lock); 154 dqp->dq_flags |= XFS_DQ_FREEING;
162 ASSERT(list_empty(&xqm->qm_dqfrlist));
163 mutex_unlock(&xqm->qm_dqfrlist_lock);
164 155
165 hsize = xqm->qm_dqhashmask + 1; 156 /*
166 for (i = 0; i < hsize; i++) { 157 * If we're turning off quotas, we have to make sure that, for
167 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 158 * example, we don't delete quota disk blocks while dquots are
168 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 159 * in the process of getting written to those disk blocks.
160 * This dquot might well be on AIL, and we can't leave it there
161 * if we're turning off quotas. Basically, we need this flush
162 * lock, and are willing to block on it.
163 */
164 if (!xfs_dqflock_nowait(dqp)) {
165 /*
166 * Block on the flush lock after nudging dquot buffer,
167 * if it is incore.
168 */
169 xfs_dqflock_pushbuf_wait(dqp);
169 } 170 }
170 kmem_free_large(xqm->qm_usr_dqhtable);
171 kmem_free_large(xqm->qm_grp_dqhtable);
172 xqm->qm_usr_dqhtable = NULL;
173 xqm->qm_grp_dqhtable = NULL;
174 xqm->qm_dqhashmask = 0;
175 171
176 kmem_free(xqm);
177}
178
179/*
180 * Called at mount time to let XQM know that another file system is
181 * starting quotas. This isn't crucial information as the individual mount
182 * structures are pretty independent, but it helps the XQM keep a
183 * global view of what's going on.
184 */
185/* ARGSUSED */
186STATIC int
187xfs_qm_hold_quotafs_ref(
188 struct xfs_mount *mp)
189{
190 /* 172 /*
191 * Need to lock the xfs_Gqm structure for things like this. For example, 173 * If we are turning this type of quotas off, we don't care
192 * the structure could disappear between the entry to this routine and 174 * about the dirty metadata sitting in this dquot. OTOH, if
193 * a HOLD operation if not locked. 175 * we're unmounting, we do care, so we flush it and wait.
194 */ 176 */
195 mutex_lock(&xfs_Gqm_lock); 177 if (XFS_DQ_IS_DIRTY(dqp)) {
178 int error;
196 179
197 if (!xfs_Gqm) { 180 /*
198 xfs_Gqm = xfs_Gqm_init(); 181 * We don't care about getting disk errors here. We need
199 if (!xfs_Gqm) { 182 * to purge this dquot anyway, so we go ahead regardless.
200 mutex_unlock(&xfs_Gqm_lock); 183 */
201 return ENOMEM; 184 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
202 } 185 if (error)
186 xfs_warn(mp, "%s: dquot %p flush failed",
187 __func__, dqp);
188 xfs_dqflock(dqp);
203 } 189 }
204 190
191 ASSERT(atomic_read(&dqp->q_pincount) == 0);
192 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
193 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
194
195 xfs_dqfunlock(dqp);
196 xfs_dqunlock(dqp);
197
198 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
199 be32_to_cpu(dqp->q_core.d_id));
200 qi->qi_dquots--;
201
205 /* 202 /*
206 * We can keep a list of all filesystems with quotas mounted for 203 * We move dquots to the freelist as soon as their reference count
207 * debugging and statistical purposes, but ... 204 * hits zero, so it really should be on the freelist here.
208 * Just take a reference and get out.
209 */ 205 */
210 xfs_Gqm->qm_nrefs++; 206 mutex_lock(&qi->qi_lru_lock);
211 mutex_unlock(&xfs_Gqm_lock); 207 ASSERT(!list_empty(&dqp->q_lru));
208 list_del_init(&dqp->q_lru);
209 qi->qi_lru_count--;
210 XFS_STATS_DEC(xs_qm_dquot_unused);
211 mutex_unlock(&qi->qi_lru_lock);
212 212
213 xfs_qm_dqdestroy(dqp);
214
215 if (gdqp)
216 xfs_qm_dqput(gdqp);
213 return 0; 217 return 0;
214} 218}
215 219
216
217/* 220/*
218 * Release the reference that a filesystem took at mount time, 221 * Purge the dquot cache.
219 * so that we know when we need to destroy the entire quota manager.
220 */ 222 */
221/* ARGSUSED */ 223void
222STATIC void 224xfs_qm_dqpurge_all(
223xfs_qm_rele_quotafs_ref( 225 struct xfs_mount *mp,
224 struct xfs_mount *mp) 226 uint flags)
225{ 227{
226 ASSERT(xfs_Gqm); 228 if (flags & XFS_QMOPT_UQUOTA)
227 ASSERT(xfs_Gqm->qm_nrefs > 0); 229 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge);
228 230 if (flags & XFS_QMOPT_GQUOTA)
229 /* 231 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge);
230 * Destroy the entire XQM. If somebody mounts with quotaon, this'll 232 if (flags & XFS_QMOPT_PQUOTA)
231 * be restarted. 233 xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge);
232 */
233 mutex_lock(&xfs_Gqm_lock);
234 if (--xfs_Gqm->qm_nrefs == 0) {
235 xfs_qm_destroy(xfs_Gqm);
236 xfs_Gqm = NULL;
237 }
238 mutex_unlock(&xfs_Gqm_lock);
239} 234}
240 235
241/* 236/*
@@ -376,175 +371,6 @@ xfs_qm_unmount_quotas(
376 } 371 }
377} 372}
378 373
379/*
380 * Flush all dquots of the given file system to disk. The dquots are
381 * _not_ purged from memory here, just their data written to disk.
382 */
383STATIC int
384xfs_qm_dqflush_all(
385 struct xfs_mount *mp)
386{
387 struct xfs_quotainfo *q = mp->m_quotainfo;
388 int recl;
389 struct xfs_dquot *dqp;
390 int error;
391
392 if (!q)
393 return 0;
394again:
395 mutex_lock(&q->qi_dqlist_lock);
396 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
397 xfs_dqlock(dqp);
398 if ((dqp->dq_flags & XFS_DQ_FREEING) ||
399 !XFS_DQ_IS_DIRTY(dqp)) {
400 xfs_dqunlock(dqp);
401 continue;
402 }
403
404 /* XXX a sentinel would be better */
405 recl = q->qi_dqreclaims;
406 if (!xfs_dqflock_nowait(dqp)) {
407 /*
408 * If we can't grab the flush lock then check
409 * to see if the dquot has been flushed delayed
410 * write. If so, grab its buffer and send it
411 * out immediately. We'll be able to acquire
412 * the flush lock when the I/O completes.
413 */
414 xfs_dqflock_pushbuf_wait(dqp);
415 }
416 /*
417 * Let go of the mplist lock. We don't want to hold it
418 * across a disk write.
419 */
420 mutex_unlock(&q->qi_dqlist_lock);
421 error = xfs_qm_dqflush(dqp, 0);
422 xfs_dqunlock(dqp);
423 if (error)
424 return error;
425
426 mutex_lock(&q->qi_dqlist_lock);
427 if (recl != q->qi_dqreclaims) {
428 mutex_unlock(&q->qi_dqlist_lock);
429 /* XXX restart limit */
430 goto again;
431 }
432 }
433
434 mutex_unlock(&q->qi_dqlist_lock);
435 /* return ! busy */
436 return 0;
437}
438
439/*
440 * Release the group dquot pointers the user dquots may be
441 * carrying around as a hint. mplist is locked on entry and exit.
442 */
443STATIC void
444xfs_qm_detach_gdquots(
445 struct xfs_mount *mp)
446{
447 struct xfs_quotainfo *q = mp->m_quotainfo;
448 struct xfs_dquot *dqp, *gdqp;
449
450 again:
451 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
452 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
453 xfs_dqlock(dqp);
454 if (dqp->dq_flags & XFS_DQ_FREEING) {
455 xfs_dqunlock(dqp);
456 mutex_unlock(&q->qi_dqlist_lock);
457 delay(1);
458 mutex_lock(&q->qi_dqlist_lock);
459 goto again;
460 }
461
462 gdqp = dqp->q_gdquot;
463 if (gdqp)
464 dqp->q_gdquot = NULL;
465 xfs_dqunlock(dqp);
466
467 if (gdqp)
468 xfs_qm_dqrele(gdqp);
469 }
470}
471
472/*
473 * Go through all the incore dquots of this file system and take them
474 * off the mplist and hashlist, if the dquot type matches the dqtype
475 * parameter. This is used when turning off quota accounting for
476 * users and/or groups, as well as when the filesystem is unmounting.
477 */
478STATIC int
479xfs_qm_dqpurge_int(
480 struct xfs_mount *mp,
481 uint flags)
482{
483 struct xfs_quotainfo *q = mp->m_quotainfo;
484 struct xfs_dquot *dqp, *n;
485 uint dqtype;
486 int nmisses = 0;
487 LIST_HEAD (dispose_list);
488
489 if (!q)
490 return 0;
491
492 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
493 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
494 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
495
496 mutex_lock(&q->qi_dqlist_lock);
497
498 /*
499 * In the first pass through all incore dquots of this filesystem,
500 * we release the group dquot pointers the user dquots may be
501 * carrying around as a hint. We need to do this irrespective of
502 * what's being turned off.
503 */
504 xfs_qm_detach_gdquots(mp);
505
506 /*
507 * Try to get rid of all of the unwanted dquots.
508 */
509 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
510 xfs_dqlock(dqp);
511 if ((dqp->dq_flags & dqtype) != 0 &&
512 !(dqp->dq_flags & XFS_DQ_FREEING)) {
513 if (dqp->q_nrefs == 0) {
514 dqp->dq_flags |= XFS_DQ_FREEING;
515 list_move_tail(&dqp->q_mplist, &dispose_list);
516 } else
517 nmisses++;
518 }
519 xfs_dqunlock(dqp);
520 }
521 mutex_unlock(&q->qi_dqlist_lock);
522
523 list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
524 xfs_qm_dqpurge(dqp);
525
526 return nmisses;
527}
528
529int
530xfs_qm_dqpurge_all(
531 xfs_mount_t *mp,
532 uint flags)
533{
534 int ndquots;
535
536 /*
537 * Purge the dquot cache.
538 * None of the dquots should really be busy at this point.
539 */
540 if (mp->m_quotainfo) {
541 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
542 delay(ndquots * 10);
543 }
544 }
545 return 0;
546}
547
548STATIC int 374STATIC int
549xfs_qm_dqattach_one( 375xfs_qm_dqattach_one(
550 xfs_inode_t *ip, 376 xfs_inode_t *ip,
@@ -783,14 +609,6 @@ xfs_qm_dqdetach(
783} 609}
784 610
785/* 611/*
786 * The hash chains and the mplist use the same xfs_dqhash structure as
787 * their list head, but we can take the mplist qh_lock and one of the
788 * hash qh_locks at the same time without any problem as they aren't
789 * related.
790 */
791static struct lock_class_key xfs_quota_mplist_class;
792
793/*
794 * This initializes all the quota information that's kept in the 612 * This initializes all the quota information that's kept in the
795 * mount structure 613 * mount structure
796 */ 614 */
@@ -804,13 +622,6 @@ xfs_qm_init_quotainfo(
804 622
805 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 623 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
806 624
807 /*
808 * Tell XQM that we exist as soon as possible.
809 */
810 if ((error = xfs_qm_hold_quotafs_ref(mp))) {
811 return error;
812 }
813
814 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 625 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
815 626
816 /* 627 /*
@@ -823,11 +634,13 @@ xfs_qm_init_quotainfo(
823 return error; 634 return error;
824 } 635 }
825 636
826 INIT_LIST_HEAD(&qinf->qi_dqlist); 637 INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
827 mutex_init(&qinf->qi_dqlist_lock); 638 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
828 lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); 639 mutex_init(&qinf->qi_tree_lock);
829 640
830 qinf->qi_dqreclaims = 0; 641 INIT_LIST_HEAD(&qinf->qi_lru_list);
642 qinf->qi_lru_count = 0;
643 mutex_init(&qinf->qi_lru_lock);
831 644
832 /* mutex used to serialize quotaoffs */ 645 /* mutex used to serialize quotaoffs */
833 mutex_init(&qinf->qi_quotaofflock); 646 mutex_init(&qinf->qi_quotaofflock);
@@ -894,6 +707,9 @@ xfs_qm_init_quotainfo(
894 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 707 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
895 } 708 }
896 709
710 qinf->qi_shrinker.shrink = xfs_qm_shake;
711 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
712 register_shrinker(&qinf->qi_shrinker);
897 return 0; 713 return 0;
898} 714}
899 715
@@ -911,17 +727,8 @@ xfs_qm_destroy_quotainfo(
911 727
912 qi = mp->m_quotainfo; 728 qi = mp->m_quotainfo;
913 ASSERT(qi != NULL); 729 ASSERT(qi != NULL);
914 ASSERT(xfs_Gqm != NULL);
915
916 /*
917 * Release the reference that XQM kept, so that we know
918 * when the XQM structure should be freed. We cannot assume
919 * that xfs_Gqm is non-null after this point.
920 */
921 xfs_qm_rele_quotafs_ref(mp);
922 730
923 ASSERT(list_empty(&qi->qi_dqlist)); 731 unregister_shrinker(&qi->qi_shrinker);
924 mutex_destroy(&qi->qi_dqlist_lock);
925 732
926 if (qi->qi_uquotaip) { 733 if (qi->qi_uquotaip) {
927 IRELE(qi->qi_uquotaip); 734 IRELE(qi->qi_uquotaip);
@@ -936,30 +743,6 @@ xfs_qm_destroy_quotainfo(
936 mp->m_quotainfo = NULL; 743 mp->m_quotainfo = NULL;
937} 744}
938 745
939
940
941/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
942
943/* ARGSUSED */
944STATIC void
945xfs_qm_list_init(
946 xfs_dqlist_t *list,
947 char *str,
948 int n)
949{
950 mutex_init(&list->qh_lock);
951 INIT_LIST_HEAD(&list->qh_list);
952 list->qh_version = 0;
953 list->qh_nelems = 0;
954}
955
956STATIC void
957xfs_qm_list_destroy(
958 xfs_dqlist_t *list)
959{
960 mutex_destroy(&(list->qh_lock));
961}
962
963/* 746/*
964 * Create an inode and return with a reference already taken, but unlocked 747 * Create an inode and return with a reference already taken, but unlocked
965 * This is how we create quota inodes 748 * This is how we create quota inodes
@@ -1397,6 +1180,28 @@ error0:
1397 return error; 1180 return error;
1398} 1181}
1399 1182
1183STATIC int
1184xfs_qm_flush_one(
1185 struct xfs_dquot *dqp)
1186{
1187 int error = 0;
1188
1189 xfs_dqlock(dqp);
1190 if (dqp->dq_flags & XFS_DQ_FREEING)
1191 goto out_unlock;
1192 if (!XFS_DQ_IS_DIRTY(dqp))
1193 goto out_unlock;
1194
1195 if (!xfs_dqflock_nowait(dqp))
1196 xfs_dqflock_pushbuf_wait(dqp);
1197
1198 error = xfs_qm_dqflush(dqp, 0);
1199
1200out_unlock:
1201 xfs_dqunlock(dqp);
1202 return error;
1203}
1204
1400/* 1205/*
1401 * Walk thru all the filesystem inodes and construct a consistent view 1206 * Walk thru all the filesystem inodes and construct a consistent view
1402 * of the disk quota world. If the quotacheck fails, disable quotas. 1207 * of the disk quota world. If the quotacheck fails, disable quotas.
@@ -1405,7 +1210,7 @@ int
1405xfs_qm_quotacheck( 1210xfs_qm_quotacheck(
1406 xfs_mount_t *mp) 1211 xfs_mount_t *mp)
1407{ 1212{
1408 int done, count, error; 1213 int done, count, error, error2;
1409 xfs_ino_t lastino; 1214 xfs_ino_t lastino;
1410 size_t structsz; 1215 size_t structsz;
1411 xfs_inode_t *uip, *gip; 1216 xfs_inode_t *uip, *gip;
@@ -1419,12 +1224,6 @@ xfs_qm_quotacheck(
1419 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); 1224 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1420 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1225 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1421 1226
1422 /*
1423 * There should be no cached dquots. The (simplistic) quotacheck
1424 * algorithm doesn't like that.
1425 */
1426 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1427
1428 xfs_notice(mp, "Quotacheck needed: Please wait."); 1227 xfs_notice(mp, "Quotacheck needed: Please wait.");
1429 1228
1430 /* 1229 /*
@@ -1463,12 +1262,21 @@ xfs_qm_quotacheck(
1463 } while (!done); 1262 } while (!done);
1464 1263
1465 /* 1264 /*
1466 * We've made all the changes that we need to make incore. 1265 * We've made all the changes that we need to make incore. Flush them
1467 * Flush them down to disk buffers if everything was updated 1266 * down to disk buffers if everything was updated successfully.
1468 * successfully.
1469 */ 1267 */
1470 if (!error) 1268 if (XFS_IS_UQUOTA_ON(mp))
1471 error = xfs_qm_dqflush_all(mp); 1269 error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one);
1270 if (XFS_IS_GQUOTA_ON(mp)) {
1271 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one);
1272 if (!error)
1273 error = error2;
1274 }
1275 if (XFS_IS_PQUOTA_ON(mp)) {
1276 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one);
1277 if (!error)
1278 error = error2;
1279 }
1472 1280
1473 /* 1281 /*
1474 * We can get this error if we couldn't do a dquot allocation inside 1282 * We can get this error if we couldn't do a dquot allocation inside
@@ -1496,7 +1304,7 @@ xfs_qm_quotacheck(
1496 * quotachecked status, since we won't be doing accounting for 1304 * quotachecked status, since we won't be doing accounting for
1497 * that type anymore. 1305 * that type anymore.
1498 */ 1306 */
1499 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); 1307 mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
1500 mp->m_qflags |= flags; 1308 mp->m_qflags |= flags;
1501 1309
1502 error_return: 1310 error_return:
@@ -1508,7 +1316,6 @@ xfs_qm_quotacheck(
1508 * We must turn off quotas. 1316 * We must turn off quotas.
1509 */ 1317 */
1510 ASSERT(mp->m_quotainfo != NULL); 1318 ASSERT(mp->m_quotainfo != NULL);
1511 ASSERT(xfs_Gqm != NULL);
1512 xfs_qm_destroy_quotainfo(mp); 1319 xfs_qm_destroy_quotainfo(mp);
1513 if (xfs_mount_reset_sbqflags(mp)) { 1320 if (xfs_mount_reset_sbqflags(mp)) {
1514 xfs_warn(mp, 1321 xfs_warn(mp,
@@ -1604,16 +1411,12 @@ xfs_qm_dqfree_one(
1604 struct xfs_mount *mp = dqp->q_mount; 1411 struct xfs_mount *mp = dqp->q_mount;
1605 struct xfs_quotainfo *qi = mp->m_quotainfo; 1412 struct xfs_quotainfo *qi = mp->m_quotainfo;
1606 1413
1607 mutex_lock(&dqp->q_hash->qh_lock); 1414 mutex_lock(&qi->qi_tree_lock);
1608 list_del_init(&dqp->q_hashlist); 1415 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
1609 dqp->q_hash->qh_version++; 1416 be32_to_cpu(dqp->q_core.d_id));
1610 mutex_unlock(&dqp->q_hash->qh_lock);
1611 1417
1612 mutex_lock(&qi->qi_dqlist_lock);
1613 list_del_init(&dqp->q_mplist);
1614 qi->qi_dquots--; 1418 qi->qi_dquots--;
1615 qi->qi_dqreclaims++; 1419 mutex_unlock(&qi->qi_tree_lock);
1616 mutex_unlock(&qi->qi_dqlist_lock);
1617 1420
1618 xfs_qm_dqdestroy(dqp); 1421 xfs_qm_dqdestroy(dqp);
1619} 1422}
@@ -1624,6 +1427,7 @@ xfs_qm_dqreclaim_one(
1624 struct list_head *dispose_list) 1427 struct list_head *dispose_list)
1625{ 1428{
1626 struct xfs_mount *mp = dqp->q_mount; 1429 struct xfs_mount *mp = dqp->q_mount;
1430 struct xfs_quotainfo *qi = mp->m_quotainfo;
1627 int error; 1431 int error;
1628 1432
1629 if (!xfs_dqlock_nowait(dqp)) 1433 if (!xfs_dqlock_nowait(dqp))
@@ -1637,16 +1441,14 @@ xfs_qm_dqreclaim_one(
1637 xfs_dqunlock(dqp); 1441 xfs_dqunlock(dqp);
1638 1442
1639 trace_xfs_dqreclaim_want(dqp); 1443 trace_xfs_dqreclaim_want(dqp);
1640 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1444 XFS_STATS_INC(xs_qm_dqwants);
1641 1445
1642 list_del_init(&dqp->q_freelist); 1446 list_del_init(&dqp->q_lru);
1643 xfs_Gqm->qm_dqfrlist_cnt--; 1447 qi->qi_lru_count--;
1448 XFS_STATS_DEC(xs_qm_dquot_unused);
1644 return; 1449 return;
1645 } 1450 }
1646 1451
1647 ASSERT(dqp->q_hash);
1648 ASSERT(!list_empty(&dqp->q_mplist));
1649
1650 /* 1452 /*
1651 * Try to grab the flush lock. If this dquot is in the process of 1453 * Try to grab the flush lock. If this dquot is in the process of
1652 * getting flushed to disk, we don't want to reclaim it. 1454 * getting flushed to disk, we don't want to reclaim it.
@@ -1688,11 +1490,12 @@ xfs_qm_dqreclaim_one(
1688 xfs_dqunlock(dqp); 1490 xfs_dqunlock(dqp);
1689 1491
1690 ASSERT(dqp->q_nrefs == 0); 1492 ASSERT(dqp->q_nrefs == 0);
1691 list_move_tail(&dqp->q_freelist, dispose_list); 1493 list_move_tail(&dqp->q_lru, dispose_list);
1692 xfs_Gqm->qm_dqfrlist_cnt--; 1494 qi->qi_lru_count--;
1495 XFS_STATS_DEC(xs_qm_dquot_unused);
1693 1496
1694 trace_xfs_dqreclaim_done(dqp); 1497 trace_xfs_dqreclaim_done(dqp);
1695 XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); 1498 XFS_STATS_INC(xs_qm_dqreclaims);
1696 return; 1499 return;
1697 1500
1698out_busy: 1501out_busy:
@@ -1701,10 +1504,10 @@ out_busy:
1701 /* 1504 /*
1702 * Move the dquot to the tail of the list so that we don't spin on it. 1505 * Move the dquot to the tail of the list so that we don't spin on it.
1703 */ 1506 */
1704 list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); 1507 list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
1705 1508
1706 trace_xfs_dqreclaim_busy(dqp); 1509 trace_xfs_dqreclaim_busy(dqp);
1707 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); 1510 XFS_STATS_INC(xs_qm_dqreclaim_misses);
1708} 1511}
1709 1512
1710STATIC int 1513STATIC int
@@ -1712,6 +1515,8 @@ xfs_qm_shake(
1712 struct shrinker *shrink, 1515 struct shrinker *shrink,
1713 struct shrink_control *sc) 1516 struct shrink_control *sc)
1714{ 1517{
1518 struct xfs_quotainfo *qi =
1519 container_of(shrink, struct xfs_quotainfo, qi_shrinker);
1715 int nr_to_scan = sc->nr_to_scan; 1520 int nr_to_scan = sc->nr_to_scan;
1716 LIST_HEAD (dispose_list); 1521 LIST_HEAD (dispose_list);
1717 struct xfs_dquot *dqp; 1522 struct xfs_dquot *dqp;
@@ -1721,24 +1526,23 @@ xfs_qm_shake(
1721 if (!nr_to_scan) 1526 if (!nr_to_scan)
1722 goto out; 1527 goto out;
1723 1528
1724 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1529 mutex_lock(&qi->qi_lru_lock);
1725 while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { 1530 while (!list_empty(&qi->qi_lru_list)) {
1726 if (nr_to_scan-- <= 0) 1531 if (nr_to_scan-- <= 0)
1727 break; 1532 break;
1728 dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, 1533 dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
1729 q_freelist); 1534 q_lru);
1730 xfs_qm_dqreclaim_one(dqp, &dispose_list); 1535 xfs_qm_dqreclaim_one(dqp, &dispose_list);
1731 } 1536 }
1732 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1537 mutex_unlock(&qi->qi_lru_lock);
1733 1538
1734 while (!list_empty(&dispose_list)) { 1539 while (!list_empty(&dispose_list)) {
1735 dqp = list_first_entry(&dispose_list, struct xfs_dquot, 1540 dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
1736 q_freelist); 1541 list_del_init(&dqp->q_lru);
1737 list_del_init(&dqp->q_freelist);
1738 xfs_qm_dqfree_one(dqp); 1542 xfs_qm_dqfree_one(dqp);
1739 } 1543 }
1740out: 1544out:
1741 return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; 1545 return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
1742} 1546}
1743 1547
1744/* 1548/*
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 9a9b997e1a0a..44b858b79d71 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -21,21 +21,10 @@
21#include "xfs_dquot_item.h" 21#include "xfs_dquot_item.h"
22#include "xfs_dquot.h" 22#include "xfs_dquot.h"
23#include "xfs_quota_priv.h" 23#include "xfs_quota_priv.h"
24#include "xfs_qm_stats.h"
25 24
26struct xfs_qm;
27struct xfs_inode; 25struct xfs_inode;
28 26
29extern struct mutex xfs_Gqm_lock; 27extern struct kmem_zone *xfs_qm_dqtrxzone;
30extern struct xfs_qm *xfs_Gqm;
31extern kmem_zone_t *qm_dqzone;
32extern kmem_zone_t *qm_dqtrxzone;
33
34/*
35 * Dquot hashtable constants/threshold values.
36 */
37#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t))
38#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
39 28
40/* 29/*
41 * This defines the unit of allocation of dquots. 30 * This defines the unit of allocation of dquots.
@@ -48,36 +37,20 @@ extern kmem_zone_t *qm_dqtrxzone;
48 */ 37 */
49#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 38#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
50 39
51typedef xfs_dqhash_t xfs_dqlist_t;
52
53/*
54 * Quota Manager (global) structure. Lives only in core.
55 */
56typedef struct xfs_qm {
57 xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
58 xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
59 uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
60 struct list_head qm_dqfrlist; /* freelist of dquots */
61 struct mutex qm_dqfrlist_lock;
62 int qm_dqfrlist_cnt;
63 atomic_t qm_totaldquots; /* total incore dquots */
64 uint qm_nrefs; /* file systems with quota on */
65 kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
66 kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
67} xfs_qm_t;
68
69/* 40/*
70 * Various quota information for individual filesystems. 41 * Various quota information for individual filesystems.
71 * The mount structure keeps a pointer to this. 42 * The mount structure keeps a pointer to this.
72 */ 43 */
73typedef struct xfs_quotainfo { 44typedef struct xfs_quotainfo {
45 struct radix_tree_root qi_uquota_tree;
46 struct radix_tree_root qi_gquota_tree;
47 struct mutex qi_tree_lock;
74 xfs_inode_t *qi_uquotaip; /* user quota inode */ 48 xfs_inode_t *qi_uquotaip; /* user quota inode */
75 xfs_inode_t *qi_gquotaip; /* group quota inode */ 49 xfs_inode_t *qi_gquotaip; /* group quota inode */
76 struct list_head qi_dqlist; /* all dquots in filesys */ 50 struct list_head qi_lru_list;
77 struct mutex qi_dqlist_lock; 51 struct mutex qi_lru_lock;
52 int qi_lru_count;
78 int qi_dquots; 53 int qi_dquots;
79 int qi_dqreclaims; /* a change here indicates
80 a removal in the dqlist */
81 time_t qi_btimelimit; /* limit for blks timer */ 54 time_t qi_btimelimit; /* limit for blks timer */
82 time_t qi_itimelimit; /* limit for inodes timer */ 55 time_t qi_itimelimit; /* limit for inodes timer */
83 time_t qi_rtbtimelimit;/* limit for rt blks timer */ 56 time_t qi_rtbtimelimit;/* limit for rt blks timer */
@@ -93,8 +66,14 @@ typedef struct xfs_quotainfo {
93 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ 66 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
94 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ 67 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
95 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ 68 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
69 struct shrinker qi_shrinker;
96} xfs_quotainfo_t; 70} xfs_quotainfo_t;
97 71
72#define XFS_DQUOT_TREE(qi, type) \
73 ((type & XFS_DQ_USER) ? \
74 &((qi)->qi_uquota_tree) : \
75 &((qi)->qi_gquota_tree))
76
98 77
99extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); 78extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
100extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, 79extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
@@ -130,7 +109,7 @@ extern int xfs_qm_quotacheck(xfs_mount_t *);
130extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); 109extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
131 110
132/* dquot stuff */ 111/* dquot stuff */
133extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); 112extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint);
134extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); 113extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
135 114
136/* quota ops */ 115/* quota ops */
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index a0a829addca9..e6986b5d80d8 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -40,28 +40,28 @@
40STATIC void 40STATIC void
41xfs_fill_statvfs_from_dquot( 41xfs_fill_statvfs_from_dquot(
42 struct kstatfs *statp, 42 struct kstatfs *statp,
43 xfs_disk_dquot_t *dp) 43 struct xfs_dquot *dqp)
44{ 44{
45 __uint64_t limit; 45 __uint64_t limit;
46 46
47 limit = dp->d_blk_softlimit ? 47 limit = dqp->q_core.d_blk_softlimit ?
48 be64_to_cpu(dp->d_blk_softlimit) : 48 be64_to_cpu(dqp->q_core.d_blk_softlimit) :
49 be64_to_cpu(dp->d_blk_hardlimit); 49 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
50 if (limit && statp->f_blocks > limit) { 50 if (limit && statp->f_blocks > limit) {
51 statp->f_blocks = limit; 51 statp->f_blocks = limit;
52 statp->f_bfree = statp->f_bavail = 52 statp->f_bfree = statp->f_bavail =
53 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? 53 (statp->f_blocks > dqp->q_res_bcount) ?
54 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; 54 (statp->f_blocks - dqp->q_res_bcount) : 0;
55 } 55 }
56 56
57 limit = dp->d_ino_softlimit ? 57 limit = dqp->q_core.d_ino_softlimit ?
58 be64_to_cpu(dp->d_ino_softlimit) : 58 be64_to_cpu(dqp->q_core.d_ino_softlimit) :
59 be64_to_cpu(dp->d_ino_hardlimit); 59 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
60 if (limit && statp->f_files > limit) { 60 if (limit && statp->f_files > limit) {
61 statp->f_files = limit; 61 statp->f_files = limit;
62 statp->f_ffree = 62 statp->f_ffree =
63 (statp->f_files > be64_to_cpu(dp->d_icount)) ? 63 (statp->f_files > dqp->q_res_icount) ?
64 (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; 64 (statp->f_ffree - dqp->q_res_icount) : 0;
65 } 65 }
66} 66}
67 67
@@ -82,7 +82,7 @@ xfs_qm_statvfs(
82 xfs_dquot_t *dqp; 82 xfs_dquot_t *dqp;
83 83
84 if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { 84 if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
85 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); 85 xfs_fill_statvfs_from_dquot(statp, dqp);
86 xfs_qm_dqput(dqp); 86 xfs_qm_dqput(dqp);
87 } 87 }
88} 88}
@@ -156,21 +156,3 @@ xfs_qm_newmount(
156 156
157 return 0; 157 return 0;
158} 158}
159
160void __init
161xfs_qm_init(void)
162{
163 printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
164 mutex_init(&xfs_Gqm_lock);
165 xfs_qm_init_procfs();
166}
167
168void __exit
169xfs_qm_exit(void)
170{
171 xfs_qm_cleanup_procfs();
172 if (qm_dqzone)
173 kmem_zone_destroy(qm_dqzone);
174 if (qm_dqtrxzone)
175 kmem_zone_destroy(qm_dqtrxzone);
176}
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
deleted file mode 100644
index 5729ba570877..000000000000
--- a/fs/xfs/xfs_qm_stats.c
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_bit.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_alloc.h"
27#include "xfs_quota.h"
28#include "xfs_mount.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_inode.h"
31#include "xfs_itable.h"
32#include "xfs_bmap.h"
33#include "xfs_rtalloc.h"
34#include "xfs_error.h"
35#include "xfs_attr.h"
36#include "xfs_buf_item.h"
37#include "xfs_qm.h"
38
39struct xqmstats xqmstats;
40
41static int xqm_proc_show(struct seq_file *m, void *v)
42{
43 /* maximum; incore; ratio free to inuse; freelist */
44 seq_printf(m, "%d\t%d\t%d\t%u\n",
45 0,
46 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
47 0,
48 xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
49 return 0;
50}
51
52static int xqm_proc_open(struct inode *inode, struct file *file)
53{
54 return single_open(file, xqm_proc_show, NULL);
55}
56
57static const struct file_operations xqm_proc_fops = {
58 .owner = THIS_MODULE,
59 .open = xqm_proc_open,
60 .read = seq_read,
61 .llseek = seq_lseek,
62 .release = single_release,
63};
64
65static int xqmstat_proc_show(struct seq_file *m, void *v)
66{
67 /* quota performance statistics */
68 seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
69 xqmstats.xs_qm_dqreclaims,
70 xqmstats.xs_qm_dqreclaim_misses,
71 xqmstats.xs_qm_dquot_dups,
72 xqmstats.xs_qm_dqcachemisses,
73 xqmstats.xs_qm_dqcachehits,
74 xqmstats.xs_qm_dqwants,
75 xqmstats.xs_qm_dqshake_reclaims,
76 xqmstats.xs_qm_dqinact_reclaims);
77 return 0;
78}
79
80static int xqmstat_proc_open(struct inode *inode, struct file *file)
81{
82 return single_open(file, xqmstat_proc_show, NULL);
83}
84
85static const struct file_operations xqmstat_proc_fops = {
86 .owner = THIS_MODULE,
87 .open = xqmstat_proc_open,
88 .read = seq_read,
89 .llseek = seq_lseek,
90 .release = single_release,
91};
92
93void
94xfs_qm_init_procfs(void)
95{
96 proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
97 proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
98}
99
100void
101xfs_qm_cleanup_procfs(void)
102{
103 remove_proc_entry("fs/xfs/xqm", NULL);
104 remove_proc_entry("fs/xfs/xqmstat", NULL);
105}
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h
deleted file mode 100644
index 5b964fc0dc09..000000000000
--- a/fs/xfs/xfs_qm_stats.h
+++ /dev/null
@@ -1,53 +0,0 @@
1/*
2 * Copyright (c) 2002 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_QM_STATS_H__
19#define __XFS_QM_STATS_H__
20
21#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
22
23/*
24 * XQM global statistics
25 */
26struct xqmstats {
27 __uint32_t xs_qm_dqreclaims;
28 __uint32_t xs_qm_dqreclaim_misses;
29 __uint32_t xs_qm_dquot_dups;
30 __uint32_t xs_qm_dqcachemisses;
31 __uint32_t xs_qm_dqcachehits;
32 __uint32_t xs_qm_dqwants;
33 __uint32_t xs_qm_dqshake_reclaims;
34 __uint32_t xs_qm_dqinact_reclaims;
35};
36
37extern struct xqmstats xqmstats;
38
39# define XQM_STATS_INC(count) ( (count)++ )
40
41extern void xfs_qm_init_procfs(void);
42extern void xfs_qm_cleanup_procfs(void);
43
44#else
45
46# define XQM_STATS_INC(count) do { } while (0)
47
48static inline void xfs_qm_init_procfs(void) { };
49static inline void xfs_qm_cleanup_procfs(void) { };
50
51#endif
52
53#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 711a86e39ff0..c4f396e437a8 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -47,9 +47,6 @@ STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
47 uint); 47 uint);
48STATIC uint xfs_qm_export_flags(uint); 48STATIC uint xfs_qm_export_flags(uint);
49STATIC uint xfs_qm_export_qtype_flags(uint); 49STATIC uint xfs_qm_export_qtype_flags(uint);
50STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
51 fs_disk_quota_t *);
52
53 50
54/* 51/*
55 * Turn off quota accounting and/or enforcement for all udquots and/or 52 * Turn off quota accounting and/or enforcement for all udquots and/or
@@ -69,7 +66,6 @@ xfs_qm_scall_quotaoff(
69 int error; 66 int error;
70 uint inactivate_flags; 67 uint inactivate_flags;
71 xfs_qoff_logitem_t *qoffstart; 68 xfs_qoff_logitem_t *qoffstart;
72 int nculprits;
73 69
74 /* 70 /*
75 * No file system can have quotas enabled on disk but not in core. 71 * No file system can have quotas enabled on disk but not in core.
@@ -175,18 +171,13 @@ xfs_qm_scall_quotaoff(
175 * This isn't protected by a particular lock directly, because we 171 * This isn't protected by a particular lock directly, because we
176 * don't want to take a mrlock every time we depend on quotas being on. 172 * don't want to take a mrlock every time we depend on quotas being on.
177 */ 173 */
178 mp->m_qflags &= ~(flags); 174 mp->m_qflags &= ~flags;
179 175
180 /* 176 /*
181 * Go through all the dquots of this file system and purge them, 177 * Go through all the dquots of this file system and purge them,
182 * according to what was turned off. We may not be able to get rid 178 * according to what was turned off.
183 * of all dquots, because dquots can have temporary references that
184 * are not attached to inodes. eg. xfs_setattr, xfs_create.
185 * So, if we couldn't purge all the dquots from the filesystem,
186 * we can't get rid of the incore data structures.
187 */ 179 */
188 while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) 180 xfs_qm_dqpurge_all(mp, dqtype);
189 delay(10 * nculprits);
190 181
191 /* 182 /*
192 * Transactions that had started before ACTIVE state bit was cleared 183 * Transactions that had started before ACTIVE state bit was cleared
@@ -635,42 +626,6 @@ xfs_qm_scall_setqlim(
635 return error; 626 return error;
636} 627}
637 628
638int
639xfs_qm_scall_getquota(
640 xfs_mount_t *mp,
641 xfs_dqid_t id,
642 uint type,
643 fs_disk_quota_t *out)
644{
645 xfs_dquot_t *dqp;
646 int error;
647
648 /*
649 * Try to get the dquot. We don't want it allocated on disk, so
650 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
651 * exist, we'll get ENOENT back.
652 */
653 if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
654 return (error);
655 }
656
657 /*
658 * If everything's NULL, this dquot doesn't quite exist as far as
659 * our utility programs are concerned.
660 */
661 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
662 xfs_qm_dqput(dqp);
663 return XFS_ERROR(ENOENT);
664 }
665 /*
666 * Convert the disk dquot to the exportable format
667 */
668 xfs_qm_export_dquot(mp, &dqp->q_core, out);
669 xfs_qm_dqput(dqp);
670 return (error ? XFS_ERROR(EFAULT) : 0);
671}
672
673
674STATIC int 629STATIC int
675xfs_qm_log_quotaoff_end( 630xfs_qm_log_quotaoff_end(
676 xfs_mount_t *mp, 631 xfs_mount_t *mp,
@@ -759,50 +714,66 @@ error0:
759} 714}
760 715
761 716
762/* 717int
763 * Translate an internal style on-disk-dquot to the exportable format. 718xfs_qm_scall_getquota(
764 * The main differences are that the counters/limits are all in Basic 719 struct xfs_mount *mp,
765 * Blocks (BBs) instead of the internal FSBs, and all on-disk data has 720 xfs_dqid_t id,
766 * to be converted to the native endianness. 721 uint type,
767 */
768STATIC void
769xfs_qm_export_dquot(
770 xfs_mount_t *mp,
771 xfs_disk_dquot_t *src,
772 struct fs_disk_quota *dst) 722 struct fs_disk_quota *dst)
773{ 723{
724 struct xfs_dquot *dqp;
725 int error;
726
727 /*
728 * Try to get the dquot. We don't want it allocated on disk, so
729 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
730 * exist, we'll get ENOENT back.
731 */
732 error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp);
733 if (error)
734 return error;
735
736 /*
737 * If everything's NULL, this dquot doesn't quite exist as far as
738 * our utility programs are concerned.
739 */
740 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
741 error = XFS_ERROR(ENOENT);
742 goto out_put;
743 }
744
774 memset(dst, 0, sizeof(*dst)); 745 memset(dst, 0, sizeof(*dst));
775 dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ 746 dst->d_version = FS_DQUOT_VERSION;
776 dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); 747 dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags);
777 dst->d_id = be32_to_cpu(src->d_id); 748 dst->d_id = be32_to_cpu(dqp->q_core.d_id);
778 dst->d_blk_hardlimit = 749 dst->d_blk_hardlimit =
779 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); 750 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
780 dst->d_blk_softlimit = 751 dst->d_blk_softlimit =
781 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); 752 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit));
782 dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); 753 dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
783 dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); 754 dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
784 dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); 755 dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount);
785 dst->d_icount = be64_to_cpu(src->d_icount); 756 dst->d_icount = dqp->q_res_icount;
786 dst->d_btimer = be32_to_cpu(src->d_btimer); 757 dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer);
787 dst->d_itimer = be32_to_cpu(src->d_itimer); 758 dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer);
788 dst->d_iwarns = be16_to_cpu(src->d_iwarns); 759 dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns);
789 dst->d_bwarns = be16_to_cpu(src->d_bwarns); 760 dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns);
790 dst->d_rtb_hardlimit = 761 dst->d_rtb_hardlimit =
791 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); 762 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit));
792 dst->d_rtb_softlimit = 763 dst->d_rtb_softlimit =
793 XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); 764 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit));
794 dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); 765 dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount);
795 dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); 766 dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer);
796 dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); 767 dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns);
797 768
798 /* 769 /*
799 * Internally, we don't reset all the timers when quota enforcement 770 * Internally, we don't reset all the timers when quota enforcement
800 * gets turned off. No need to confuse the user level code, 771 * gets turned off. No need to confuse the user level code,
801 * so return zeroes in that case. 772 * so return zeroes in that case.
802 */ 773 */
803 if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || 774 if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) ||
804 (!XFS_IS_OQUOTA_ENFORCED(mp) && 775 (!XFS_IS_OQUOTA_ENFORCED(mp) &&
805 (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { 776 (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
806 dst->d_btimer = 0; 777 dst->d_btimer = 0;
807 dst->d_itimer = 0; 778 dst->d_itimer = 0;
808 dst->d_rtbtimer = 0; 779 dst->d_rtbtimer = 0;
@@ -823,6 +794,9 @@ xfs_qm_export_dquot(
823 } 794 }
824 } 795 }
825#endif 796#endif
797out_put:
798 xfs_qm_dqput(dqp);
799 return error;
826} 800}
827 801
828STATIC uint 802STATIC uint
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 8a0807e0f979..b50ec5b95d5a 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -174,6 +174,8 @@ typedef struct xfs_qoff_logformat {
174#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ 174#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */
175#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ 175#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */
176#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ 176#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */
177#define XFS_ALL_QUOTA_ACTIVE \
178 (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
177 179
178/* 180/*
179 * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees 181 * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
index 94a3d927d716..6d86219d93da 100644
--- a/fs/xfs/xfs_quota_priv.h
+++ b/fs/xfs/xfs_quota_priv.h
@@ -24,17 +24,6 @@
24 */ 24 */
25#define XFS_DQITER_MAP_SIZE 10 25#define XFS_DQITER_MAP_SIZE 10
26 26
27/*
28 * Hash into a bucket in the dquot hash table, based on <mp, id>.
29 */
30#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
31 (__psunsigned_t)(id)) & \
32 (xfs_Gqm->qm_dqhashmask - 1))
33#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \
34 (xfs_Gqm->qm_usr_dqhtable + \
35 XFS_DQ_HASHVAL(mp, id)) : \
36 (xfs_Gqm->qm_grp_dqhtable + \
37 XFS_DQ_HASHVAL(mp, id)))
38#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ 27#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
39 !dqp->q_core.d_blk_hardlimit && \ 28 !dqp->q_core.d_blk_hardlimit && \
40 !dqp->q_core.d_blk_softlimit && \ 29 !dqp->q_core.d_blk_softlimit && \
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index cb6ae715814a..f429d9d5d325 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -529,7 +529,6 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
529#define XFS_BB_TO_FSB(mp,bb) \ 529#define XFS_BB_TO_FSB(mp,bb) \
530 (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) 530 (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
531#define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) 531#define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log)
532#define XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1))
533 532
534/* 533/*
535 * File system block to byte conversions. 534 * File system block to byte conversions.
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 76fdc5861932..ce372b7d5644 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -20,9 +20,18 @@
20 20
21DEFINE_PER_CPU(struct xfsstats, xfsstats); 21DEFINE_PER_CPU(struct xfsstats, xfsstats);
22 22
23static int counter_val(int idx)
24{
25 int val = 0, cpu;
26
27 for_each_possible_cpu(cpu)
28 val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx));
29 return val;
30}
31
23static int xfs_stat_proc_show(struct seq_file *m, void *v) 32static int xfs_stat_proc_show(struct seq_file *m, void *v)
24{ 33{
25 int c, i, j, val; 34 int i, j;
26 __uint64_t xs_xstrat_bytes = 0; 35 __uint64_t xs_xstrat_bytes = 0;
27 __uint64_t xs_write_bytes = 0; 36 __uint64_t xs_write_bytes = 0;
28 __uint64_t xs_read_bytes = 0; 37 __uint64_t xs_read_bytes = 0;
@@ -50,20 +59,16 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
50 { "abtc2", XFSSTAT_END_ABTC_V2 }, 59 { "abtc2", XFSSTAT_END_ABTC_V2 },
51 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 60 { "bmbt2", XFSSTAT_END_BMBT_V2 },
52 { "ibt2", XFSSTAT_END_IBT_V2 }, 61 { "ibt2", XFSSTAT_END_IBT_V2 },
62 /* we print both series of quota information together */
63 { "qm", XFSSTAT_END_QM },
53 }; 64 };
54 65
55 /* Loop over all stats groups */ 66 /* Loop over all stats groups */
56 for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { 67 for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
57 seq_printf(m, "%s", xstats[i].desc); 68 seq_printf(m, "%s", xstats[i].desc);
58 /* inner loop does each group */ 69 /* inner loop does each group */
59 while (j < xstats[i].endpoint) { 70 for (; j < xstats[i].endpoint; j++)
60 val = 0; 71 seq_printf(m, " %u", counter_val(j));
61 /* sum over all cpus */
62 for_each_possible_cpu(c)
63 val += *(((__u32*)&per_cpu(xfsstats, c) + j));
64 seq_printf(m, " %u", val);
65 j++;
66 }
67 seq_putc(m, '\n'); 72 seq_putc(m, '\n');
68 } 73 }
69 /* extra precision counters */ 74 /* extra precision counters */
@@ -97,6 +102,58 @@ static const struct file_operations xfs_stat_proc_fops = {
97 .release = single_release, 102 .release = single_release,
98}; 103};
99 104
105/* legacy quota interfaces */
106#ifdef CONFIG_XFS_QUOTA
107static int xqm_proc_show(struct seq_file *m, void *v)
108{
109 /* maximum; incore; ratio free to inuse; freelist */
110 seq_printf(m, "%d\t%d\t%d\t%u\n",
111 0,
112 counter_val(XFSSTAT_END_XQMSTAT),
113 0,
114 counter_val(XFSSTAT_END_XQMSTAT + 1));
115 return 0;
116}
117
118static int xqm_proc_open(struct inode *inode, struct file *file)
119{
120 return single_open(file, xqm_proc_show, NULL);
121}
122
123static const struct file_operations xqm_proc_fops = {
124 .owner = THIS_MODULE,
125 .open = xqm_proc_open,
126 .read = seq_read,
127 .llseek = seq_lseek,
128 .release = single_release,
129};
130
131/* legacy quota stats interface no 2 */
132static int xqmstat_proc_show(struct seq_file *m, void *v)
133{
134 int j;
135
136 seq_printf(m, "qm");
137 for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
138 seq_printf(m, " %u", counter_val(j));
139 seq_putc(m, '\n');
140 return 0;
141}
142
143static int xqmstat_proc_open(struct inode *inode, struct file *file)
144{
145 return single_open(file, xqmstat_proc_show, NULL);
146}
147
148static const struct file_operations xqmstat_proc_fops = {
149 .owner = THIS_MODULE,
150 .open = xqmstat_proc_open,
151 .read = seq_read,
152 .llseek = seq_lseek,
153 .release = single_release,
154};
155#endif /* CONFIG_XFS_QUOTA */
156
100int 157int
101xfs_init_procfs(void) 158xfs_init_procfs(void)
102{ 159{
@@ -105,10 +162,24 @@ xfs_init_procfs(void)
105 162
106 if (!proc_create("fs/xfs/stat", 0, NULL, 163 if (!proc_create("fs/xfs/stat", 0, NULL,
107 &xfs_stat_proc_fops)) 164 &xfs_stat_proc_fops))
108 goto out_remove_entry; 165 goto out_remove_xfs_dir;
166#ifdef CONFIG_XFS_QUOTA
167 if (!proc_create("fs/xfs/xqmstat", 0, NULL,
168 &xqmstat_proc_fops))
169 goto out_remove_stat_file;
170 if (!proc_create("fs/xfs/xqm", 0, NULL,
171 &xqm_proc_fops))
172 goto out_remove_xqmstat_file;
173#endif
109 return 0; 174 return 0;
110 175
111 out_remove_entry: 176#ifdef CONFIG_XFS_QUOTA
177 out_remove_xqmstat_file:
178 remove_proc_entry("fs/xfs/xqmstat", NULL);
179 out_remove_stat_file:
180 remove_proc_entry("fs/xfs/stat", NULL);
181#endif
182 out_remove_xfs_dir:
112 remove_proc_entry("fs/xfs", NULL); 183 remove_proc_entry("fs/xfs", NULL);
113 out: 184 out:
114 return -ENOMEM; 185 return -ENOMEM;
@@ -117,6 +188,10 @@ xfs_init_procfs(void)
117void 188void
118xfs_cleanup_procfs(void) 189xfs_cleanup_procfs(void)
119{ 190{
191#ifdef CONFIG_XFS_QUOTA
192 remove_proc_entry("fs/xfs/xqm", NULL);
193 remove_proc_entry("fs/xfs/xqmstat", NULL);
194#endif
120 remove_proc_entry("fs/xfs/stat", NULL); 195 remove_proc_entry("fs/xfs/stat", NULL);
121 remove_proc_entry("fs/xfs", NULL); 196 remove_proc_entry("fs/xfs", NULL);
122} 197}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 736854b1ca1a..c03ad38ceaeb 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -183,6 +183,16 @@ struct xfsstats {
183 __uint32_t xs_ibt_2_alloc; 183 __uint32_t xs_ibt_2_alloc;
184 __uint32_t xs_ibt_2_free; 184 __uint32_t xs_ibt_2_free;
185 __uint32_t xs_ibt_2_moves; 185 __uint32_t xs_ibt_2_moves;
186#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6)
187 __uint32_t xs_qm_dqreclaims;
188 __uint32_t xs_qm_dqreclaim_misses;
189 __uint32_t xs_qm_dquot_dups;
190 __uint32_t xs_qm_dqcachemisses;
191 __uint32_t xs_qm_dqcachehits;
192 __uint32_t xs_qm_dqwants;
193#define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2)
194 __uint32_t xs_qm_dquot;
195 __uint32_t xs_qm_dquot_unused;
186/* Extra precision counters */ 196/* Extra precision counters */
187 __uint64_t xs_xstrat_bytes; 197 __uint64_t xs_xstrat_bytes;
188 __uint64_t xs_write_bytes; 198 __uint64_t xs_write_bytes;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index baf40e378d35..912442cf0f82 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -324,10 +324,9 @@ xfs_parseargs(
324 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 324 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
325 mp->m_flags |= XFS_MOUNT_FILESTREAMS; 325 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
326 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { 326 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
327 mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | 327 mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
328 XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | 328 mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
329 XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | 329 mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
330 XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
331 } else if (!strcmp(this_char, MNTOPT_QUOTA) || 330 } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
332 !strcmp(this_char, MNTOPT_UQUOTA) || 331 !strcmp(this_char, MNTOPT_UQUOTA) ||
333 !strcmp(this_char, MNTOPT_USRQUOTA)) { 332 !strcmp(this_char, MNTOPT_USRQUOTA)) {
@@ -760,6 +759,36 @@ xfs_setup_devices(
760 return 0; 759 return 0;
761} 760}
762 761
762STATIC int
763xfs_init_mount_workqueues(
764 struct xfs_mount *mp)
765{
766 mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
767 WQ_MEM_RECLAIM, 0, mp->m_fsname);
768 if (!mp->m_data_workqueue)
769 goto out;
770
771 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
772 WQ_MEM_RECLAIM, 0, mp->m_fsname);
773 if (!mp->m_unwritten_workqueue)
774 goto out_destroy_data_iodone_queue;
775
776 return 0;
777
778out_destroy_data_iodone_queue:
779 destroy_workqueue(mp->m_data_workqueue);
780out:
781 return -ENOMEM;
782}
783
784STATIC void
785xfs_destroy_mount_workqueues(
786 struct xfs_mount *mp)
787{
788 destroy_workqueue(mp->m_data_workqueue);
789 destroy_workqueue(mp->m_unwritten_workqueue);
790}
791
763/* Catch misguided souls that try to use this interface on XFS */ 792/* Catch misguided souls that try to use this interface on XFS */
764STATIC struct inode * 793STATIC struct inode *
765xfs_fs_alloc_inode( 794xfs_fs_alloc_inode(
@@ -834,91 +863,58 @@ xfs_fs_inode_init_once(
834} 863}
835 864
836/* 865/*
837 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that 866 * This is called by the VFS when dirtying inode metadata. This can happen
838 * we catch unlogged VFS level updates to the inode. 867 * for a few reasons, but we only care about timestamp updates, given that
868 * we handled the rest ourselves. In theory no other calls should happen,
869 * but for example generic_write_end() keeps dirtying the inode after
870 * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC,
871 * and skip this call otherwise.
839 * 872 *
840 * We need the barrier() to maintain correct ordering between unlogged 873 * We'll hopefully get a different method just for updating timestamps soon,
841 * updates and the transaction commit code that clears the i_update_core 874 * at which point this hack can go away, and maybe we'll also get real
842 * field. This requires all updates to be completed before marking the 875 * error handling here.
843 * inode dirty.
844 */ 876 */
845STATIC void 877STATIC void
846xfs_fs_dirty_inode( 878xfs_fs_dirty_inode(
847 struct inode *inode,
848 int flags)
849{
850 barrier();
851 XFS_I(inode)->i_update_core = 1;
852}
853
854STATIC int
855xfs_fs_write_inode(
856 struct inode *inode, 879 struct inode *inode,
857 struct writeback_control *wbc) 880 int flags)
858{ 881{
859 struct xfs_inode *ip = XFS_I(inode); 882 struct xfs_inode *ip = XFS_I(inode);
860 struct xfs_mount *mp = ip->i_mount; 883 struct xfs_mount *mp = ip->i_mount;
861 int error = EAGAIN; 884 struct xfs_trans *tp;
862 885 int error;
863 trace_xfs_write_inode(ip);
864
865 if (XFS_FORCED_SHUTDOWN(mp))
866 return -XFS_ERROR(EIO);
867
868 if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
869 /*
870 * Make sure the inode has made it it into the log. Instead
871 * of forcing it all the way to stable storage using a
872 * synchronous transaction we let the log force inside the
873 * ->sync_fs call do that for thus, which reduces the number
874 * of synchronous log forces dramatically.
875 */
876 error = xfs_log_dirty_inode(ip, NULL, 0);
877 if (error)
878 goto out;
879 return 0;
880 } else {
881 if (!ip->i_update_core)
882 return 0;
883 886
884 /* 887 if (flags != I_DIRTY_SYNC)
885 * We make this non-blocking if the inode is contended, return 888 return;
886 * EAGAIN to indicate to the caller that they did not succeed.
887 * This prevents the flush path from blocking on inodes inside
888 * another operation right now, they get caught later by
889 * xfs_sync.
890 */
891 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
892 goto out;
893 889
894 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 890 trace_xfs_dirty_inode(ip);
895 goto out_unlock;
896 891
897 /* 892 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
898 * Now we have the flush lock and the inode is not pinned, we 893 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
899 * can check if the inode is really clean as we know that 894 if (error) {
900 * there are no pending transaction completions, it is not 895 xfs_trans_cancel(tp, 0);
901 * waiting on the delayed write queue and there is no IO in 896 goto trouble;
902 * progress.
903 */
904 if (xfs_inode_clean(ip)) {
905 xfs_ifunlock(ip);
906 error = 0;
907 goto out_unlock;
908 }
909 error = xfs_iflush(ip, SYNC_TRYLOCK);
910 } 897 }
911 898 xfs_ilock(ip, XFS_ILOCK_EXCL);
912 out_unlock:
913 xfs_iunlock(ip, XFS_ILOCK_SHARED);
914 out:
915 /* 899 /*
916 * if we failed to write out the inode then mark 900 * Grab all the latest timestamps from the Linux inode.
917 * it dirty again so we'll try again later.
918 */ 901 */
902 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
903 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
904 ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
905 ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
906 ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
907 ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
908
909 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
910 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
911 error = xfs_trans_commit(tp, 0);
919 if (error) 912 if (error)
920 xfs_mark_inode_dirty_sync(ip); 913 goto trouble;
921 return -error; 914 return;
915
916trouble:
917 xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino);
922} 918}
923 919
924STATIC void 920STATIC void
@@ -983,6 +979,7 @@ xfs_fs_put_super(
983 xfs_unmountfs(mp); 979 xfs_unmountfs(mp);
984 xfs_freesb(mp); 980 xfs_freesb(mp);
985 xfs_icsb_destroy_counters(mp); 981 xfs_icsb_destroy_counters(mp);
982 xfs_destroy_mount_workqueues(mp);
986 xfs_close_devices(mp); 983 xfs_close_devices(mp);
987 xfs_free_fsname(mp); 984 xfs_free_fsname(mp);
988 kfree(mp); 985 kfree(mp);
@@ -1309,10 +1306,14 @@ xfs_fs_fill_super(
1309 if (error) 1306 if (error)
1310 goto out_free_fsname; 1307 goto out_free_fsname;
1311 1308
1312 error = xfs_icsb_init_counters(mp); 1309 error = xfs_init_mount_workqueues(mp);
1313 if (error) 1310 if (error)
1314 goto out_close_devices; 1311 goto out_close_devices;
1315 1312
1313 error = xfs_icsb_init_counters(mp);
1314 if (error)
1315 goto out_destroy_workqueues;
1316
1316 error = xfs_readsb(mp, flags); 1317 error = xfs_readsb(mp, flags);
1317 if (error) 1318 if (error)
1318 goto out_destroy_counters; 1319 goto out_destroy_counters;
@@ -1376,6 +1377,8 @@ xfs_fs_fill_super(
1376 xfs_freesb(mp); 1377 xfs_freesb(mp);
1377 out_destroy_counters: 1378 out_destroy_counters:
1378 xfs_icsb_destroy_counters(mp); 1379 xfs_icsb_destroy_counters(mp);
1380out_destroy_workqueues:
1381 xfs_destroy_mount_workqueues(mp);
1379 out_close_devices: 1382 out_close_devices:
1380 xfs_close_devices(mp); 1383 xfs_close_devices(mp);
1381 out_free_fsname: 1384 out_free_fsname:
@@ -1429,7 +1432,6 @@ static const struct super_operations xfs_super_operations = {
1429 .alloc_inode = xfs_fs_alloc_inode, 1432 .alloc_inode = xfs_fs_alloc_inode,
1430 .destroy_inode = xfs_fs_destroy_inode, 1433 .destroy_inode = xfs_fs_destroy_inode,
1431 .dirty_inode = xfs_fs_dirty_inode, 1434 .dirty_inode = xfs_fs_dirty_inode,
1432 .write_inode = xfs_fs_write_inode,
1433 .evict_inode = xfs_fs_evict_inode, 1435 .evict_inode = xfs_fs_evict_inode,
1434 .put_super = xfs_fs_put_super, 1436 .put_super = xfs_fs_put_super,
1435 .sync_fs = xfs_fs_sync_fs, 1437 .sync_fs = xfs_fs_sync_fs,
@@ -1651,13 +1653,17 @@ init_xfs_fs(void)
1651 if (error) 1653 if (error)
1652 goto out_cleanup_procfs; 1654 goto out_cleanup_procfs;
1653 1655
1654 vfs_initquota(); 1656 error = xfs_qm_init();
1657 if (error)
1658 goto out_sysctl_unregister;
1655 1659
1656 error = register_filesystem(&xfs_fs_type); 1660 error = register_filesystem(&xfs_fs_type);
1657 if (error) 1661 if (error)
1658 goto out_sysctl_unregister; 1662 goto out_qm_exit;
1659 return 0; 1663 return 0;
1660 1664
1665 out_qm_exit:
1666 xfs_qm_exit();
1661 out_sysctl_unregister: 1667 out_sysctl_unregister:
1662 xfs_sysctl_unregister(); 1668 xfs_sysctl_unregister();
1663 out_cleanup_procfs: 1669 out_cleanup_procfs:
@@ -1679,7 +1685,7 @@ init_xfs_fs(void)
1679STATIC void __exit 1685STATIC void __exit
1680exit_xfs_fs(void) 1686exit_xfs_fs(void)
1681{ 1687{
1682 vfs_exitquota(); 1688 xfs_qm_exit();
1683 unregister_filesystem(&xfs_fs_type); 1689 unregister_filesystem(&xfs_fs_type);
1684 xfs_sysctl_unregister(); 1690 xfs_sysctl_unregister();
1685 xfs_cleanup_procfs(); 1691 xfs_cleanup_procfs();
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 50a3266c999e..09b0c26b2245 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -21,13 +21,11 @@
21#include <linux/exportfs.h> 21#include <linux/exportfs.h>
22 22
23#ifdef CONFIG_XFS_QUOTA 23#ifdef CONFIG_XFS_QUOTA
24extern void xfs_qm_init(void); 24extern int xfs_qm_init(void);
25extern void xfs_qm_exit(void); 25extern void xfs_qm_exit(void);
26# define vfs_initquota() xfs_qm_init()
27# define vfs_exitquota() xfs_qm_exit()
28#else 26#else
29# define vfs_initquota() do { } while (0) 27# define xfs_qm_init() (0)
30# define vfs_exitquota() do { } while (0) 28# define xfs_qm_exit() do { } while (0)
31#endif 29#endif
32 30
33#ifdef CONFIG_XFS_POSIX_ACL 31#ifdef CONFIG_XFS_POSIX_ACL
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 40b75eecd2b4..205ebcb34d9e 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,32 +336,6 @@ xfs_sync_fsdata(
336 return error; 336 return error;
337} 337}
338 338
339int
340xfs_log_dirty_inode(
341 struct xfs_inode *ip,
342 struct xfs_perag *pag,
343 int flags)
344{
345 struct xfs_mount *mp = ip->i_mount;
346 struct xfs_trans *tp;
347 int error;
348
349 if (!ip->i_update_core)
350 return 0;
351
352 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
353 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
354 if (error) {
355 xfs_trans_cancel(tp, 0);
356 return error;
357 }
358
359 xfs_ilock(ip, XFS_ILOCK_EXCL);
360 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
361 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
362 return xfs_trans_commit(tp, 0);
363}
364
365/* 339/*
366 * When remounting a filesystem read-only or freezing the filesystem, we have 340 * When remounting a filesystem read-only or freezing the filesystem, we have
367 * two phases to execute. This first phase is syncing the data before we 341 * two phases to execute. This first phase is syncing the data before we
@@ -385,16 +359,6 @@ xfs_quiesce_data(
385{ 359{
386 int error, error2 = 0; 360 int error, error2 = 0;
387 361
388 /*
389 * Log all pending size and timestamp updates. The vfs writeback
390 * code is supposed to do this, but due to its overagressive
391 * livelock detection it will skip inodes where appending writes
392 * were written out in the first non-blocking sync phase if their
393 * completion took long enough that it happened after taking the
394 * timestamp for the cut-off in the blocking phase.
395 */
396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
397
398 /* force out the log */ 362 /* force out the log */
399 xfs_log_force(mp, XFS_LOG_SYNC); 363 xfs_log_force(mp, XFS_LOG_SYNC);
400 364
@@ -913,17 +877,15 @@ reclaim:
913 * can reference the inodes in the cache without taking references. 877 * can reference the inodes in the cache without taking references.
914 * 878 *
915 * We make that OK here by ensuring that we wait until the inode is 879 * We make that OK here by ensuring that we wait until the inode is
916 * unlocked after the lookup before we go ahead and free it. We get 880 * unlocked after the lookup before we go ahead and free it.
917 * both the ilock and the iolock because the code may need to drop the
918 * ilock one but will still hold the iolock.
919 */ 881 */
920 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 882 xfs_ilock(ip, XFS_ILOCK_EXCL);
921 xfs_qm_dqdetach(ip); 883 xfs_qm_dqdetach(ip);
922 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 884 xfs_iunlock(ip, XFS_ILOCK_EXCL);
923 885
924 xfs_inode_free(ip); 886 xfs_inode_free(ip);
925 return error;
926 887
888 return error;
927} 889}
928 890
929/* 891/*
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index fa965479d788..941202e7ac6e 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -34,8 +34,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
34 34
35void xfs_flush_inodes(struct xfs_inode *ip); 35void xfs_flush_inodes(struct xfs_inode *ip);
36 36
37int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
38
39int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
40int xfs_reclaim_inodes_count(struct xfs_mount *mp); 38int xfs_reclaim_inodes_count(struct xfs_mount *mp);
41void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 39void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index bb134a819930..75eb54af4d58 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -580,7 +580,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
580DEFINE_INODE_EVENT(xfs_dir_fsync); 580DEFINE_INODE_EVENT(xfs_dir_fsync);
581DEFINE_INODE_EVENT(xfs_file_fsync); 581DEFINE_INODE_EVENT(xfs_file_fsync);
582DEFINE_INODE_EVENT(xfs_destroy_inode); 582DEFINE_INODE_EVENT(xfs_destroy_inode);
583DEFINE_INODE_EVENT(xfs_write_inode); 583DEFINE_INODE_EVENT(xfs_dirty_inode);
584DEFINE_INODE_EVENT(xfs_evict_inode); 584DEFINE_INODE_EVENT(xfs_evict_inode);
585 585
586DEFINE_INODE_EVENT(xfs_dquot_dqalloc); 586DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
@@ -741,10 +741,10 @@ DEFINE_DQUOT_EVENT(xfs_dqalloc);
741DEFINE_DQUOT_EVENT(xfs_dqtobp_read); 741DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
742DEFINE_DQUOT_EVENT(xfs_dqread); 742DEFINE_DQUOT_EVENT(xfs_dqread);
743DEFINE_DQUOT_EVENT(xfs_dqread_fail); 743DEFINE_DQUOT_EVENT(xfs_dqread_fail);
744DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
745DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
746DEFINE_DQUOT_EVENT(xfs_dqget_hit); 744DEFINE_DQUOT_EVENT(xfs_dqget_hit);
747DEFINE_DQUOT_EVENT(xfs_dqget_miss); 745DEFINE_DQUOT_EVENT(xfs_dqget_miss);
746DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
747DEFINE_DQUOT_EVENT(xfs_dqget_dup);
748DEFINE_DQUOT_EVENT(xfs_dqput); 748DEFINE_DQUOT_EVENT(xfs_dqput);
749DEFINE_DQUOT_EVENT(xfs_dqput_wait); 749DEFINE_DQUOT_EVENT(xfs_dqput_wait);
750DEFINE_DQUOT_EVENT(xfs_dqput_free); 750DEFINE_DQUOT_EVENT(xfs_dqput_free);
@@ -782,12 +782,12 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
782 __entry->curr_res = tic->t_curr_res; 782 __entry->curr_res = tic->t_curr_res;
783 __entry->unit_res = tic->t_unit_res; 783 __entry->unit_res = tic->t_unit_res;
784 __entry->flags = tic->t_flags; 784 __entry->flags = tic->t_flags;
785 __entry->reserveq = list_empty(&log->l_reserveq); 785 __entry->reserveq = list_empty(&log->l_reserve_head.waiters);
786 __entry->writeq = list_empty(&log->l_writeq); 786 __entry->writeq = list_empty(&log->l_write_head.waiters);
787 xlog_crack_grant_head(&log->l_grant_reserve_head, 787 xlog_crack_grant_head(&log->l_reserve_head.grant,
788 &__entry->grant_reserve_cycle, 788 &__entry->grant_reserve_cycle,
789 &__entry->grant_reserve_bytes); 789 &__entry->grant_reserve_bytes);
790 xlog_crack_grant_head(&log->l_grant_write_head, 790 xlog_crack_grant_head(&log->l_write_head.grant,
791 &__entry->grant_write_cycle, 791 &__entry->grant_write_cycle,
792 &__entry->grant_write_bytes); 792 &__entry->grant_write_bytes);
793 __entry->curr_cycle = log->l_curr_cycle; 793 __entry->curr_cycle = log->l_curr_cycle;
@@ -826,20 +826,14 @@ DEFINE_EVENT(xfs_loggrant_class, name, \
826 TP_ARGS(log, tic)) 826 TP_ARGS(log, tic))
827DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); 827DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
828DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 828DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
829DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
830DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); 829DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
831DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
832DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
833DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
834DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); 830DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
835DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); 831DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
836DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); 832DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
837DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); 833DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
838DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); 834DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit);
839DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); 835DEFINE_LOGGRANT_EVENT(xfs_log_regrant);
840DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep); 836DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit);
841DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); 837DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); 838DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); 839DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 7adcdf15ae0c..103b00c90004 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -681,7 +681,6 @@ xfs_trans_reserve(
681 uint flags, 681 uint flags,
682 uint logcount) 682 uint logcount)
683{ 683{
684 int log_flags;
685 int error = 0; 684 int error = 0;
686 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 685 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
687 686
@@ -707,24 +706,32 @@ xfs_trans_reserve(
707 * Reserve the log space needed for this transaction. 706 * Reserve the log space needed for this transaction.
708 */ 707 */
709 if (logspace > 0) { 708 if (logspace > 0) {
710 ASSERT((tp->t_log_res == 0) || (tp->t_log_res == logspace)); 709 bool permanent = false;
711 ASSERT((tp->t_log_count == 0) || 710
712 (tp->t_log_count == logcount)); 711 ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace);
712 ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount);
713
713 if (flags & XFS_TRANS_PERM_LOG_RES) { 714 if (flags & XFS_TRANS_PERM_LOG_RES) {
714 log_flags = XFS_LOG_PERM_RESERV;
715 tp->t_flags |= XFS_TRANS_PERM_LOG_RES; 715 tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
716 permanent = true;
716 } else { 717 } else {
717 ASSERT(tp->t_ticket == NULL); 718 ASSERT(tp->t_ticket == NULL);
718 ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); 719 ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
719 log_flags = 0;
720 } 720 }
721 721
722 error = xfs_log_reserve(tp->t_mountp, logspace, logcount, 722 if (tp->t_ticket != NULL) {
723 &tp->t_ticket, 723 ASSERT(flags & XFS_TRANS_PERM_LOG_RES);
724 XFS_TRANSACTION, log_flags, tp->t_type); 724 error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
725 if (error) { 725 } else {
726 goto undo_blocks; 726 error = xfs_log_reserve(tp->t_mountp, logspace,
727 logcount, &tp->t_ticket,
728 XFS_TRANSACTION, permanent,
729 tp->t_type);
727 } 730 }
731
732 if (error)
733 goto undo_blocks;
734
728 tp->t_log_res = logspace; 735 tp->t_log_res = logspace;
729 tp->t_log_count = logcount; 736 tp->t_log_count = logcount;
730 } 737 }
@@ -752,6 +759,8 @@ xfs_trans_reserve(
752 */ 759 */
753undo_log: 760undo_log:
754 if (logspace > 0) { 761 if (logspace > 0) {
762 int log_flags;
763
755 if (flags & XFS_TRANS_PERM_LOG_RES) { 764 if (flags & XFS_TRANS_PERM_LOG_RES) {
756 log_flags = XFS_LOG_REL_PERM_RESERV; 765 log_flags = XFS_LOG_REL_PERM_RESERV;
757 } else { 766 } else {
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index ed9252bcdac9..1dead07f092c 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -611,50 +611,6 @@ xfs_ail_push_all(
611} 611}
612 612
613/* 613/*
614 * This is to be called when an item is unlocked that may have
615 * been in the AIL. It will wake up the first member of the AIL
616 * wait list if this item's unlocking might allow it to progress.
617 * If the item is in the AIL, then we need to get the AIL lock
618 * while doing our checking so we don't race with someone going
619 * to sleep waiting for this event in xfs_trans_push_ail().
620 */
621void
622xfs_trans_unlocked_item(
623 struct xfs_ail *ailp,
624 xfs_log_item_t *lip)
625{
626 xfs_log_item_t *min_lip;
627
628 /*
629 * If we're forcibly shutting down, we may have
630 * unlocked log items arbitrarily. The last thing
631 * we want to do is to move the tail of the log
632 * over some potentially valid data.
633 */
634 if (!(lip->li_flags & XFS_LI_IN_AIL) ||
635 XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
636 return;
637 }
638
639 /*
640 * This is the one case where we can call into xfs_ail_min()
641 * without holding the AIL lock because we only care about the
642 * case where we are at the tail of the AIL. If the object isn't
643 * at the tail, it doesn't matter what result we get back. This
644 * is slightly racy because since we were just unlocked, we could
645 * go to sleep between the call to xfs_ail_min and the call to
646 * xfs_log_move_tail, have someone else lock us, commit to us disk,
647 * move us out of the tail of the AIL, and then we wake up. However,
648 * the call to xfs_log_move_tail() doesn't do anything if there's
649 * not enough free space to wake people up so we're safe calling it.
650 */
651 min_lip = xfs_ail_min(ailp);
652
653 if (min_lip == lip)
654 xfs_log_move_tail(ailp->xa_mount, 1);
655} /* xfs_trans_unlocked_item */
656
657/*
658 * xfs_trans_ail_update - bulk AIL insertion operation. 614 * xfs_trans_ail_update - bulk AIL insertion operation.
659 * 615 *
660 * @xfs_trans_ail_update takes an array of log items that all need to be 616 * @xfs_trans_ail_update takes an array of log items that all need to be
@@ -685,7 +641,6 @@ xfs_trans_ail_update_bulk(
685 xfs_lsn_t lsn) __releases(ailp->xa_lock) 641 xfs_lsn_t lsn) __releases(ailp->xa_lock)
686{ 642{
687 xfs_log_item_t *mlip; 643 xfs_log_item_t *mlip;
688 xfs_lsn_t tail_lsn;
689 int mlip_changed = 0; 644 int mlip_changed = 0;
690 int i; 645 int i;
691 LIST_HEAD(tmp); 646 LIST_HEAD(tmp);
@@ -712,22 +667,12 @@ xfs_trans_ail_update_bulk(
712 667
713 if (!list_empty(&tmp)) 668 if (!list_empty(&tmp))
714 xfs_ail_splice(ailp, cur, &tmp, lsn); 669 xfs_ail_splice(ailp, cur, &tmp, lsn);
670 spin_unlock(&ailp->xa_lock);
715 671
716 if (!mlip_changed) { 672 if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
717 spin_unlock(&ailp->xa_lock); 673 xlog_assign_tail_lsn(ailp->xa_mount);
718 return; 674 xfs_log_space_wake(ailp->xa_mount);
719 } 675 }
720
721 /*
722 * It is not safe to access mlip after the AIL lock is dropped, so we
723 * must get a copy of li_lsn before we do so. This is especially
724 * important on 32-bit platforms where accessing and updating 64-bit
725 * values like li_lsn is not atomic.
726 */
727 mlip = xfs_ail_min(ailp);
728 tail_lsn = mlip->li_lsn;
729 spin_unlock(&ailp->xa_lock);
730 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
731} 676}
732 677
733/* 678/*
@@ -758,7 +703,6 @@ xfs_trans_ail_delete_bulk(
758 int nr_items) __releases(ailp->xa_lock) 703 int nr_items) __releases(ailp->xa_lock)
759{ 704{
760 xfs_log_item_t *mlip; 705 xfs_log_item_t *mlip;
761 xfs_lsn_t tail_lsn;
762 int mlip_changed = 0; 706 int mlip_changed = 0;
763 int i; 707 int i;
764 708
@@ -785,23 +729,12 @@ xfs_trans_ail_delete_bulk(
785 if (mlip == lip) 729 if (mlip == lip)
786 mlip_changed = 1; 730 mlip_changed = 1;
787 } 731 }
732 spin_unlock(&ailp->xa_lock);
788 733
789 if (!mlip_changed) { 734 if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
790 spin_unlock(&ailp->xa_lock); 735 xlog_assign_tail_lsn(ailp->xa_mount);
791 return; 736 xfs_log_space_wake(ailp->xa_mount);
792 } 737 }
793
794 /*
795 * It is not safe to access mlip after the AIL lock is dropped, so we
796 * must get a copy of li_lsn before we do so. This is especially
797 * important on 32-bit platforms where accessing and updating 64-bit
798 * values like li_lsn is not atomic. It is possible we've emptied the
799 * AIL here, so if that is the case, pass an LSN of 0 to the tail move.
800 */
801 mlip = xfs_ail_min(ailp);
802 tail_lsn = mlip ? mlip->li_lsn : 0;
803 spin_unlock(&ailp->xa_lock);
804 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
805} 738}
806 739
807/* 740/*
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 475a4ded4f41..1302d1d95a58 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -463,19 +463,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
463 * Default to a normal brelse() call if the tp is NULL. 463 * Default to a normal brelse() call if the tp is NULL.
464 */ 464 */
465 if (tp == NULL) { 465 if (tp == NULL) {
466 struct xfs_log_item *lip = bp->b_fspriv;
467
468 ASSERT(bp->b_transp == NULL); 466 ASSERT(bp->b_transp == NULL);
469
470 /*
471 * If there's a buf log item attached to the buffer,
472 * then let the AIL know that the buffer is being
473 * unlocked.
474 */
475 if (lip != NULL && lip->li_type == XFS_LI_BUF) {
476 bip = bp->b_fspriv;
477 xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
478 }
479 xfs_buf_relse(bp); 467 xfs_buf_relse(bp);
480 return; 468 return;
481 } 469 }
@@ -550,21 +538,10 @@ xfs_trans_brelse(xfs_trans_t *tp,
550 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); 538 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
551 ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); 539 ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
552 xfs_buf_item_relse(bp); 540 xfs_buf_item_relse(bp);
553 bip = NULL;
554 }
555 bp->b_transp = NULL;
556
557 /*
558 * If we've still got a buf log item on the buffer, then
559 * tell the AIL that the buffer is being unlocked.
560 */
561 if (bip != NULL) {
562 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
563 (xfs_log_item_t*)bip);
564 } 541 }
565 542
543 bp->b_transp = NULL;
566 xfs_buf_relse(bp); 544 xfs_buf_relse(bp);
567 return;
568} 545}
569 546
570/* 547/*
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index c4ba366d24e6..279099717ed2 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -605,7 +605,7 @@ xfs_trans_dqresv(
605 time_t timer; 605 time_t timer;
606 xfs_qwarncnt_t warns; 606 xfs_qwarncnt_t warns;
607 xfs_qwarncnt_t warnlimit; 607 xfs_qwarncnt_t warnlimit;
608 xfs_qcnt_t count; 608 xfs_qcnt_t total_count;
609 xfs_qcnt_t *resbcountp; 609 xfs_qcnt_t *resbcountp;
610 xfs_quotainfo_t *q = mp->m_quotainfo; 610 xfs_quotainfo_t *q = mp->m_quotainfo;
611 611
@@ -648,13 +648,12 @@ xfs_trans_dqresv(
648 * hardlimit or exceed the timelimit if we allocate 648 * hardlimit or exceed the timelimit if we allocate
649 * nblks. 649 * nblks.
650 */ 650 */
651 if (hardlimit > 0ULL && 651 total_count = *resbcountp + nblks;
652 hardlimit < nblks + *resbcountp) { 652 if (hardlimit && total_count > hardlimit) {
653 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); 653 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
654 goto error_return; 654 goto error_return;
655 } 655 }
656 if (softlimit > 0ULL && 656 if (softlimit && total_count > softlimit) {
657 softlimit < nblks + *resbcountp) {
658 if ((timer != 0 && get_seconds() > timer) || 657 if ((timer != 0 && get_seconds() > timer) ||
659 (warns != 0 && warns >= warnlimit)) { 658 (warns != 0 && warns >= warnlimit)) {
660 xfs_quota_warn(mp, dqp, 659 xfs_quota_warn(mp, dqp,
@@ -666,7 +665,7 @@ xfs_trans_dqresv(
666 } 665 }
667 } 666 }
668 if (ninos > 0) { 667 if (ninos > 0) {
669 count = be64_to_cpu(dqp->q_core.d_icount); 668 total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
670 timer = be32_to_cpu(dqp->q_core.d_itimer); 669 timer = be32_to_cpu(dqp->q_core.d_itimer);
671 warns = be16_to_cpu(dqp->q_core.d_iwarns); 670 warns = be16_to_cpu(dqp->q_core.d_iwarns);
672 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; 671 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
@@ -677,13 +676,11 @@ xfs_trans_dqresv(
677 if (!softlimit) 676 if (!softlimit)
678 softlimit = q->qi_isoftlimit; 677 softlimit = q->qi_isoftlimit;
679 678
680 if (hardlimit > 0ULL && 679 if (hardlimit && total_count > hardlimit) {
681 hardlimit < ninos + count) {
682 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); 680 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
683 goto error_return; 681 goto error_return;
684 } 682 }
685 if (softlimit > 0ULL && 683 if (softlimit && total_count > softlimit) {
686 softlimit < ninos + count) {
687 if ((timer != 0 && get_seconds() > timer) || 684 if ((timer != 0 && get_seconds() > timer) ||
688 (warns != 0 && warns >= warnlimit)) { 685 (warns != 0 && warns >= warnlimit)) {
689 xfs_quota_warn(mp, dqp, 686 xfs_quota_warn(mp, dqp,
@@ -878,7 +875,7 @@ STATIC void
878xfs_trans_alloc_dqinfo( 875xfs_trans_alloc_dqinfo(
879 xfs_trans_t *tp) 876 xfs_trans_t *tp)
880{ 877{
881 tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); 878 tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP);
882} 879}
883 880
884void 881void
@@ -887,6 +884,6 @@ xfs_trans_free_dqinfo(
887{ 884{
888 if (!tp->t_dqinfo) 885 if (!tp->t_dqinfo)
889 return; 886 return;
890 kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); 887 kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo);
891 tp->t_dqinfo = NULL; 888 tp->t_dqinfo = NULL;
892} 889}
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 32f0288ae10f..7a7442c03f2b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -95,10 +95,14 @@ xfs_trans_ichgtime(
95 if ((flags & XFS_ICHGTIME_MOD) && 95 if ((flags & XFS_ICHGTIME_MOD) &&
96 !timespec_equal(&inode->i_mtime, &tv)) { 96 !timespec_equal(&inode->i_mtime, &tv)) {
97 inode->i_mtime = tv; 97 inode->i_mtime = tv;
98 ip->i_d.di_mtime.t_sec = tv.tv_sec;
99 ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
98 } 100 }
99 if ((flags & XFS_ICHGTIME_CHG) && 101 if ((flags & XFS_ICHGTIME_CHG) &&
100 !timespec_equal(&inode->i_ctime, &tv)) { 102 !timespec_equal(&inode->i_ctime, &tv)) {
101 inode->i_ctime = tv; 103 inode->i_ctime = tv;
104 ip->i_d.di_ctime.t_sec = tv.tv_sec;
105 ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
102 } 106 }
103} 107}
104 108
@@ -126,12 +130,12 @@ xfs_trans_log_inode(
126 /* 130 /*
127 * Always OR in the bits from the ili_last_fields field. 131 * Always OR in the bits from the ili_last_fields field.
128 * This is to coordinate with the xfs_iflush() and xfs_iflush_done() 132 * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
129 * routines in the eventual clearing of the ilf_fields bits. 133 * routines in the eventual clearing of the ili_fields bits.
130 * See the big comment in xfs_iflush() for an explanation of 134 * See the big comment in xfs_iflush() for an explanation of
131 * this coordination mechanism. 135 * this coordination mechanism.
132 */ 136 */
133 flags |= ip->i_itemp->ili_last_fields; 137 flags |= ip->i_itemp->ili_last_fields;
134 ip->i_itemp->ili_format.ilf_fields |= flags; 138 ip->i_itemp->ili_fields |= flags;
135} 139}
136 140
137#ifdef XFS_TRANS_DEBUG 141#ifdef XFS_TRANS_DEBUG
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 44820b9fcb43..8ab2ced415f1 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -104,9 +104,6 @@ void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
104void xfs_ail_push_all(struct xfs_ail *); 104void xfs_ail_push_all(struct xfs_ail *);
105xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); 105xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
106 106
107void xfs_trans_unlocked_item(struct xfs_ail *,
108 xfs_log_item_t *);
109
110struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 107struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
111 struct xfs_ail_cursor *cur, 108 struct xfs_ail_cursor *cur,
112 xfs_lsn_t lsn); 109 xfs_lsn_t lsn);
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
index 7c220b4227bc..db14d0c08682 100644
--- a/fs/xfs/xfs_vnode.h
+++ b/fs/xfs/xfs_vnode.h
@@ -22,7 +22,6 @@
22 22
23struct file; 23struct file;
24struct xfs_inode; 24struct xfs_inode;
25struct xfs_iomap;
26struct attrlist_cursor_kern; 25struct attrlist_cursor_kern;
27 26
28/* 27/*
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 0c877cbde142..447e146b2ba6 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -10,7 +10,6 @@ struct kiocb;
10struct pipe_inode_info; 10struct pipe_inode_info;
11struct uio; 11struct uio;
12struct xfs_inode; 12struct xfs_inode;
13struct xfs_iomap;
14 13
15 14
16int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); 15int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
@@ -49,8 +48,6 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
49int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); 48int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
50int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, 49int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
51 int flags, struct attrlist_cursor_kern *cursor); 50 int flags, struct attrlist_cursor_kern *cursor);
52int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
53 int flags, struct xfs_iomap *iomapp, int *niomaps);
54void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, 51void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first,
55 xfs_off_t last, int fiopt); 52 xfs_off_t last, int fiopt);
56int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, 53int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first,