aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/dlm/lowcomms.c26
-rw-r--r--fs/ext3/fsync.c12
-rw-r--r--fs/ext3/inode.c28
-rw-r--r--fs/fuse/control.c138
-rw-r--r--fs/fuse/dev.c10
-rw-r--r--fs/fuse/fuse_i.h18
-rw-r--r--fs/fuse/inode.c80
-rw-r--r--fs/jbd/checkpoint.c6
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd/journal.c30
-rw-r--r--fs/jbd/recovery.c18
-rw-r--r--fs/jbd/revoke.c16
-rw-r--r--fs/jbd/transaction.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c51
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c24
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c78
-rw-r--r--fs/xfs/xfs_ag.h9
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_bmap.h11
-rw-r--r--fs/xfs/xfs_bmap_btree.c20
-rw-r--r--fs/xfs/xfs_bmap_btree.h1
-rw-r--r--fs/xfs/xfs_btree.c42
-rw-r--r--fs/xfs/xfs_btree.h15
-rw-r--r--fs/xfs/xfs_ialloc.c805
-rw-r--r--fs/xfs/xfs_ialloc.h18
-rw-r--r--fs/xfs/xfs_iget.c27
-rw-r--r--fs/xfs/xfs_inode.c8
-rw-r--r--fs/xfs/xfs_inode.h8
-rw-r--r--fs/xfs/xfs_inode_item.c10
-rw-r--r--fs/xfs/xfs_inode_item.h2
-rw-r--r--fs/xfs/xfs_inum.h1
-rw-r--r--fs/xfs/xfs_itable.c98
-rw-r--r--fs/xfs/xfs_itable.h5
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_mount.h3
-rw-r--r--fs/xfs/xfs_mru_cache.c29
-rw-r--r--fs/xfs/xfs_mru_cache.h1
-rw-r--r--fs/xfs/xfs_rw.c84
-rw-r--r--fs/xfs/xfs_rw.h7
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c4
-rw-r--r--fs/xfs/xfs_trans_inode.c86
-rw-r--r--fs/xfs/xfs_vnodeops.c17
51 files changed, 915 insertions, 997 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 618a60f03886..240cef14fe58 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -106,6 +106,7 @@ struct connection {
106#define CF_CONNECT_PENDING 3 106#define CF_CONNECT_PENDING 3
107#define CF_INIT_PENDING 4 107#define CF_INIT_PENDING 4
108#define CF_IS_OTHERCON 5 108#define CF_IS_OTHERCON 5
109#define CF_CLOSE 6
109 struct list_head writequeue; /* List of outgoing writequeue_entries */ 110 struct list_head writequeue; /* List of outgoing writequeue_entries */
110 spinlock_t writequeue_lock; 111 spinlock_t writequeue_lock;
111 int (*rx_action) (struct connection *); /* What to do when active */ 112 int (*rx_action) (struct connection *); /* What to do when active */
@@ -299,6 +300,8 @@ static void lowcomms_write_space(struct sock *sk)
299 300
300static inline void lowcomms_connect_sock(struct connection *con) 301static inline void lowcomms_connect_sock(struct connection *con)
301{ 302{
303 if (test_bit(CF_CLOSE, &con->flags))
304 return;
302 if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) 305 if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
303 queue_work(send_workqueue, &con->swork); 306 queue_work(send_workqueue, &con->swork);
304} 307}
@@ -926,10 +929,8 @@ static void tcp_connect_to_sock(struct connection *con)
926 goto out_err; 929 goto out_err;
927 930
928 memset(&saddr, 0, sizeof(saddr)); 931 memset(&saddr, 0, sizeof(saddr));
929 if (dlm_nodeid_to_addr(con->nodeid, &saddr)) { 932 if (dlm_nodeid_to_addr(con->nodeid, &saddr))
930 sock_release(sock);
931 goto out_err; 933 goto out_err;
932 }
933 934
934 sock->sk->sk_user_data = con; 935 sock->sk->sk_user_data = con;
935 con->rx_action = receive_from_sock; 936 con->rx_action = receive_from_sock;
@@ -1284,7 +1285,6 @@ out:
1284static void send_to_sock(struct connection *con) 1285static void send_to_sock(struct connection *con)
1285{ 1286{
1286 int ret = 0; 1287 int ret = 0;
1287 ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
1288 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; 1288 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
1289 struct writequeue_entry *e; 1289 struct writequeue_entry *e;
1290 int len, offset; 1290 int len, offset;
@@ -1293,8 +1293,6 @@ static void send_to_sock(struct connection *con)
1293 if (con->sock == NULL) 1293 if (con->sock == NULL)
1294 goto out_connect; 1294 goto out_connect;
1295 1295
1296 sendpage = con->sock->ops->sendpage;
1297
1298 spin_lock(&con->writequeue_lock); 1296 spin_lock(&con->writequeue_lock);
1299 for (;;) { 1297 for (;;) {
1300 e = list_entry(con->writequeue.next, struct writequeue_entry, 1298 e = list_entry(con->writequeue.next, struct writequeue_entry,
@@ -1309,8 +1307,8 @@ static void send_to_sock(struct connection *con)
1309 1307
1310 ret = 0; 1308 ret = 0;
1311 if (len) { 1309 if (len) {
1312 ret = sendpage(con->sock, e->page, offset, len, 1310 ret = kernel_sendpage(con->sock, e->page, offset, len,
1313 msg_flags); 1311 msg_flags);
1314 if (ret == -EAGAIN || ret == 0) { 1312 if (ret == -EAGAIN || ret == 0) {
1315 cond_resched(); 1313 cond_resched();
1316 goto out; 1314 goto out;
@@ -1370,6 +1368,13 @@ int dlm_lowcomms_close(int nodeid)
1370 log_print("closing connection to node %d", nodeid); 1368 log_print("closing connection to node %d", nodeid);
1371 con = nodeid2con(nodeid, 0); 1369 con = nodeid2con(nodeid, 0);
1372 if (con) { 1370 if (con) {
1371 clear_bit(CF_CONNECT_PENDING, &con->flags);
1372 clear_bit(CF_WRITE_PENDING, &con->flags);
1373 set_bit(CF_CLOSE, &con->flags);
1374 if (cancel_work_sync(&con->swork))
1375 log_print("canceled swork for node %d", nodeid);
1376 if (cancel_work_sync(&con->rwork))
1377 log_print("canceled rwork for node %d", nodeid);
1373 clean_one_writequeue(con); 1378 clean_one_writequeue(con);
1374 close_connection(con, true); 1379 close_connection(con, true);
1375 } 1380 }
@@ -1395,9 +1400,10 @@ static void process_send_sockets(struct work_struct *work)
1395 1400
1396 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { 1401 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
1397 con->connect_action(con); 1402 con->connect_action(con);
1403 set_bit(CF_WRITE_PENDING, &con->flags);
1398 } 1404 }
1399 clear_bit(CF_WRITE_PENDING, &con->flags); 1405 if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags))
1400 send_to_sock(con); 1406 send_to_sock(con);
1401} 1407}
1402 1408
1403 1409
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d33634119e17..451d166bbe93 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -23,6 +23,7 @@
23 */ 23 */
24 24
25#include <linux/time.h> 25#include <linux/time.h>
26#include <linux/blkdev.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
27#include <linux/sched.h> 28#include <linux/sched.h>
28#include <linux/writeback.h> 29#include <linux/writeback.h>
@@ -73,7 +74,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
73 } 74 }
74 75
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 76 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out; 77 goto flush;
77 78
78 /* 79 /*
79 * The VFS has written the file data. If the inode is unaltered 80 * The VFS has written the file data. If the inode is unaltered
@@ -85,7 +86,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
85 .nr_to_write = 0, /* sys_fsync did this */ 86 .nr_to_write = 0, /* sys_fsync did this */
86 }; 87 };
87 ret = sync_inode(inode, &wbc); 88 ret = sync_inode(inode, &wbc);
89 goto out;
88 } 90 }
91flush:
92 /*
93 * In case we didn't commit a transaction, we have to flush
94 * disk caches manually so that data really is on persistent
95 * storage
96 */
97 if (test_opt(inode->i_sb, BARRIER))
98 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
89out: 99out:
90 return ret; 100 return ret;
91} 101}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b49908a167ae..cd098a7b77fc 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -172,10 +172,21 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
172 * so before we call here everything must be consistently dirtied against 172 * so before we call here everything must be consistently dirtied against
173 * this transaction. 173 * this transaction.
174 */ 174 */
175static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) 175static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
176{ 176{
177 int ret;
178
177 jbd_debug(2, "restarting handle %p\n", handle); 179 jbd_debug(2, "restarting handle %p\n", handle);
178 return ext3_journal_restart(handle, blocks_for_truncate(inode)); 180 /*
181 * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle
182 * At this moment, get_block can be called only for blocks inside
183 * i_size since page cache has been already dropped and writes are
184 * blocked by i_mutex. So we can safely drop the truncate_mutex.
185 */
186 mutex_unlock(&EXT3_I(inode)->truncate_mutex);
187 ret = ext3_journal_restart(handle, blocks_for_truncate(inode));
188 mutex_lock(&EXT3_I(inode)->truncate_mutex);
189 return ret;
179} 190}
180 191
181/* 192/*
@@ -2072,7 +2083,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
2072 ext3_journal_dirty_metadata(handle, bh); 2083 ext3_journal_dirty_metadata(handle, bh);
2073 } 2084 }
2074 ext3_mark_inode_dirty(handle, inode); 2085 ext3_mark_inode_dirty(handle, inode);
2075 ext3_journal_test_restart(handle, inode); 2086 truncate_restart_transaction(handle, inode);
2076 if (bh) { 2087 if (bh) {
2077 BUFFER_TRACE(bh, "retaking write access"); 2088 BUFFER_TRACE(bh, "retaking write access");
2078 ext3_journal_get_write_access(handle, bh); 2089 ext3_journal_get_write_access(handle, bh);
@@ -2282,7 +2293,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2282 return; 2293 return;
2283 if (try_to_extend_transaction(handle, inode)) { 2294 if (try_to_extend_transaction(handle, inode)) {
2284 ext3_mark_inode_dirty(handle, inode); 2295 ext3_mark_inode_dirty(handle, inode);
2285 ext3_journal_test_restart(handle, inode); 2296 truncate_restart_transaction(handle, inode);
2286 } 2297 }
2287 2298
2288 ext3_free_blocks(handle, inode, nr, 1); 2299 ext3_free_blocks(handle, inode, nr, 1);
@@ -2892,6 +2903,10 @@ static int ext3_do_update_inode(handle_t *handle,
2892 struct buffer_head *bh = iloc->bh; 2903 struct buffer_head *bh = iloc->bh;
2893 int err = 0, rc, block; 2904 int err = 0, rc, block;
2894 2905
2906again:
2907 /* we can't allow multiple procs in here at once, its a bit racey */
2908 lock_buffer(bh);
2909
2895 /* For fields not not tracking in the in-memory inode, 2910 /* For fields not not tracking in the in-memory inode,
2896 * initialise them to zero for new inodes. */ 2911 * initialise them to zero for new inodes. */
2897 if (ei->i_state & EXT3_STATE_NEW) 2912 if (ei->i_state & EXT3_STATE_NEW)
@@ -2951,16 +2966,20 @@ static int ext3_do_update_inode(handle_t *handle,
2951 /* If this is the first large file 2966 /* If this is the first large file
2952 * created, add a flag to the superblock. 2967 * created, add a flag to the superblock.
2953 */ 2968 */
2969 unlock_buffer(bh);
2954 err = ext3_journal_get_write_access(handle, 2970 err = ext3_journal_get_write_access(handle,
2955 EXT3_SB(sb)->s_sbh); 2971 EXT3_SB(sb)->s_sbh);
2956 if (err) 2972 if (err)
2957 goto out_brelse; 2973 goto out_brelse;
2974
2958 ext3_update_dynamic_rev(sb); 2975 ext3_update_dynamic_rev(sb);
2959 EXT3_SET_RO_COMPAT_FEATURE(sb, 2976 EXT3_SET_RO_COMPAT_FEATURE(sb,
2960 EXT3_FEATURE_RO_COMPAT_LARGE_FILE); 2977 EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
2961 handle->h_sync = 1; 2978 handle->h_sync = 1;
2962 err = ext3_journal_dirty_metadata(handle, 2979 err = ext3_journal_dirty_metadata(handle,
2963 EXT3_SB(sb)->s_sbh); 2980 EXT3_SB(sb)->s_sbh);
2981 /* get our lock and start over */
2982 goto again;
2964 } 2983 }
2965 } 2984 }
2966 } 2985 }
@@ -2983,6 +3002,7 @@ static int ext3_do_update_inode(handle_t *handle,
2983 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 3002 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
2984 3003
2985 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 3004 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
3005 unlock_buffer(bh);
2986 rc = ext3_journal_dirty_metadata(handle, bh); 3006 rc = ext3_journal_dirty_metadata(handle, bh);
2987 if (!err) 3007 if (!err)
2988 err = rc; 3008 err = rc;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 99c99dfb0373..3773fd63d2f9 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -61,6 +61,121 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf,
61 return simple_read_from_buffer(buf, len, ppos, tmp, size); 61 return simple_read_from_buffer(buf, len, ppos, tmp, size);
62} 62}
63 63
64static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf,
65 size_t len, loff_t *ppos, unsigned val)
66{
67 char tmp[32];
68 size_t size = sprintf(tmp, "%u\n", val);
69
70 return simple_read_from_buffer(buf, len, ppos, tmp, size);
71}
72
73static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf,
74 size_t count, loff_t *ppos, unsigned *val,
75 unsigned global_limit)
76{
77 unsigned long t;
78 char tmp[32];
79 unsigned limit = (1 << 16) - 1;
80 int err;
81
82 if (*ppos || count >= sizeof(tmp) - 1)
83 return -EINVAL;
84
85 if (copy_from_user(tmp, buf, count))
86 return -EINVAL;
87
88 tmp[count] = '\0';
89
90 err = strict_strtoul(tmp, 0, &t);
91 if (err)
92 return err;
93
94 if (!capable(CAP_SYS_ADMIN))
95 limit = min(limit, global_limit);
96
97 if (t > limit)
98 return -EINVAL;
99
100 *val = t;
101
102 return count;
103}
104
105static ssize_t fuse_conn_max_background_read(struct file *file,
106 char __user *buf, size_t len,
107 loff_t *ppos)
108{
109 struct fuse_conn *fc;
110 unsigned val;
111
112 fc = fuse_ctl_file_conn_get(file);
113 if (!fc)
114 return 0;
115
116 val = fc->max_background;
117 fuse_conn_put(fc);
118
119 return fuse_conn_limit_read(file, buf, len, ppos, val);
120}
121
122static ssize_t fuse_conn_max_background_write(struct file *file,
123 const char __user *buf,
124 size_t count, loff_t *ppos)
125{
126 unsigned val;
127 ssize_t ret;
128
129 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
130 max_user_bgreq);
131 if (ret > 0) {
132 struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
133 if (fc) {
134 fc->max_background = val;
135 fuse_conn_put(fc);
136 }
137 }
138
139 return ret;
140}
141
142static ssize_t fuse_conn_congestion_threshold_read(struct file *file,
143 char __user *buf, size_t len,
144 loff_t *ppos)
145{
146 struct fuse_conn *fc;
147 unsigned val;
148
149 fc = fuse_ctl_file_conn_get(file);
150 if (!fc)
151 return 0;
152
153 val = fc->congestion_threshold;
154 fuse_conn_put(fc);
155
156 return fuse_conn_limit_read(file, buf, len, ppos, val);
157}
158
159static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
160 const char __user *buf,
161 size_t count, loff_t *ppos)
162{
163 unsigned val;
164 ssize_t ret;
165
166 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
167 max_user_congthresh);
168 if (ret > 0) {
169 struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
170 if (fc) {
171 fc->congestion_threshold = val;
172 fuse_conn_put(fc);
173 }
174 }
175
176 return ret;
177}
178
64static const struct file_operations fuse_ctl_abort_ops = { 179static const struct file_operations fuse_ctl_abort_ops = {
65 .open = nonseekable_open, 180 .open = nonseekable_open,
66 .write = fuse_conn_abort_write, 181 .write = fuse_conn_abort_write,
@@ -71,6 +186,18 @@ static const struct file_operations fuse_ctl_waiting_ops = {
71 .read = fuse_conn_waiting_read, 186 .read = fuse_conn_waiting_read,
72}; 187};
73 188
189static const struct file_operations fuse_conn_max_background_ops = {
190 .open = nonseekable_open,
191 .read = fuse_conn_max_background_read,
192 .write = fuse_conn_max_background_write,
193};
194
195static const struct file_operations fuse_conn_congestion_threshold_ops = {
196 .open = nonseekable_open,
197 .read = fuse_conn_congestion_threshold_read,
198 .write = fuse_conn_congestion_threshold_write,
199};
200
74static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, 201static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
75 struct fuse_conn *fc, 202 struct fuse_conn *fc,
76 const char *name, 203 const char *name,
@@ -127,9 +254,14 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
127 goto err; 254 goto err;
128 255
129 if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, 256 if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1,
130 NULL, &fuse_ctl_waiting_ops) || 257 NULL, &fuse_ctl_waiting_ops) ||
131 !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, 258 !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1,
132 NULL, &fuse_ctl_abort_ops)) 259 NULL, &fuse_ctl_abort_ops) ||
260 !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600,
261 1, NULL, &fuse_conn_max_background_ops) ||
262 !fuse_ctl_add_dentry(parent, fc, "congestion_threshold",
263 S_IFREG | 0600, 1, NULL,
264 &fuse_conn_congestion_threshold_ops))
133 goto err; 265 goto err;
134 266
135 return 0; 267 return 0;
@@ -156,7 +288,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
156 d_drop(dentry); 288 d_drop(dentry);
157 dput(dentry); 289 dput(dentry);
158 } 290 }
159 fuse_control_sb->s_root->d_inode->i_nlink--; 291 drop_nlink(fuse_control_sb->s_root->d_inode);
160} 292}
161 293
162static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) 294static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6484eb75acd6..51d9e33d634f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
250 250
251static void flush_bg_queue(struct fuse_conn *fc) 251static void flush_bg_queue(struct fuse_conn *fc)
252{ 252{
253 while (fc->active_background < FUSE_MAX_BACKGROUND && 253 while (fc->active_background < fc->max_background &&
254 !list_empty(&fc->bg_queue)) { 254 !list_empty(&fc->bg_queue)) {
255 struct fuse_req *req; 255 struct fuse_req *req;
256 256
@@ -280,11 +280,11 @@ __releases(&fc->lock)
280 list_del(&req->intr_entry); 280 list_del(&req->intr_entry);
281 req->state = FUSE_REQ_FINISHED; 281 req->state = FUSE_REQ_FINISHED;
282 if (req->background) { 282 if (req->background) {
283 if (fc->num_background == FUSE_MAX_BACKGROUND) { 283 if (fc->num_background == fc->max_background) {
284 fc->blocked = 0; 284 fc->blocked = 0;
285 wake_up_all(&fc->blocked_waitq); 285 wake_up_all(&fc->blocked_waitq);
286 } 286 }
287 if (fc->num_background == FUSE_CONGESTION_THRESHOLD && 287 if (fc->num_background == fc->congestion_threshold &&
288 fc->connected && fc->bdi_initialized) { 288 fc->connected && fc->bdi_initialized) {
289 clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); 289 clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
290 clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); 290 clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
@@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
410{ 410{
411 req->background = 1; 411 req->background = 1;
412 fc->num_background++; 412 fc->num_background++;
413 if (fc->num_background == FUSE_MAX_BACKGROUND) 413 if (fc->num_background == fc->max_background)
414 fc->blocked = 1; 414 fc->blocked = 1;
415 if (fc->num_background == FUSE_CONGESTION_THRESHOLD && 415 if (fc->num_background == fc->congestion_threshold &&
416 fc->bdi_initialized) { 416 fc->bdi_initialized) {
417 set_bdi_congested(&fc->bdi, BLK_RW_SYNC); 417 set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
418 set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); 418 set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 52b641fc0faf..fc9c79feb5f7 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -25,12 +25,6 @@
25/** Max number of pages that can be used in a single read request */ 25/** Max number of pages that can be used in a single read request */
26#define FUSE_MAX_PAGES_PER_REQ 32 26#define FUSE_MAX_PAGES_PER_REQ 32
27 27
28/** Maximum number of outstanding background requests */
29#define FUSE_MAX_BACKGROUND 12
30
31/** Congestion starts at 75% of maximum */
32#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100)
33
34/** Bias for fi->writectr, meaning new writepages must not be sent */ 28/** Bias for fi->writectr, meaning new writepages must not be sent */
35#define FUSE_NOWRITE INT_MIN 29#define FUSE_NOWRITE INT_MIN
36 30
@@ -38,7 +32,7 @@
38#define FUSE_NAME_MAX 1024 32#define FUSE_NAME_MAX 1024
39 33
40/** Number of dentries for each connection in the control filesystem */ 34/** Number of dentries for each connection in the control filesystem */
41#define FUSE_CTL_NUM_DENTRIES 3 35#define FUSE_CTL_NUM_DENTRIES 5
42 36
43/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem 37/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
44 module will check permissions based on the file mode. Otherwise no 38 module will check permissions based on the file mode. Otherwise no
@@ -55,6 +49,10 @@ extern struct list_head fuse_conn_list;
55/** Global mutex protecting fuse_conn_list and the control filesystem */ 49/** Global mutex protecting fuse_conn_list and the control filesystem */
56extern struct mutex fuse_mutex; 50extern struct mutex fuse_mutex;
57 51
52/** Module parameters */
53extern unsigned max_user_bgreq;
54extern unsigned max_user_congthresh;
55
58/** FUSE inode */ 56/** FUSE inode */
59struct fuse_inode { 57struct fuse_inode {
60 /** Inode data */ 58 /** Inode data */
@@ -349,6 +347,12 @@ struct fuse_conn {
349 /** rbtree of fuse_files waiting for poll events indexed by ph */ 347 /** rbtree of fuse_files waiting for poll events indexed by ph */
350 struct rb_root polled_files; 348 struct rb_root polled_files;
351 349
350 /** Maximum number of outstanding background requests */
351 unsigned max_background;
352
353 /** Number of background requests at which congestion starts */
354 unsigned congestion_threshold;
355
352 /** Number of requests currently in the background */ 356 /** Number of requests currently in the background */
353 unsigned num_background; 357 unsigned num_background;
354 358
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e5dbecd87b0f..6da947daabda 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -14,6 +14,7 @@
14#include <linux/seq_file.h> 14#include <linux/seq_file.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/moduleparam.h>
17#include <linux/parser.h> 18#include <linux/parser.h>
18#include <linux/statfs.h> 19#include <linux/statfs.h>
19#include <linux/random.h> 20#include <linux/random.h>
@@ -28,10 +29,34 @@ static struct kmem_cache *fuse_inode_cachep;
28struct list_head fuse_conn_list; 29struct list_head fuse_conn_list;
29DEFINE_MUTEX(fuse_mutex); 30DEFINE_MUTEX(fuse_mutex);
30 31
32static int set_global_limit(const char *val, struct kernel_param *kp);
33
34unsigned max_user_bgreq;
35module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
36 &max_user_bgreq, 0644);
37__MODULE_PARM_TYPE(max_user_bgreq, "uint");
38MODULE_PARM_DESC(max_user_bgreq,
39 "Global limit for the maximum number of backgrounded requests an "
40 "unprivileged user can set");
41
42unsigned max_user_congthresh;
43module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
44 &max_user_congthresh, 0644);
45__MODULE_PARM_TYPE(max_user_congthresh, "uint");
46MODULE_PARM_DESC(max_user_congthresh,
47 "Global limit for the maximum congestion threshold an "
48 "unprivileged user can set");
49
31#define FUSE_SUPER_MAGIC 0x65735546 50#define FUSE_SUPER_MAGIC 0x65735546
32 51
33#define FUSE_DEFAULT_BLKSIZE 512 52#define FUSE_DEFAULT_BLKSIZE 512
34 53
54/** Maximum number of outstanding background requests */
55#define FUSE_DEFAULT_MAX_BACKGROUND 12
56
57/** Congestion starts at 75% of maximum */
58#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
59
35struct fuse_mount_data { 60struct fuse_mount_data {
36 int fd; 61 int fd;
37 unsigned rootmode; 62 unsigned rootmode;
@@ -517,6 +542,8 @@ void fuse_conn_init(struct fuse_conn *fc)
517 INIT_LIST_HEAD(&fc->bg_queue); 542 INIT_LIST_HEAD(&fc->bg_queue);
518 INIT_LIST_HEAD(&fc->entry); 543 INIT_LIST_HEAD(&fc->entry);
519 atomic_set(&fc->num_waiting, 0); 544 atomic_set(&fc->num_waiting, 0);
545 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
546 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
520 fc->khctr = 0; 547 fc->khctr = 0;
521 fc->polled_files = RB_ROOT; 548 fc->polled_files = RB_ROOT;
522 fc->reqctr = 0; 549 fc->reqctr = 0;
@@ -727,6 +754,54 @@ static const struct super_operations fuse_super_operations = {
727 .show_options = fuse_show_options, 754 .show_options = fuse_show_options,
728}; 755};
729 756
757static void sanitize_global_limit(unsigned *limit)
758{
759 if (*limit == 0)
760 *limit = ((num_physpages << PAGE_SHIFT) >> 13) /
761 sizeof(struct fuse_req);
762
763 if (*limit >= 1 << 16)
764 *limit = (1 << 16) - 1;
765}
766
767static int set_global_limit(const char *val, struct kernel_param *kp)
768{
769 int rv;
770
771 rv = param_set_uint(val, kp);
772 if (rv)
773 return rv;
774
775 sanitize_global_limit((unsigned *)kp->arg);
776
777 return 0;
778}
779
780static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
781{
782 int cap_sys_admin = capable(CAP_SYS_ADMIN);
783
784 if (arg->minor < 13)
785 return;
786
787 sanitize_global_limit(&max_user_bgreq);
788 sanitize_global_limit(&max_user_congthresh);
789
790 if (arg->max_background) {
791 fc->max_background = arg->max_background;
792
793 if (!cap_sys_admin && fc->max_background > max_user_bgreq)
794 fc->max_background = max_user_bgreq;
795 }
796 if (arg->congestion_threshold) {
797 fc->congestion_threshold = arg->congestion_threshold;
798
799 if (!cap_sys_admin &&
800 fc->congestion_threshold > max_user_congthresh)
801 fc->congestion_threshold = max_user_congthresh;
802 }
803}
804
730static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 805static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
731{ 806{
732 struct fuse_init_out *arg = &req->misc.init_out; 807 struct fuse_init_out *arg = &req->misc.init_out;
@@ -736,6 +811,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
736 else { 811 else {
737 unsigned long ra_pages; 812 unsigned long ra_pages;
738 813
814 process_init_limits(fc, arg);
815
739 if (arg->minor >= 6) { 816 if (arg->minor >= 6) {
740 ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; 817 ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
741 if (arg->flags & FUSE_ASYNC_READ) 818 if (arg->flags & FUSE_ASYNC_READ)
@@ -1150,6 +1227,9 @@ static int __init fuse_init(void)
1150 if (res) 1227 if (res)
1151 goto err_sysfs_cleanup; 1228 goto err_sysfs_cleanup;
1152 1229
1230 sanitize_global_limit(&max_user_bgreq);
1231 sanitize_global_limit(&max_user_congthresh);
1232
1153 return 0; 1233 return 0;
1154 1234
1155 err_sysfs_cleanup: 1235 err_sysfs_cleanup:
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 61f32f3868cd..b0435dd0654d 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -456,7 +456,7 @@ int cleanup_journal_tail(journal_t *journal)
456{ 456{
457 transaction_t * transaction; 457 transaction_t * transaction;
458 tid_t first_tid; 458 tid_t first_tid;
459 unsigned long blocknr, freed; 459 unsigned int blocknr, freed;
460 460
461 if (is_journal_aborted(journal)) 461 if (is_journal_aborted(journal))
462 return 1; 462 return 1;
@@ -502,8 +502,8 @@ int cleanup_journal_tail(journal_t *journal)
502 freed = freed + journal->j_last - journal->j_first; 502 freed = freed + journal->j_last - journal->j_first;
503 503
504 jbd_debug(1, 504 jbd_debug(1,
505 "Cleaning journal tail from %d to %d (offset %lu), " 505 "Cleaning journal tail from %d to %d (offset %u), "
506 "freeing %lu\n", 506 "freeing %u\n",
507 journal->j_tail_sequence, first_tid, blocknr, freed); 507 journal->j_tail_sequence, first_tid, blocknr, freed);
508 508
509 journal->j_free += freed; 509 journal->j_free += freed;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 618e21c0b7a3..4bd882548c45 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -308,7 +308,7 @@ void journal_commit_transaction(journal_t *journal)
308 int bufs; 308 int bufs;
309 int flags; 309 int flags;
310 int err; 310 int err;
311 unsigned long blocknr; 311 unsigned int blocknr;
312 ktime_t start_time; 312 ktime_t start_time;
313 u64 commit_time; 313 u64 commit_time;
314 char *tagp = NULL; 314 char *tagp = NULL;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f96f85092d1c..bd3c073b485d 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -276,7 +276,7 @@ static void journal_kill_thread(journal_t *journal)
276int journal_write_metadata_buffer(transaction_t *transaction, 276int journal_write_metadata_buffer(transaction_t *transaction,
277 struct journal_head *jh_in, 277 struct journal_head *jh_in,
278 struct journal_head **jh_out, 278 struct journal_head **jh_out,
279 unsigned long blocknr) 279 unsigned int blocknr)
280{ 280{
281 int need_copy_out = 0; 281 int need_copy_out = 0;
282 int done_copy_out = 0; 282 int done_copy_out = 0;
@@ -567,9 +567,9 @@ int log_wait_commit(journal_t *journal, tid_t tid)
567 * Log buffer allocation routines: 567 * Log buffer allocation routines:
568 */ 568 */
569 569
570int journal_next_log_block(journal_t *journal, unsigned long *retp) 570int journal_next_log_block(journal_t *journal, unsigned int *retp)
571{ 571{
572 unsigned long blocknr; 572 unsigned int blocknr;
573 573
574 spin_lock(&journal->j_state_lock); 574 spin_lock(&journal->j_state_lock);
575 J_ASSERT(journal->j_free > 1); 575 J_ASSERT(journal->j_free > 1);
@@ -590,11 +590,11 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
590 * this is a no-op. If needed, we can use j_blk_offset - everything is 590 * this is a no-op. If needed, we can use j_blk_offset - everything is
591 * ready. 591 * ready.
592 */ 592 */
593int journal_bmap(journal_t *journal, unsigned long blocknr, 593int journal_bmap(journal_t *journal, unsigned int blocknr,
594 unsigned long *retp) 594 unsigned int *retp)
595{ 595{
596 int err = 0; 596 int err = 0;
597 unsigned long ret; 597 unsigned int ret;
598 598
599 if (journal->j_inode) { 599 if (journal->j_inode) {
600 ret = bmap(journal->j_inode, blocknr); 600 ret = bmap(journal->j_inode, blocknr);
@@ -604,7 +604,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr,
604 char b[BDEVNAME_SIZE]; 604 char b[BDEVNAME_SIZE];
605 605
606 printk(KERN_ALERT "%s: journal block not found " 606 printk(KERN_ALERT "%s: journal block not found "
607 "at offset %lu on %s\n", 607 "at offset %u on %s\n",
608 __func__, 608 __func__,
609 blocknr, 609 blocknr,
610 bdevname(journal->j_dev, b)); 610 bdevname(journal->j_dev, b));
@@ -630,7 +630,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr,
630struct journal_head *journal_get_descriptor_buffer(journal_t *journal) 630struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
631{ 631{
632 struct buffer_head *bh; 632 struct buffer_head *bh;
633 unsigned long blocknr; 633 unsigned int blocknr;
634 int err; 634 int err;
635 635
636 err = journal_next_log_block(journal, &blocknr); 636 err = journal_next_log_block(journal, &blocknr);
@@ -774,7 +774,7 @@ journal_t * journal_init_inode (struct inode *inode)
774 journal_t *journal = journal_init_common(); 774 journal_t *journal = journal_init_common();
775 int err; 775 int err;
776 int n; 776 int n;
777 unsigned long blocknr; 777 unsigned int blocknr;
778 778
779 if (!journal) 779 if (!journal)
780 return NULL; 780 return NULL;
@@ -846,12 +846,12 @@ static void journal_fail_superblock (journal_t *journal)
846static int journal_reset(journal_t *journal) 846static int journal_reset(journal_t *journal)
847{ 847{
848 journal_superblock_t *sb = journal->j_superblock; 848 journal_superblock_t *sb = journal->j_superblock;
849 unsigned long first, last; 849 unsigned int first, last;
850 850
851 first = be32_to_cpu(sb->s_first); 851 first = be32_to_cpu(sb->s_first);
852 last = be32_to_cpu(sb->s_maxlen); 852 last = be32_to_cpu(sb->s_maxlen);
853 if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) { 853 if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
854 printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n", 854 printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n",
855 first, last); 855 first, last);
856 journal_fail_superblock(journal); 856 journal_fail_superblock(journal);
857 return -EINVAL; 857 return -EINVAL;
@@ -885,7 +885,7 @@ static int journal_reset(journal_t *journal)
885 **/ 885 **/
886int journal_create(journal_t *journal) 886int journal_create(journal_t *journal)
887{ 887{
888 unsigned long blocknr; 888 unsigned int blocknr;
889 struct buffer_head *bh; 889 struct buffer_head *bh;
890 journal_superblock_t *sb; 890 journal_superblock_t *sb;
891 int i, err; 891 int i, err;
@@ -969,14 +969,14 @@ void journal_update_superblock(journal_t *journal, int wait)
969 if (sb->s_start == 0 && journal->j_tail_sequence == 969 if (sb->s_start == 0 && journal->j_tail_sequence ==
970 journal->j_transaction_sequence) { 970 journal->j_transaction_sequence) {
971 jbd_debug(1,"JBD: Skipping superblock update on recovered sb " 971 jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
972 "(start %ld, seq %d, errno %d)\n", 972 "(start %u, seq %d, errno %d)\n",
973 journal->j_tail, journal->j_tail_sequence, 973 journal->j_tail, journal->j_tail_sequence,
974 journal->j_errno); 974 journal->j_errno);
975 goto out; 975 goto out;
976 } 976 }
977 977
978 spin_lock(&journal->j_state_lock); 978 spin_lock(&journal->j_state_lock);
979 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 979 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
980 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 980 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
981 981
982 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 982 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1371,7 +1371,7 @@ int journal_flush(journal_t *journal)
1371{ 1371{
1372 int err = 0; 1372 int err = 0;
1373 transaction_t *transaction = NULL; 1373 transaction_t *transaction = NULL;
1374 unsigned long old_tail; 1374 unsigned int old_tail;
1375 1375
1376 spin_lock(&journal->j_state_lock); 1376 spin_lock(&journal->j_state_lock);
1377 1377
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index db5e982c5ddf..cb1a49ae605e 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
70{ 70{
71 int err; 71 int err;
72 unsigned int max, nbufs, next; 72 unsigned int max, nbufs, next;
73 unsigned long blocknr; 73 unsigned int blocknr;
74 struct buffer_head *bh; 74 struct buffer_head *bh;
75 75
76 struct buffer_head * bufs[MAXBUF]; 76 struct buffer_head * bufs[MAXBUF];
@@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
132 unsigned int offset) 132 unsigned int offset)
133{ 133{
134 int err; 134 int err;
135 unsigned long blocknr; 135 unsigned int blocknr;
136 struct buffer_head *bh; 136 struct buffer_head *bh;
137 137
138 *bhp = NULL; 138 *bhp = NULL;
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal,
314 struct recovery_info *info, enum passtype pass) 314 struct recovery_info *info, enum passtype pass)
315{ 315{
316 unsigned int first_commit_ID, next_commit_ID; 316 unsigned int first_commit_ID, next_commit_ID;
317 unsigned long next_log_block; 317 unsigned int next_log_block;
318 int err, success = 0; 318 int err, success = 0;
319 journal_superblock_t * sb; 319 journal_superblock_t * sb;
320 journal_header_t * tmp; 320 journal_header_t * tmp;
@@ -367,14 +367,14 @@ static int do_one_pass(journal_t *journal,
367 if (tid_geq(next_commit_ID, info->end_transaction)) 367 if (tid_geq(next_commit_ID, info->end_transaction))
368 break; 368 break;
369 369
370 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 370 jbd_debug(2, "Scanning for sequence ID %u at %u/%u\n",
371 next_commit_ID, next_log_block, journal->j_last); 371 next_commit_ID, next_log_block, journal->j_last);
372 372
373 /* Skip over each chunk of the transaction looking 373 /* Skip over each chunk of the transaction looking
374 * either the next descriptor block or the final commit 374 * either the next descriptor block or the final commit
375 * record. */ 375 * record. */
376 376
377 jbd_debug(3, "JBD: checking block %ld\n", next_log_block); 377 jbd_debug(3, "JBD: checking block %u\n", next_log_block);
378 err = jread(&bh, journal, next_log_block); 378 err = jread(&bh, journal, next_log_block);
379 if (err) 379 if (err)
380 goto failed; 380 goto failed;
@@ -429,7 +429,7 @@ static int do_one_pass(journal_t *journal,
429 tagp = &bh->b_data[sizeof(journal_header_t)]; 429 tagp = &bh->b_data[sizeof(journal_header_t)];
430 while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) 430 while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
431 <= journal->j_blocksize) { 431 <= journal->j_blocksize) {
432 unsigned long io_block; 432 unsigned int io_block;
433 433
434 tag = (journal_block_tag_t *) tagp; 434 tag = (journal_block_tag_t *) tagp;
435 flags = be32_to_cpu(tag->t_flags); 435 flags = be32_to_cpu(tag->t_flags);
@@ -443,10 +443,10 @@ static int do_one_pass(journal_t *journal,
443 success = err; 443 success = err;
444 printk (KERN_ERR 444 printk (KERN_ERR
445 "JBD: IO error %d recovering " 445 "JBD: IO error %d recovering "
446 "block %ld in log\n", 446 "block %u in log\n",
447 err, io_block); 447 err, io_block);
448 } else { 448 } else {
449 unsigned long blocknr; 449 unsigned int blocknr;
450 450
451 J_ASSERT(obh != NULL); 451 J_ASSERT(obh != NULL);
452 blocknr = be32_to_cpu(tag->t_blocknr); 452 blocknr = be32_to_cpu(tag->t_blocknr);
@@ -581,7 +581,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
581 max = be32_to_cpu(header->r_count); 581 max = be32_to_cpu(header->r_count);
582 582
583 while (offset < max) { 583 while (offset < max) {
584 unsigned long blocknr; 584 unsigned int blocknr;
585 int err; 585 int err;
586 586
587 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 587 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index da6cd9bdaabc..ad717328343a 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -101,7 +101,7 @@ struct jbd_revoke_record_s
101{ 101{
102 struct list_head hash; 102 struct list_head hash;
103 tid_t sequence; /* Used for recovery only */ 103 tid_t sequence; /* Used for recovery only */
104 unsigned long blocknr; 104 unsigned int blocknr;
105}; 105};
106 106
107 107
@@ -126,7 +126,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int);
126/* Utility functions to maintain the revoke table */ 126/* Utility functions to maintain the revoke table */
127 127
128/* Borrowed from buffer.c: this is a tried and tested block hash function */ 128/* Borrowed from buffer.c: this is a tried and tested block hash function */
129static inline int hash(journal_t *journal, unsigned long block) 129static inline int hash(journal_t *journal, unsigned int block)
130{ 130{
131 struct jbd_revoke_table_s *table = journal->j_revoke; 131 struct jbd_revoke_table_s *table = journal->j_revoke;
132 int hash_shift = table->hash_shift; 132 int hash_shift = table->hash_shift;
@@ -136,7 +136,7 @@ static inline int hash(journal_t *journal, unsigned long block)
136 (block << (hash_shift - 12))) & (table->hash_size - 1); 136 (block << (hash_shift - 12))) & (table->hash_size - 1);
137} 137}
138 138
139static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, 139static int insert_revoke_hash(journal_t *journal, unsigned int blocknr,
140 tid_t seq) 140 tid_t seq)
141{ 141{
142 struct list_head *hash_list; 142 struct list_head *hash_list;
@@ -166,7 +166,7 @@ oom:
166/* Find a revoke record in the journal's hash table. */ 166/* Find a revoke record in the journal's hash table. */
167 167
168static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, 168static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
169 unsigned long blocknr) 169 unsigned int blocknr)
170{ 170{
171 struct list_head *hash_list; 171 struct list_head *hash_list;
172 struct jbd_revoke_record_s *record; 172 struct jbd_revoke_record_s *record;
@@ -332,7 +332,7 @@ void journal_destroy_revoke(journal_t *journal)
332 * by one. 332 * by one.
333 */ 333 */
334 334
335int journal_revoke(handle_t *handle, unsigned long blocknr, 335int journal_revoke(handle_t *handle, unsigned int blocknr,
336 struct buffer_head *bh_in) 336 struct buffer_head *bh_in)
337{ 337{
338 struct buffer_head *bh = NULL; 338 struct buffer_head *bh = NULL;
@@ -401,7 +401,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
401 } 401 }
402 } 402 }
403 403
404 jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); 404 jbd_debug(2, "insert revoke for block %u, bh_in=%p\n", blocknr, bh_in);
405 err = insert_revoke_hash(journal, blocknr, 405 err = insert_revoke_hash(journal, blocknr,
406 handle->h_transaction->t_tid); 406 handle->h_transaction->t_tid);
407 BUFFER_TRACE(bh_in, "exit"); 407 BUFFER_TRACE(bh_in, "exit");
@@ -644,7 +644,7 @@ static void flush_descriptor(journal_t *journal,
644 */ 644 */
645 645
646int journal_set_revoke(journal_t *journal, 646int journal_set_revoke(journal_t *journal,
647 unsigned long blocknr, 647 unsigned int blocknr,
648 tid_t sequence) 648 tid_t sequence)
649{ 649{
650 struct jbd_revoke_record_s *record; 650 struct jbd_revoke_record_s *record;
@@ -668,7 +668,7 @@ int journal_set_revoke(journal_t *journal,
668 */ 668 */
669 669
670int journal_test_revoke(journal_t *journal, 670int journal_test_revoke(journal_t *journal,
671 unsigned long blocknr, 671 unsigned int blocknr,
672 tid_t sequence) 672 tid_t sequence)
673{ 673{
674 struct jbd_revoke_record_s *record; 674 struct jbd_revoke_record_s *record;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index c03ac11f74be..006f9ad838a2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -56,7 +56,8 @@ get_transaction(journal_t *journal, transaction_t *transaction)
56 spin_lock_init(&transaction->t_handle_lock); 56 spin_lock_init(&transaction->t_handle_lock);
57 57
58 /* Set up the commit timer for the new transaction. */ 58 /* Set up the commit timer for the new transaction. */
59 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); 59 journal->j_commit_timer.expires =
60 round_jiffies_up(transaction->t_expires);
60 add_timer(&journal->j_commit_timer); 61 add_timer(&journal->j_commit_timer);
61 62
62 J_ASSERT(journal->j_running_transaction == NULL); 63 J_ASSERT(journal->j_running_transaction == NULL);
@@ -228,6 +229,8 @@ repeat_locked:
228 __log_space_left(journal)); 229 __log_space_left(journal));
229 spin_unlock(&transaction->t_handle_lock); 230 spin_unlock(&transaction->t_handle_lock);
230 spin_unlock(&journal->j_state_lock); 231 spin_unlock(&journal->j_state_lock);
232
233 lock_map_acquire(&handle->h_lockdep_map);
231out: 234out:
232 if (unlikely(new_transaction)) /* It's usually NULL */ 235 if (unlikely(new_transaction)) /* It's usually NULL */
233 kfree(new_transaction); 236 kfree(new_transaction);
@@ -292,9 +295,6 @@ handle_t *journal_start(journal_t *journal, int nblocks)
292 handle = ERR_PTR(err); 295 handle = ERR_PTR(err);
293 goto out; 296 goto out;
294 } 297 }
295
296 lock_map_acquire(&handle->h_lockdep_map);
297
298out: 298out:
299 return handle; 299 return handle;
300} 300}
@@ -416,6 +416,7 @@ int journal_restart(handle_t *handle, int nblocks)
416 __log_start_commit(journal, transaction->t_tid); 416 __log_start_commit(journal, transaction->t_tid);
417 spin_unlock(&journal->j_state_lock); 417 spin_unlock(&journal->j_state_lock);
418 418
419 lock_map_release(&handle->h_lockdep_map);
419 handle->h_buffer_credits = nblocks; 420 handle->h_buffer_credits = nblocks;
420 ret = start_this_handle(journal, handle); 421 ret = start_this_handle(journal, handle);
421 return ret; 422 return ret;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index aecf2519db76..d5e5559e31db 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -216,7 +216,6 @@ xfs_setfilesize(
216 if (ip->i_d.di_size < isize) { 216 if (ip->i_d.di_size < isize) {
217 ip->i_d.di_size = isize; 217 ip->i_d.di_size = isize;
218 ip->i_update_core = 1; 218 ip->i_update_core = 1;
219 ip->i_update_size = 1;
220 xfs_mark_inode_dirty_sync(ip); 219 xfs_mark_inode_dirty_sync(ip);
221 } 220 }
222 221
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 0542fd507649..988d8f87bc0f 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -172,12 +172,21 @@ xfs_file_release(
172 */ 172 */
173STATIC int 173STATIC int
174xfs_file_fsync( 174xfs_file_fsync(
175 struct file *filp, 175 struct file *file,
176 struct dentry *dentry, 176 struct dentry *dentry,
177 int datasync) 177 int datasync)
178{ 178{
179 xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); 179 struct inode *inode = dentry->d_inode;
180 return -xfs_fsync(XFS_I(dentry->d_inode)); 180 struct xfs_inode *ip = XFS_I(inode);
181 int error;
182
183 /* capture size updates in I/O completion before writing the inode. */
184 error = filemap_fdatawait(inode->i_mapping);
185 if (error)
186 return error;
187
188 xfs_iflags_clear(ip, XFS_ITRUNCATED);
189 return -xfs_fsync(ip);
181} 190}
182 191
183STATIC int 192STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6c32f1d63d8c..da0159d99f82 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -43,7 +43,6 @@
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_itable.h" 44#include "xfs_itable.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_acl.h"
47#include "xfs_attr.h" 46#include "xfs_attr.h"
48#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
49#include "xfs_utils.h" 48#include "xfs_utils.h"
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index fde63a3c4ecc..49e4a6aea73c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -812,19 +812,21 @@ write_retry:
812 812
813 /* Handle various SYNC-type writes */ 813 /* Handle various SYNC-type writes */
814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
815 loff_t end = pos + ret - 1;
815 int error2; 816 int error2;
816 817
817 xfs_iunlock(xip, iolock); 818 xfs_iunlock(xip, iolock);
818 if (need_i_mutex) 819 if (need_i_mutex)
819 mutex_unlock(&inode->i_mutex); 820 mutex_unlock(&inode->i_mutex);
820 error2 = filemap_write_and_wait_range(mapping, pos, 821
821 pos + ret - 1); 822 error2 = filemap_write_and_wait_range(mapping, pos, end);
822 if (!error) 823 if (!error)
823 error = error2; 824 error = error2;
824 if (need_i_mutex) 825 if (need_i_mutex)
825 mutex_lock(&inode->i_mutex); 826 mutex_lock(&inode->i_mutex);
826 xfs_ilock(xip, iolock); 827 xfs_ilock(xip, iolock);
827 error2 = xfs_write_sync_logforce(mp, xip); 828
829 error2 = xfs_fsync(xip);
828 if (!error) 830 if (!error)
829 error = error2; 831 error = error2;
830 } 832 }
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index c3526d445f6a..76fdc5861932 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -20,16 +20,9 @@
20 20
21DEFINE_PER_CPU(struct xfsstats, xfsstats); 21DEFINE_PER_CPU(struct xfsstats, xfsstats);
22 22
23STATIC int 23static int xfs_stat_proc_show(struct seq_file *m, void *v)
24xfs_read_xfsstats(
25 char *buffer,
26 char **start,
27 off_t offset,
28 int count,
29 int *eof,
30 void *data)
31{ 24{
32 int c, i, j, len, val; 25 int c, i, j, val;
33 __uint64_t xs_xstrat_bytes = 0; 26 __uint64_t xs_xstrat_bytes = 0;
34 __uint64_t xs_write_bytes = 0; 27 __uint64_t xs_write_bytes = 0;
35 __uint64_t xs_read_bytes = 0; 28 __uint64_t xs_read_bytes = 0;
@@ -60,18 +53,18 @@ xfs_read_xfsstats(
60 }; 53 };
61 54
62 /* Loop over all stats groups */ 55 /* Loop over all stats groups */
63 for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { 56 for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
64 len += sprintf(buffer + len, "%s", xstats[i].desc); 57 seq_printf(m, "%s", xstats[i].desc);
65 /* inner loop does each group */ 58 /* inner loop does each group */
66 while (j < xstats[i].endpoint) { 59 while (j < xstats[i].endpoint) {
67 val = 0; 60 val = 0;
68 /* sum over all cpus */ 61 /* sum over all cpus */
69 for_each_possible_cpu(c) 62 for_each_possible_cpu(c)
70 val += *(((__u32*)&per_cpu(xfsstats, c) + j)); 63 val += *(((__u32*)&per_cpu(xfsstats, c) + j));
71 len += sprintf(buffer + len, " %u", val); 64 seq_printf(m, " %u", val);
72 j++; 65 j++;
73 } 66 }
74 buffer[len++] = '\n'; 67 seq_putc(m, '\n');
75 } 68 }
76 /* extra precision counters */ 69 /* extra precision counters */
77 for_each_possible_cpu(i) { 70 for_each_possible_cpu(i) {
@@ -80,36 +73,38 @@ xfs_read_xfsstats(
80 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; 73 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
81 } 74 }
82 75
83 len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", 76 seq_printf(m, "xpc %Lu %Lu %Lu\n",
84 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); 77 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
85 len += sprintf(buffer + len, "debug %u\n", 78 seq_printf(m, "debug %u\n",
86#if defined(DEBUG) 79#if defined(DEBUG)
87 1); 80 1);
88#else 81#else
89 0); 82 0);
90#endif 83#endif
84 return 0;
85}
91 86
92 if (offset >= len) { 87static int xfs_stat_proc_open(struct inode *inode, struct file *file)
93 *start = buffer; 88{
94 *eof = 1; 89 return single_open(file, xfs_stat_proc_show, NULL);
95 return 0;
96 }
97 *start = buffer + offset;
98 if ((len -= offset) > count)
99 return count;
100 *eof = 1;
101
102 return len;
103} 90}
104 91
92static const struct file_operations xfs_stat_proc_fops = {
93 .owner = THIS_MODULE,
94 .open = xfs_stat_proc_open,
95 .read = seq_read,
96 .llseek = seq_lseek,
97 .release = single_release,
98};
99
105int 100int
106xfs_init_procfs(void) 101xfs_init_procfs(void)
107{ 102{
108 if (!proc_mkdir("fs/xfs", NULL)) 103 if (!proc_mkdir("fs/xfs", NULL))
109 goto out; 104 goto out;
110 105
111 if (!create_proc_read_entry("fs/xfs/stat", 0, NULL, 106 if (!proc_create("fs/xfs/stat", 0, NULL,
112 xfs_read_xfsstats, NULL)) 107 &xfs_stat_proc_fops))
113 goto out_remove_entry; 108 goto out_remove_entry;
114 return 0; 109 return 0;
115 110
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a220d36f789b..5d7c60ac77b4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -579,15 +579,19 @@ xfs_showargs(
579 else if (mp->m_qflags & XFS_UQUOTA_ACCT) 579 else if (mp->m_qflags & XFS_UQUOTA_ACCT)
580 seq_puts(m, "," MNTOPT_UQUOTANOENF); 580 seq_puts(m, "," MNTOPT_UQUOTANOENF);
581 581
582 if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) 582 /* Either project or group quotas can be active, not both */
583 seq_puts(m, "," MNTOPT_PRJQUOTA); 583
584 else if (mp->m_qflags & XFS_PQUOTA_ACCT) 584 if (mp->m_qflags & XFS_PQUOTA_ACCT) {
585 seq_puts(m, "," MNTOPT_PQUOTANOENF); 585 if (mp->m_qflags & XFS_OQUOTA_ENFD)
586 586 seq_puts(m, "," MNTOPT_PRJQUOTA);
587 if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD)) 587 else
588 seq_puts(m, "," MNTOPT_GRPQUOTA); 588 seq_puts(m, "," MNTOPT_PQUOTANOENF);
589 else if (mp->m_qflags & XFS_GQUOTA_ACCT) 589 } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
590 seq_puts(m, "," MNTOPT_GQUOTANOENF); 590 if (mp->m_qflags & XFS_OQUOTA_ENFD)
591 seq_puts(m, "," MNTOPT_GRPQUOTA);
592 else
593 seq_puts(m, "," MNTOPT_GQUOTANOENF);
594 }
591 595
592 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) 596 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
593 seq_puts(m, "," MNTOPT_NOQUOTA); 597 seq_puts(m, "," MNTOPT_NOQUOTA);
@@ -687,7 +691,7 @@ xfs_barrier_test(
687 return error; 691 return error;
688} 692}
689 693
690void 694STATIC void
691xfs_mountfs_check_barriers(xfs_mount_t *mp) 695xfs_mountfs_check_barriers(xfs_mount_t *mp)
692{ 696{
693 int error; 697 int error;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 98ef624d9baf..320be6aea492 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -749,21 +749,6 @@ __xfs_inode_clear_reclaim_tag(
749 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 749 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
750} 750}
751 751
752void
753xfs_inode_clear_reclaim_tag(
754 xfs_inode_t *ip)
755{
756 xfs_mount_t *mp = ip->i_mount;
757 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
758
759 read_lock(&pag->pag_ici_lock);
760 spin_lock(&ip->i_flags_lock);
761 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
762 spin_unlock(&ip->i_flags_lock);
763 read_unlock(&pag->pag_ici_lock);
764 xfs_put_perag(mp, pag);
765}
766
767STATIC int 752STATIC int
768xfs_reclaim_inode_now( 753xfs_reclaim_inode_now(
769 struct xfs_inode *ip, 754 struct xfs_inode *ip,
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 59120602588a..27920eb7a820 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -49,7 +49,6 @@ int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 49
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); 51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 52void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54 struct xfs_inode *ip); 53 struct xfs_inode *ip);
55 54
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 21b08c0396a1..83e7ea3e25fa 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -48,50 +48,34 @@
48 48
49struct xqmstats xqmstats; 49struct xqmstats xqmstats;
50 50
51STATIC int 51static int xqm_proc_show(struct seq_file *m, void *v)
52xfs_qm_read_xfsquota(
53 char *buffer,
54 char **start,
55 off_t offset,
56 int count,
57 int *eof,
58 void *data)
59{ 52{
60 int len;
61
62 /* maximum; incore; ratio free to inuse; freelist */ 53 /* maximum; incore; ratio free to inuse; freelist */
63 len = sprintf(buffer, "%d\t%d\t%d\t%u\n", 54 seq_printf(m, "%d\t%d\t%d\t%u\n",
64 ndquot, 55 ndquot,
65 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, 56 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
66 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, 57 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
67 xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); 58 xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0);
68 59 return 0;
69 if (offset >= len) {
70 *start = buffer;
71 *eof = 1;
72 return 0;
73 }
74 *start = buffer + offset;
75 if ((len -= offset) > count)
76 return count;
77 *eof = 1;
78
79 return len;
80} 60}
81 61
82STATIC int 62static int xqm_proc_open(struct inode *inode, struct file *file)
83xfs_qm_read_stats(
84 char *buffer,
85 char **start,
86 off_t offset,
87 int count,
88 int *eof,
89 void *data)
90{ 63{
91 int len; 64 return single_open(file, xqm_proc_show, NULL);
65}
66
67static const struct file_operations xqm_proc_fops = {
68 .owner = THIS_MODULE,
69 .open = xqm_proc_open,
70 .read = seq_read,
71 .llseek = seq_lseek,
72 .release = single_release,
73};
92 74
75static int xqmstat_proc_show(struct seq_file *m, void *v)
76{
93 /* quota performance statistics */ 77 /* quota performance statistics */
94 len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n", 78 seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
95 xqmstats.xs_qm_dqreclaims, 79 xqmstats.xs_qm_dqreclaims,
96 xqmstats.xs_qm_dqreclaim_misses, 80 xqmstats.xs_qm_dqreclaim_misses,
97 xqmstats.xs_qm_dquot_dups, 81 xqmstats.xs_qm_dquot_dups,
@@ -100,25 +84,27 @@ xfs_qm_read_stats(
100 xqmstats.xs_qm_dqwants, 84 xqmstats.xs_qm_dqwants,
101 xqmstats.xs_qm_dqshake_reclaims, 85 xqmstats.xs_qm_dqshake_reclaims,
102 xqmstats.xs_qm_dqinact_reclaims); 86 xqmstats.xs_qm_dqinact_reclaims);
87 return 0;
88}
103 89
104 if (offset >= len) { 90static int xqmstat_proc_open(struct inode *inode, struct file *file)
105 *start = buffer; 91{
106 *eof = 1; 92 return single_open(file, xqmstat_proc_show, NULL);
107 return 0;
108 }
109 *start = buffer + offset;
110 if ((len -= offset) > count)
111 return count;
112 *eof = 1;
113
114 return len;
115} 93}
116 94
95static const struct file_operations xqmstat_proc_fops = {
96 .owner = THIS_MODULE,
97 .open = xqmstat_proc_open,
98 .read = seq_read,
99 .llseek = seq_lseek,
100 .release = single_release,
101};
102
117void 103void
118xfs_qm_init_procfs(void) 104xfs_qm_init_procfs(void)
119{ 105{
120 create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL); 106 proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
121 create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL); 107 proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
122} 108}
123 109
124void 110void
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index f24b50b68d03..a5d54bf4931b 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -198,6 +198,15 @@ typedef struct xfs_perag
198 xfs_agino_t pagi_count; /* number of allocated inodes */ 198 xfs_agino_t pagi_count; /* number of allocated inodes */
199 int pagb_count; /* pagb slots in use */ 199 int pagb_count; /* pagb slots in use */
200 xfs_perag_busy_t *pagb_list; /* unstable blocks */ 200 xfs_perag_busy_t *pagb_list; /* unstable blocks */
201
202 /*
203 * Inode allocation search lookup optimisation.
204 * If the pagino matches, the search for new inodes
205 * doesn't need to search the near ones again straight away
206 */
207 xfs_agino_t pagl_pagino;
208 xfs_agino_t pagl_leftrec;
209 xfs_agino_t pagl_rightrec;
201#ifdef __KERNEL__ 210#ifdef __KERNEL__
202 spinlock_t pagb_lock; /* lock for pagb_list */ 211 spinlock_t pagb_lock; /* lock for pagb_list */
203 212
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8ee5b5a76a2a..8971fb09d387 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3713,7 +3713,7 @@ done:
3713 * entry (null if none). Else, *lastxp will be set to the index 3713 * entry (null if none). Else, *lastxp will be set to the index
3714 * of the found entry; *gotp will contain the entry. 3714 * of the found entry; *gotp will contain the entry.
3715 */ 3715 */
3716xfs_bmbt_rec_host_t * /* pointer to found extent entry */ 3716STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
3717xfs_bmap_search_multi_extents( 3717xfs_bmap_search_multi_extents(
3718 xfs_ifork_t *ifp, /* inode fork pointer */ 3718 xfs_ifork_t *ifp, /* inode fork pointer */
3719 xfs_fileoff_t bno, /* block number searched for */ 3719 xfs_fileoff_t bno, /* block number searched for */
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 1b8ff9256bd0..56f62d2edc35 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -392,17 +392,6 @@ xfs_bmap_count_blocks(
392 int whichfork, 392 int whichfork,
393 int *count); 393 int *count);
394 394
395/*
396 * Search the extent records for the entry containing block bno.
397 * If bno lies in a hole, point to the next entry. If bno lies
398 * past eof, *eofp will be set, and *prevp will contain the last
399 * entry (null if none). Else, *lastxp will be set to the index
400 * of the found entry; *gotp will contain the entry.
401 */
402xfs_bmbt_rec_host_t *
403xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
404 xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
405
406#endif /* __KERNEL__ */ 395#endif /* __KERNEL__ */
407 396
408#endif /* __XFS_BMAP_H__ */ 397#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 5c1ade06578e..eb7b702d0690 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -202,16 +202,6 @@ xfs_bmbt_get_state(
202 ext_flag); 202 ext_flag);
203} 203}
204 204
205/* Endian flipping versions of the bmbt extraction functions */
206void
207xfs_bmbt_disk_get_all(
208 xfs_bmbt_rec_t *r,
209 xfs_bmbt_irec_t *s)
210{
211 __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
212 get_unaligned_be64(&r->l1), s);
213}
214
215/* 205/*
216 * Extract the blockcount field from an on disk bmap extent record. 206 * Extract the blockcount field from an on disk bmap extent record.
217 */ 207 */
@@ -816,6 +806,16 @@ xfs_bmbt_trace_key(
816 *l1 = 0; 806 *l1 = 0;
817} 807}
818 808
809/* Endian flipping versions of the bmbt extraction functions */
810STATIC void
811xfs_bmbt_disk_get_all(
812 xfs_bmbt_rec_t *r,
813 xfs_bmbt_irec_t *s)
814{
815 __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
816 get_unaligned_be64(&r->l1), s);
817}
818
819STATIC void 819STATIC void
820xfs_bmbt_trace_record( 820xfs_bmbt_trace_record(
821 struct xfs_btree_cur *cur, 821 struct xfs_btree_cur *cur,
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 0e8df007615e..5549d495947f 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -220,7 +220,6 @@ extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
220extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); 220extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
221extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); 221extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r);
222 222
223extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
224extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); 223extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
225extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); 224extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
226 225
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 26717388acf5..52b5f14d0c32 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -646,46 +646,6 @@ xfs_btree_read_bufl(
646} 646}
647 647
648/* 648/*
649 * Get a buffer for the block, return it read in.
650 * Short-form addressing.
651 */
652int /* error */
653xfs_btree_read_bufs(
654 xfs_mount_t *mp, /* file system mount point */
655 xfs_trans_t *tp, /* transaction pointer */
656 xfs_agnumber_t agno, /* allocation group number */
657 xfs_agblock_t agbno, /* allocation group block number */
658 uint lock, /* lock flags for read_buf */
659 xfs_buf_t **bpp, /* buffer for agno/agbno */
660 int refval) /* ref count value for buffer */
661{
662 xfs_buf_t *bp; /* return value */
663 xfs_daddr_t d; /* real disk block address */
664 int error;
665
666 ASSERT(agno != NULLAGNUMBER);
667 ASSERT(agbno != NULLAGBLOCK);
668 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
669 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
670 mp->m_bsize, lock, &bp))) {
671 return error;
672 }
673 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
674 if (bp != NULL) {
675 switch (refval) {
676 case XFS_ALLOC_BTREE_REF:
677 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
678 break;
679 case XFS_INO_BTREE_REF:
680 XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
681 break;
682 }
683 }
684 *bpp = bp;
685 return 0;
686}
687
688/*
689 * Read-ahead the block, don't wait for it, don't return a buffer. 649 * Read-ahead the block, don't wait for it, don't return a buffer.
690 * Long-form addressing. 650 * Long-form addressing.
691 */ 651 */
@@ -2951,7 +2911,7 @@ error0:
2951 * inode we have to copy the single block it was pointing to into the 2911 * inode we have to copy the single block it was pointing to into the
2952 * inode. 2912 * inode.
2953 */ 2913 */
2954int 2914STATIC int
2955xfs_btree_kill_iroot( 2915xfs_btree_kill_iroot(
2956 struct xfs_btree_cur *cur) 2916 struct xfs_btree_cur *cur)
2957{ 2917{
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 4f852b735b96..7fa07062bdda 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -379,20 +379,6 @@ xfs_btree_read_bufl(
379 int refval);/* ref count value for buffer */ 379 int refval);/* ref count value for buffer */
380 380
381/* 381/*
382 * Get a buffer for the block, return it read in.
383 * Short-form addressing.
384 */
385int /* error */
386xfs_btree_read_bufs(
387 struct xfs_mount *mp, /* file system mount point */
388 struct xfs_trans *tp, /* transaction pointer */
389 xfs_agnumber_t agno, /* allocation group number */
390 xfs_agblock_t agbno, /* allocation group block number */
391 uint lock, /* lock flags for read_buf */
392 struct xfs_buf **bpp, /* buffer for agno/agbno */
393 int refval);/* ref count value for buffer */
394
395/*
396 * Read-ahead the block, don't wait for it, don't return a buffer. 382 * Read-ahead the block, don't wait for it, don't return a buffer.
397 * Long-form addressing. 383 * Long-form addressing.
398 */ 384 */
@@ -432,7 +418,6 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
432int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); 418int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
433int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); 419int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
434int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); 420int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
435int xfs_btree_kill_iroot(struct xfs_btree_cur *);
436int xfs_btree_insert(struct xfs_btree_cur *, int *); 421int xfs_btree_insert(struct xfs_btree_cur *, int *);
437int xfs_btree_delete(struct xfs_btree_cur *, int *); 422int xfs_btree_delete(struct xfs_btree_cur *, int *);
438int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); 423int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 3120a3a5e20f..ab64f3efb43b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -57,75 +57,35 @@ xfs_ialloc_cluster_alignment(
57} 57}
58 58
59/* 59/*
60 * Lookup the record equal to ino in the btree given by cur. 60 * Lookup a record by ino in the btree given by cur.
61 */
62STATIC int /* error */
63xfs_inobt_lookup_eq(
64 struct xfs_btree_cur *cur, /* btree cursor */
65 xfs_agino_t ino, /* starting inode of chunk */
66 __int32_t fcnt, /* free inode count */
67 xfs_inofree_t free, /* free inode mask */
68 int *stat) /* success/failure */
69{
70 cur->bc_rec.i.ir_startino = ino;
71 cur->bc_rec.i.ir_freecount = fcnt;
72 cur->bc_rec.i.ir_free = free;
73 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
74}
75
76/*
77 * Lookup the first record greater than or equal to ino
78 * in the btree given by cur.
79 */ 61 */
80int /* error */ 62int /* error */
81xfs_inobt_lookup_ge( 63xfs_inobt_lookup(
82 struct xfs_btree_cur *cur, /* btree cursor */ 64 struct xfs_btree_cur *cur, /* btree cursor */
83 xfs_agino_t ino, /* starting inode of chunk */ 65 xfs_agino_t ino, /* starting inode of chunk */
84 __int32_t fcnt, /* free inode count */ 66 xfs_lookup_t dir, /* <=, >=, == */
85 xfs_inofree_t free, /* free inode mask */
86 int *stat) /* success/failure */ 67 int *stat) /* success/failure */
87{ 68{
88 cur->bc_rec.i.ir_startino = ino; 69 cur->bc_rec.i.ir_startino = ino;
89 cur->bc_rec.i.ir_freecount = fcnt; 70 cur->bc_rec.i.ir_freecount = 0;
90 cur->bc_rec.i.ir_free = free; 71 cur->bc_rec.i.ir_free = 0;
91 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 72 return xfs_btree_lookup(cur, dir, stat);
92} 73}
93 74
94/* 75/*
95 * Lookup the first record less than or equal to ino 76 * Update the record referred to by cur to the value given.
96 * in the btree given by cur.
97 */
98int /* error */
99xfs_inobt_lookup_le(
100 struct xfs_btree_cur *cur, /* btree cursor */
101 xfs_agino_t ino, /* starting inode of chunk */
102 __int32_t fcnt, /* free inode count */
103 xfs_inofree_t free, /* free inode mask */
104 int *stat) /* success/failure */
105{
106 cur->bc_rec.i.ir_startino = ino;
107 cur->bc_rec.i.ir_freecount = fcnt;
108 cur->bc_rec.i.ir_free = free;
109 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
110}
111
112/*
113 * Update the record referred to by cur to the value given
114 * by [ino, fcnt, free].
115 * This either works (return 0) or gets an EFSCORRUPTED error. 77 * This either works (return 0) or gets an EFSCORRUPTED error.
116 */ 78 */
117STATIC int /* error */ 79STATIC int /* error */
118xfs_inobt_update( 80xfs_inobt_update(
119 struct xfs_btree_cur *cur, /* btree cursor */ 81 struct xfs_btree_cur *cur, /* btree cursor */
120 xfs_agino_t ino, /* starting inode of chunk */ 82 xfs_inobt_rec_incore_t *irec) /* btree record */
121 __int32_t fcnt, /* free inode count */
122 xfs_inofree_t free) /* free inode mask */
123{ 83{
124 union xfs_btree_rec rec; 84 union xfs_btree_rec rec;
125 85
126 rec.inobt.ir_startino = cpu_to_be32(ino); 86 rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
127 rec.inobt.ir_freecount = cpu_to_be32(fcnt); 87 rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
128 rec.inobt.ir_free = cpu_to_be64(free); 88 rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
129 return xfs_btree_update(cur, &rec); 89 return xfs_btree_update(cur, &rec);
130} 90}
131 91
@@ -135,9 +95,7 @@ xfs_inobt_update(
135int /* error */ 95int /* error */
136xfs_inobt_get_rec( 96xfs_inobt_get_rec(
137 struct xfs_btree_cur *cur, /* btree cursor */ 97 struct xfs_btree_cur *cur, /* btree cursor */
138 xfs_agino_t *ino, /* output: starting inode of chunk */ 98 xfs_inobt_rec_incore_t *irec, /* btree record */
139 __int32_t *fcnt, /* output: number of free inodes */
140 xfs_inofree_t *free, /* output: free inode mask */
141 int *stat) /* output: success/failure */ 99 int *stat) /* output: success/failure */
142{ 100{
143 union xfs_btree_rec *rec; 101 union xfs_btree_rec *rec;
@@ -145,14 +103,136 @@ xfs_inobt_get_rec(
145 103
146 error = xfs_btree_get_rec(cur, &rec, stat); 104 error = xfs_btree_get_rec(cur, &rec, stat);
147 if (!error && *stat == 1) { 105 if (!error && *stat == 1) {
148 *ino = be32_to_cpu(rec->inobt.ir_startino); 106 irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
149 *fcnt = be32_to_cpu(rec->inobt.ir_freecount); 107 irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
150 *free = be64_to_cpu(rec->inobt.ir_free); 108 irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
151 } 109 }
152 return error; 110 return error;
153} 111}
154 112
155/* 113/*
114 * Verify that the number of free inodes in the AGI is correct.
115 */
116#ifdef DEBUG
117STATIC int
118xfs_check_agi_freecount(
119 struct xfs_btree_cur *cur,
120 struct xfs_agi *agi)
121{
122 if (cur->bc_nlevels == 1) {
123 xfs_inobt_rec_incore_t rec;
124 int freecount = 0;
125 int error;
126 int i;
127
128 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
129 if (error)
130 return error;
131
132 do {
133 error = xfs_inobt_get_rec(cur, &rec, &i);
134 if (error)
135 return error;
136
137 if (i) {
138 freecount += rec.ir_freecount;
139 error = xfs_btree_increment(cur, 0, &i);
140 if (error)
141 return error;
142 }
143 } while (i == 1);
144
145 if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
146 ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
147 }
148 return 0;
149}
150#else
151#define xfs_check_agi_freecount(cur, agi) 0
152#endif
153
154/*
155 * Initialise a new set of inodes.
156 */
157STATIC void
158xfs_ialloc_inode_init(
159 struct xfs_mount *mp,
160 struct xfs_trans *tp,
161 xfs_agnumber_t agno,
162 xfs_agblock_t agbno,
163 xfs_agblock_t length,
164 unsigned int gen)
165{
166 struct xfs_buf *fbuf;
167 struct xfs_dinode *free;
168 int blks_per_cluster, nbufs, ninodes;
169 int version;
170 int i, j;
171 xfs_daddr_t d;
172
173 /*
174 * Loop over the new block(s), filling in the inodes.
175 * For small block sizes, manipulate the inodes in buffers
176 * which are multiples of the blocks size.
177 */
178 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
179 blks_per_cluster = 1;
180 nbufs = length;
181 ninodes = mp->m_sb.sb_inopblock;
182 } else {
183 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
184 mp->m_sb.sb_blocksize;
185 nbufs = length / blks_per_cluster;
186 ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
187 }
188
189 /*
190 * Figure out what version number to use in the inodes we create.
191 * If the superblock version has caught up to the one that supports
192 * the new inode format, then use the new inode version. Otherwise
193 * use the old version so that old kernels will continue to be
194 * able to use the file system.
195 */
196 if (xfs_sb_version_hasnlink(&mp->m_sb))
197 version = 2;
198 else
199 version = 1;
200
201 for (j = 0; j < nbufs; j++) {
202 /*
203 * Get the block.
204 */
205 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
206 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
207 mp->m_bsize * blks_per_cluster,
208 XFS_BUF_LOCK);
209 ASSERT(fbuf);
210 ASSERT(!XFS_BUF_GETERROR(fbuf));
211
212 /*
213 * Initialize all inodes in this buffer and then log them.
214 *
215 * XXX: It would be much better if we had just one transaction
216 * to log a whole cluster of inodes instead of all the
217 * individual transactions causing a lot of log traffic.
218 */
219 xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
220 for (i = 0; i < ninodes; i++) {
221 int ioffset = i << mp->m_sb.sb_inodelog;
222 uint isize = sizeof(struct xfs_dinode);
223
224 free = xfs_make_iptr(mp, fbuf, i);
225 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
226 free->di_version = version;
227 free->di_gen = cpu_to_be32(gen);
228 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
229 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
230 }
231 xfs_trans_inode_alloc_buf(tp, fbuf);
232 }
233}
234
235/*
156 * Allocate new inodes in the allocation group specified by agbp. 236 * Allocate new inodes in the allocation group specified by agbp.
157 * Return 0 for success, else error code. 237 * Return 0 for success, else error code.
158 */ 238 */
@@ -164,24 +244,15 @@ xfs_ialloc_ag_alloc(
164{ 244{
165 xfs_agi_t *agi; /* allocation group header */ 245 xfs_agi_t *agi; /* allocation group header */
166 xfs_alloc_arg_t args; /* allocation argument structure */ 246 xfs_alloc_arg_t args; /* allocation argument structure */
167 int blks_per_cluster; /* fs blocks per inode cluster */
168 xfs_btree_cur_t *cur; /* inode btree cursor */ 247 xfs_btree_cur_t *cur; /* inode btree cursor */
169 xfs_daddr_t d; /* disk addr of buffer */
170 xfs_agnumber_t agno; 248 xfs_agnumber_t agno;
171 int error; 249 int error;
172 xfs_buf_t *fbuf; /* new free inodes' buffer */ 250 int i;
173 xfs_dinode_t *free; /* new free inode structure */
174 int i; /* inode counter */
175 int j; /* block counter */
176 int nbufs; /* num bufs of new inodes */
177 xfs_agino_t newino; /* new first inode's number */ 251 xfs_agino_t newino; /* new first inode's number */
178 xfs_agino_t newlen; /* new number of inodes */ 252 xfs_agino_t newlen; /* new number of inodes */
179 int ninodes; /* num inodes per buf */
180 xfs_agino_t thisino; /* current inode number, for loop */ 253 xfs_agino_t thisino; /* current inode number, for loop */
181 int version; /* inode version number to use */
182 int isaligned = 0; /* inode allocation at stripe unit */ 254 int isaligned = 0; /* inode allocation at stripe unit */
183 /* boundary */ 255 /* boundary */
184 unsigned int gen;
185 256
186 args.tp = tp; 257 args.tp = tp;
187 args.mp = tp->t_mountp; 258 args.mp = tp->t_mountp;
@@ -202,12 +273,12 @@ xfs_ialloc_ag_alloc(
202 */ 273 */
203 agi = XFS_BUF_TO_AGI(agbp); 274 agi = XFS_BUF_TO_AGI(agbp);
204 newino = be32_to_cpu(agi->agi_newino); 275 newino = be32_to_cpu(agi->agi_newino);
276 agno = be32_to_cpu(agi->agi_seqno);
205 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 277 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
206 XFS_IALLOC_BLOCKS(args.mp); 278 XFS_IALLOC_BLOCKS(args.mp);
207 if (likely(newino != NULLAGINO && 279 if (likely(newino != NULLAGINO &&
208 (args.agbno < be32_to_cpu(agi->agi_length)))) { 280 (args.agbno < be32_to_cpu(agi->agi_length)))) {
209 args.fsbno = XFS_AGB_TO_FSB(args.mp, 281 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
210 be32_to_cpu(agi->agi_seqno), args.agbno);
211 args.type = XFS_ALLOCTYPE_THIS_BNO; 282 args.type = XFS_ALLOCTYPE_THIS_BNO;
212 args.mod = args.total = args.wasdel = args.isfl = 283 args.mod = args.total = args.wasdel = args.isfl =
213 args.userdata = args.minalignslop = 0; 284 args.userdata = args.minalignslop = 0;
@@ -258,8 +329,7 @@ xfs_ialloc_ag_alloc(
258 * For now, just allocate blocks up front. 329 * For now, just allocate blocks up front.
259 */ 330 */
260 args.agbno = be32_to_cpu(agi->agi_root); 331 args.agbno = be32_to_cpu(agi->agi_root);
261 args.fsbno = XFS_AGB_TO_FSB(args.mp, 332 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
262 be32_to_cpu(agi->agi_seqno), args.agbno);
263 /* 333 /*
264 * Allocate a fixed-size extent of inodes. 334 * Allocate a fixed-size extent of inodes.
265 */ 335 */
@@ -282,8 +352,7 @@ xfs_ialloc_ag_alloc(
282 if (isaligned && args.fsbno == NULLFSBLOCK) { 352 if (isaligned && args.fsbno == NULLFSBLOCK) {
283 args.type = XFS_ALLOCTYPE_NEAR_BNO; 353 args.type = XFS_ALLOCTYPE_NEAR_BNO;
284 args.agbno = be32_to_cpu(agi->agi_root); 354 args.agbno = be32_to_cpu(agi->agi_root);
285 args.fsbno = XFS_AGB_TO_FSB(args.mp, 355 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
286 be32_to_cpu(agi->agi_seqno), args.agbno);
287 args.alignment = xfs_ialloc_cluster_alignment(&args); 356 args.alignment = xfs_ialloc_cluster_alignment(&args);
288 if ((error = xfs_alloc_vextent(&args))) 357 if ((error = xfs_alloc_vextent(&args)))
289 return error; 358 return error;
@@ -294,85 +363,30 @@ xfs_ialloc_ag_alloc(
294 return 0; 363 return 0;
295 } 364 }
296 ASSERT(args.len == args.minlen); 365 ASSERT(args.len == args.minlen);
297 /*
298 * Convert the results.
299 */
300 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
301 /*
302 * Loop over the new block(s), filling in the inodes.
303 * For small block sizes, manipulate the inodes in buffers
304 * which are multiples of the blocks size.
305 */
306 if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
307 blks_per_cluster = 1;
308 nbufs = (int)args.len;
309 ninodes = args.mp->m_sb.sb_inopblock;
310 } else {
311 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
312 args.mp->m_sb.sb_blocksize;
313 nbufs = (int)args.len / blks_per_cluster;
314 ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
315 }
316 /*
317 * Figure out what version number to use in the inodes we create.
318 * If the superblock version has caught up to the one that supports
319 * the new inode format, then use the new inode version. Otherwise
320 * use the old version so that old kernels will continue to be
321 * able to use the file system.
322 */
323 if (xfs_sb_version_hasnlink(&args.mp->m_sb))
324 version = 2;
325 else
326 version = 1;
327 366
328 /* 367 /*
368 * Stamp and write the inode buffers.
369 *
329 * Seed the new inode cluster with a random generation number. This 370 * Seed the new inode cluster with a random generation number. This
330 * prevents short-term reuse of generation numbers if a chunk is 371 * prevents short-term reuse of generation numbers if a chunk is
331 * freed and then immediately reallocated. We use random numbers 372 * freed and then immediately reallocated. We use random numbers
332 * rather than a linear progression to prevent the next generation 373 * rather than a linear progression to prevent the next generation
333 * number from being easily guessable. 374 * number from being easily guessable.
334 */ 375 */
335 gen = random32(); 376 xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len,
336 for (j = 0; j < nbufs; j++) { 377 random32());
337 /*
338 * Get the block.
339 */
340 d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
341 args.agbno + (j * blks_per_cluster));
342 fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
343 args.mp->m_bsize * blks_per_cluster,
344 XFS_BUF_LOCK);
345 ASSERT(fbuf);
346 ASSERT(!XFS_BUF_GETERROR(fbuf));
347 378
348 /* 379 /*
349 * Initialize all inodes in this buffer and then log them. 380 * Convert the results.
350 * 381 */
351 * XXX: It would be much better if we had just one transaction to 382 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
352 * log a whole cluster of inodes instead of all the individual
353 * transactions causing a lot of log traffic.
354 */
355 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
356 for (i = 0; i < ninodes; i++) {
357 int ioffset = i << args.mp->m_sb.sb_inodelog;
358 uint isize = sizeof(struct xfs_dinode);
359
360 free = xfs_make_iptr(args.mp, fbuf, i);
361 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
362 free->di_version = version;
363 free->di_gen = cpu_to_be32(gen);
364 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
365 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
366 }
367 xfs_trans_inode_alloc_buf(tp, fbuf);
368 }
369 be32_add_cpu(&agi->agi_count, newlen); 383 be32_add_cpu(&agi->agi_count, newlen);
370 be32_add_cpu(&agi->agi_freecount, newlen); 384 be32_add_cpu(&agi->agi_freecount, newlen);
371 agno = be32_to_cpu(agi->agi_seqno);
372 down_read(&args.mp->m_peraglock); 385 down_read(&args.mp->m_peraglock);
373 args.mp->m_perag[agno].pagi_freecount += newlen; 386 args.mp->m_perag[agno].pagi_freecount += newlen;
374 up_read(&args.mp->m_peraglock); 387 up_read(&args.mp->m_peraglock);
375 agi->agi_newino = cpu_to_be32(newino); 388 agi->agi_newino = cpu_to_be32(newino);
389
376 /* 390 /*
377 * Insert records describing the new inode chunk into the btree. 391 * Insert records describing the new inode chunk into the btree.
378 */ 392 */
@@ -380,13 +394,17 @@ xfs_ialloc_ag_alloc(
380 for (thisino = newino; 394 for (thisino = newino;
381 thisino < newino + newlen; 395 thisino < newino + newlen;
382 thisino += XFS_INODES_PER_CHUNK) { 396 thisino += XFS_INODES_PER_CHUNK) {
383 if ((error = xfs_inobt_lookup_eq(cur, thisino, 397 cur->bc_rec.i.ir_startino = thisino;
384 XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { 398 cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
399 cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
400 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
401 if (error) {
385 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 402 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
386 return error; 403 return error;
387 } 404 }
388 ASSERT(i == 0); 405 ASSERT(i == 0);
389 if ((error = xfs_btree_insert(cur, &i))) { 406 error = xfs_btree_insert(cur, &i);
407 if (error) {
390 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 408 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
391 return error; 409 return error;
392 } 410 }
@@ -539,6 +557,62 @@ nextag:
539} 557}
540 558
541/* 559/*
560 * Try to retrieve the next record to the left/right from the current one.
561 */
562STATIC int
563xfs_ialloc_next_rec(
564 struct xfs_btree_cur *cur,
565 xfs_inobt_rec_incore_t *rec,
566 int *done,
567 int left)
568{
569 int error;
570 int i;
571
572 if (left)
573 error = xfs_btree_decrement(cur, 0, &i);
574 else
575 error = xfs_btree_increment(cur, 0, &i);
576
577 if (error)
578 return error;
579 *done = !i;
580 if (i) {
581 error = xfs_inobt_get_rec(cur, rec, &i);
582 if (error)
583 return error;
584 XFS_WANT_CORRUPTED_RETURN(i == 1);
585 }
586
587 return 0;
588}
589
590STATIC int
591xfs_ialloc_get_rec(
592 struct xfs_btree_cur *cur,
593 xfs_agino_t agino,
594 xfs_inobt_rec_incore_t *rec,
595 int *done,
596 int left)
597{
598 int error;
599 int i;
600
601 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
602 if (error)
603 return error;
604 *done = !i;
605 if (i) {
606 error = xfs_inobt_get_rec(cur, rec, &i);
607 if (error)
608 return error;
609 XFS_WANT_CORRUPTED_RETURN(i == 1);
610 }
611
612 return 0;
613}
614
615/*
542 * Visible inode allocation functions. 616 * Visible inode allocation functions.
543 */ 617 */
544 618
@@ -592,8 +666,8 @@ xfs_dialloc(
592 int j; /* result code */ 666 int j; /* result code */
593 xfs_mount_t *mp; /* file system mount structure */ 667 xfs_mount_t *mp; /* file system mount structure */
594 int offset; /* index of inode in chunk */ 668 int offset; /* index of inode in chunk */
595 xfs_agino_t pagino; /* parent's a.g. relative inode # */ 669 xfs_agino_t pagino; /* parent's AG relative inode # */
596 xfs_agnumber_t pagno; /* parent's allocation group number */ 670 xfs_agnumber_t pagno; /* parent's AG number */
597 xfs_inobt_rec_incore_t rec; /* inode allocation record */ 671 xfs_inobt_rec_incore_t rec; /* inode allocation record */
598 xfs_agnumber_t tagno; /* testing allocation group number */ 672 xfs_agnumber_t tagno; /* testing allocation group number */
599 xfs_btree_cur_t *tcur; /* temp cursor */ 673 xfs_btree_cur_t *tcur; /* temp cursor */
@@ -716,6 +790,8 @@ nextag:
716 */ 790 */
717 agno = tagno; 791 agno = tagno;
718 *IO_agbp = NULL; 792 *IO_agbp = NULL;
793
794 restart_pagno:
719 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); 795 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
720 /* 796 /*
721 * If pagino is 0 (this is the root inode allocation) use newino. 797 * If pagino is 0 (this is the root inode allocation) use newino.
@@ -723,220 +799,199 @@ nextag:
723 */ 799 */
724 if (!pagino) 800 if (!pagino)
725 pagino = be32_to_cpu(agi->agi_newino); 801 pagino = be32_to_cpu(agi->agi_newino);
726#ifdef DEBUG
727 if (cur->bc_nlevels == 1) {
728 int freecount = 0;
729 802
730 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 803 error = xfs_check_agi_freecount(cur, agi);
731 goto error0; 804 if (error)
732 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 805 goto error0;
733 do {
734 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
735 &rec.ir_freecount, &rec.ir_free, &i)))
736 goto error0;
737 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
738 freecount += rec.ir_freecount;
739 if ((error = xfs_btree_increment(cur, 0, &i)))
740 goto error0;
741 } while (i == 1);
742 806
743 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
744 XFS_FORCED_SHUTDOWN(mp));
745 }
746#endif
747 /* 807 /*
748 * If in the same a.g. as the parent, try to get near the parent. 808 * If in the same AG as the parent, try to get near the parent.
749 */ 809 */
750 if (pagno == agno) { 810 if (pagno == agno) {
751 if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) 811 xfs_perag_t *pag = &mp->m_perag[agno];
812 int doneleft; /* done, to the left */
813 int doneright; /* done, to the right */
814 int searchdistance = 10;
815
816 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
817 if (error)
818 goto error0;
819 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
820
821 error = xfs_inobt_get_rec(cur, &rec, &j);
822 if (error)
752 goto error0; 823 goto error0;
753 if (i != 0 && 824 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
754 (error = xfs_inobt_get_rec(cur, &rec.ir_startino, 825
755 &rec.ir_freecount, &rec.ir_free, &j)) == 0 && 826 if (rec.ir_freecount > 0) {
756 j == 1 &&
757 rec.ir_freecount > 0) {
758 /* 827 /*
759 * Found a free inode in the same chunk 828 * Found a free inode in the same chunk
760 * as parent, done. 829 * as the parent, done.
761 */ 830 */
831 goto alloc_inode;
762 } 832 }
833
834
835 /*
836 * In the same AG as parent, but parent's chunk is full.
837 */
838
839 /* duplicate the cursor, search left & right simultaneously */
840 error = xfs_btree_dup_cursor(cur, &tcur);
841 if (error)
842 goto error0;
843
763 /* 844 /*
764 * In the same a.g. as parent, but parent's chunk is full. 845 * Skip to last blocks looked up if same parent inode.
765 */ 846 */
766 else { 847 if (pagino != NULLAGINO &&
767 int doneleft; /* done, to the left */ 848 pag->pagl_pagino == pagino &&
768 int doneright; /* done, to the right */ 849 pag->pagl_leftrec != NULLAGINO &&
850 pag->pagl_rightrec != NULLAGINO) {
851 error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
852 &trec, &doneleft, 1);
853 if (error)
854 goto error1;
769 855
856 error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
857 &rec, &doneright, 0);
770 if (error) 858 if (error)
771 goto error0;
772 ASSERT(i == 1);
773 ASSERT(j == 1);
774 /*
775 * Duplicate the cursor, search left & right
776 * simultaneously.
777 */
778 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
779 goto error0;
780 /*
781 * Search left with tcur, back up 1 record.
782 */
783 if ((error = xfs_btree_decrement(tcur, 0, &i)))
784 goto error1; 859 goto error1;
785 doneleft = !i; 860 } else {
786 if (!doneleft) { 861 /* search left with tcur, back up 1 record */
787 if ((error = xfs_inobt_get_rec(tcur, 862 error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
788 &trec.ir_startino, 863 if (error)
789 &trec.ir_freecount,
790 &trec.ir_free, &i)))
791 goto error1;
792 XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
793 }
794 /*
795 * Search right with cur, go forward 1 record.
796 */
797 if ((error = xfs_btree_increment(cur, 0, &i)))
798 goto error1; 864 goto error1;
799 doneright = !i;
800 if (!doneright) {
801 if ((error = xfs_inobt_get_rec(cur,
802 &rec.ir_startino,
803 &rec.ir_freecount,
804 &rec.ir_free, &i)))
805 goto error1;
806 XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
807 }
808 /*
809 * Loop until we find the closest inode chunk
810 * with a free one.
811 */
812 while (!doneleft || !doneright) {
813 int useleft; /* using left inode
814 chunk this time */
815 865
866 /* search right with cur, go forward 1 record. */
867 error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
868 if (error)
869 goto error1;
870 }
871
872 /*
873 * Loop until we find an inode chunk with a free inode.
874 */
875 while (!doneleft || !doneright) {
876 int useleft; /* using left inode chunk this time */
877
878 if (!--searchdistance) {
816 /* 879 /*
817 * Figure out which block is closer, 880 * Not in range - save last search
818 * if both are valid. 881 * location and allocate a new inode
819 */
820 if (!doneleft && !doneright)
821 useleft =
822 pagino -
823 (trec.ir_startino +
824 XFS_INODES_PER_CHUNK - 1) <
825 rec.ir_startino - pagino;
826 else
827 useleft = !doneleft;
828 /*
829 * If checking the left, does it have
830 * free inodes?
831 */
832 if (useleft && trec.ir_freecount) {
833 /*
834 * Yes, set it up as the chunk to use.
835 */
836 rec = trec;
837 xfs_btree_del_cursor(cur,
838 XFS_BTREE_NOERROR);
839 cur = tcur;
840 break;
841 }
842 /*
843 * If checking the right, does it have
844 * free inodes?
845 */
846 if (!useleft && rec.ir_freecount) {
847 /*
848 * Yes, it's already set up.
849 */
850 xfs_btree_del_cursor(tcur,
851 XFS_BTREE_NOERROR);
852 break;
853 }
854 /*
855 * If used the left, get another one
856 * further left.
857 */
858 if (useleft) {
859 if ((error = xfs_btree_decrement(tcur, 0,
860 &i)))
861 goto error1;
862 doneleft = !i;
863 if (!doneleft) {
864 if ((error = xfs_inobt_get_rec(
865 tcur,
866 &trec.ir_startino,
867 &trec.ir_freecount,
868 &trec.ir_free, &i)))
869 goto error1;
870 XFS_WANT_CORRUPTED_GOTO(i == 1,
871 error1);
872 }
873 }
874 /*
875 * If used the right, get another one
876 * further right.
877 */ 882 */
878 else { 883 pag->pagl_leftrec = trec.ir_startino;
879 if ((error = xfs_btree_increment(cur, 0, 884 pag->pagl_rightrec = rec.ir_startino;
880 &i))) 885 pag->pagl_pagino = pagino;
881 goto error1; 886 goto newino;
882 doneright = !i; 887 }
883 if (!doneright) { 888
884 if ((error = xfs_inobt_get_rec( 889 /* figure out the closer block if both are valid. */
885 cur, 890 if (!doneleft && !doneright) {
886 &rec.ir_startino, 891 useleft = pagino -
887 &rec.ir_freecount, 892 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
888 &rec.ir_free, &i))) 893 rec.ir_startino - pagino;
889 goto error1; 894 } else {
890 XFS_WANT_CORRUPTED_GOTO(i == 1, 895 useleft = !doneleft;
891 error1);
892 }
893 }
894 } 896 }
895 ASSERT(!doneleft || !doneright); 897
898 /* free inodes to the left? */
899 if (useleft && trec.ir_freecount) {
900 rec = trec;
901 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
902 cur = tcur;
903
904 pag->pagl_leftrec = trec.ir_startino;
905 pag->pagl_rightrec = rec.ir_startino;
906 pag->pagl_pagino = pagino;
907 goto alloc_inode;
908 }
909
910 /* free inodes to the right? */
911 if (!useleft && rec.ir_freecount) {
912 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
913
914 pag->pagl_leftrec = trec.ir_startino;
915 pag->pagl_rightrec = rec.ir_startino;
916 pag->pagl_pagino = pagino;
917 goto alloc_inode;
918 }
919
920 /* get next record to check */
921 if (useleft) {
922 error = xfs_ialloc_next_rec(tcur, &trec,
923 &doneleft, 1);
924 } else {
925 error = xfs_ialloc_next_rec(cur, &rec,
926 &doneright, 0);
927 }
928 if (error)
929 goto error1;
896 } 930 }
931
932 /*
933 * We've reached the end of the btree. because
934 * we are only searching a small chunk of the
935 * btree each search, there is obviously free
936 * inodes closer to the parent inode than we
937 * are now. restart the search again.
938 */
939 pag->pagl_pagino = NULLAGINO;
940 pag->pagl_leftrec = NULLAGINO;
941 pag->pagl_rightrec = NULLAGINO;
942 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
943 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
944 goto restart_pagno;
897 } 945 }
946
898 /* 947 /*
899 * In a different a.g. from the parent. 948 * In a different AG from the parent.
900 * See if the most recently allocated block has any free. 949 * See if the most recently allocated block has any free.
901 */ 950 */
902 else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { 951newino:
903 if ((error = xfs_inobt_lookup_eq(cur, 952 if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
904 be32_to_cpu(agi->agi_newino), 0, 0, &i))) 953 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
954 XFS_LOOKUP_EQ, &i);
955 if (error)
905 goto error0; 956 goto error0;
906 if (i == 1 && 957
907 (error = xfs_inobt_get_rec(cur, &rec.ir_startino, 958 if (i == 1) {
908 &rec.ir_freecount, &rec.ir_free, &j)) == 0 && 959 error = xfs_inobt_get_rec(cur, &rec, &j);
909 j == 1 &&
910 rec.ir_freecount > 0) {
911 /*
912 * The last chunk allocated in the group still has
913 * a free inode.
914 */
915 }
916 /*
917 * None left in the last group, search the whole a.g.
918 */
919 else {
920 if (error) 960 if (error)
921 goto error0; 961 goto error0;
922 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 962
923 goto error0; 963 if (j == 1 && rec.ir_freecount > 0) {
924 ASSERT(i == 1); 964 /*
925 for (;;) { 965 * The last chunk allocated in the group
926 if ((error = xfs_inobt_get_rec(cur, 966 * still has a free inode.
927 &rec.ir_startino, 967 */
928 &rec.ir_freecount, &rec.ir_free, 968 goto alloc_inode;
929 &i)))
930 goto error0;
931 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
932 if (rec.ir_freecount > 0)
933 break;
934 if ((error = xfs_btree_increment(cur, 0, &i)))
935 goto error0;
936 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
937 } 969 }
938 } 970 }
939 } 971 }
972
973 /*
974 * None left in the last group, search the whole AG
975 */
976 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
977 if (error)
978 goto error0;
979 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
980
981 for (;;) {
982 error = xfs_inobt_get_rec(cur, &rec, &i);
983 if (error)
984 goto error0;
985 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
986 if (rec.ir_freecount > 0)
987 break;
988 error = xfs_btree_increment(cur, 0, &i);
989 if (error)
990 goto error0;
991 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
992 }
993
994alloc_inode:
940 offset = xfs_ialloc_find_free(&rec.ir_free); 995 offset = xfs_ialloc_find_free(&rec.ir_free);
941 ASSERT(offset >= 0); 996 ASSERT(offset >= 0);
942 ASSERT(offset < XFS_INODES_PER_CHUNK); 997 ASSERT(offset < XFS_INODES_PER_CHUNK);
@@ -945,33 +1000,19 @@ nextag:
945 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); 1000 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
946 rec.ir_free &= ~XFS_INOBT_MASK(offset); 1001 rec.ir_free &= ~XFS_INOBT_MASK(offset);
947 rec.ir_freecount--; 1002 rec.ir_freecount--;
948 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, 1003 error = xfs_inobt_update(cur, &rec);
949 rec.ir_free))) 1004 if (error)
950 goto error0; 1005 goto error0;
951 be32_add_cpu(&agi->agi_freecount, -1); 1006 be32_add_cpu(&agi->agi_freecount, -1);
952 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1007 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
953 down_read(&mp->m_peraglock); 1008 down_read(&mp->m_peraglock);
954 mp->m_perag[tagno].pagi_freecount--; 1009 mp->m_perag[tagno].pagi_freecount--;
955 up_read(&mp->m_peraglock); 1010 up_read(&mp->m_peraglock);
956#ifdef DEBUG
957 if (cur->bc_nlevels == 1) {
958 int freecount = 0;
959 1011
960 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 1012 error = xfs_check_agi_freecount(cur, agi);
961 goto error0; 1013 if (error)
962 do { 1014 goto error0;
963 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, 1015
964 &rec.ir_freecount, &rec.ir_free, &i)))
965 goto error0;
966 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
967 freecount += rec.ir_freecount;
968 if ((error = xfs_btree_increment(cur, 0, &i)))
969 goto error0;
970 } while (i == 1);
971 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
972 XFS_FORCED_SHUTDOWN(mp));
973 }
974#endif
975 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1016 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
976 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); 1017 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
977 *inop = ino; 1018 *inop = ino;
@@ -1062,38 +1103,23 @@ xfs_difree(
1062 * Initialize the cursor. 1103 * Initialize the cursor.
1063 */ 1104 */
1064 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1105 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1065#ifdef DEBUG
1066 if (cur->bc_nlevels == 1) {
1067 int freecount = 0;
1068 1106
1069 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 1107 error = xfs_check_agi_freecount(cur, agi);
1070 goto error0; 1108 if (error)
1071 do { 1109 goto error0;
1072 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, 1110
1073 &rec.ir_freecount, &rec.ir_free, &i)))
1074 goto error0;
1075 if (i) {
1076 freecount += rec.ir_freecount;
1077 if ((error = xfs_btree_increment(cur, 0, &i)))
1078 goto error0;
1079 }
1080 } while (i == 1);
1081 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1082 XFS_FORCED_SHUTDOWN(mp));
1083 }
1084#endif
1085 /* 1111 /*
1086 * Look for the entry describing this inode. 1112 * Look for the entry describing this inode.
1087 */ 1113 */
1088 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { 1114 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
1089 cmn_err(CE_WARN, 1115 cmn_err(CE_WARN,
1090 "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", 1116 "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.",
1091 error, mp->m_fsname); 1117 error, mp->m_fsname);
1092 goto error0; 1118 goto error0;
1093 } 1119 }
1094 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1120 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1095 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, 1121 error = xfs_inobt_get_rec(cur, &rec, &i);
1096 &rec.ir_free, &i))) { 1122 if (error) {
1097 cmn_err(CE_WARN, 1123 cmn_err(CE_WARN,
1098 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", 1124 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
1099 error, mp->m_fsname); 1125 error, mp->m_fsname);
@@ -1148,12 +1174,14 @@ xfs_difree(
1148 } else { 1174 } else {
1149 *delete = 0; 1175 *delete = 0;
1150 1176
1151 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { 1177 error = xfs_inobt_update(cur, &rec);
1178 if (error) {
1152 cmn_err(CE_WARN, 1179 cmn_err(CE_WARN,
1153 "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", 1180 "xfs_difree: xfs_inobt_update returned an error %d on %s.",
1154 error, mp->m_fsname); 1181 error, mp->m_fsname);
1155 goto error0; 1182 goto error0;
1156 } 1183 }
1184
1157 /* 1185 /*
1158 * Change the inode free counts and log the ag/sb changes. 1186 * Change the inode free counts and log the ag/sb changes.
1159 */ 1187 */
@@ -1165,28 +1193,10 @@ xfs_difree(
1165 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1193 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1166 } 1194 }
1167 1195
1168#ifdef DEBUG 1196 error = xfs_check_agi_freecount(cur, agi);
1169 if (cur->bc_nlevels == 1) { 1197 if (error)
1170 int freecount = 0; 1198 goto error0;
1171 1199
1172 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
1173 goto error0;
1174 do {
1175 if ((error = xfs_inobt_get_rec(cur,
1176 &rec.ir_startino,
1177 &rec.ir_freecount,
1178 &rec.ir_free, &i)))
1179 goto error0;
1180 if (i) {
1181 freecount += rec.ir_freecount;
1182 if ((error = xfs_btree_increment(cur, 0, &i)))
1183 goto error0;
1184 }
1185 } while (i == 1);
1186 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1187 XFS_FORCED_SHUTDOWN(mp));
1188 }
1189#endif
1190 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1200 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1191 return 0; 1201 return 0;
1192 1202
@@ -1297,9 +1307,7 @@ xfs_imap(
1297 chunk_agbno = agbno - offset_agbno; 1307 chunk_agbno = agbno - offset_agbno;
1298 } else { 1308 } else {
1299 xfs_btree_cur_t *cur; /* inode btree cursor */ 1309 xfs_btree_cur_t *cur; /* inode btree cursor */
1300 xfs_agino_t chunk_agino; /* first agino in inode chunk */ 1310 xfs_inobt_rec_incore_t chunk_rec;
1301 __int32_t chunk_cnt; /* count of free inodes in chunk */
1302 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
1303 xfs_buf_t *agbp; /* agi buffer */ 1311 xfs_buf_t *agbp; /* agi buffer */
1304 int i; /* temp state */ 1312 int i; /* temp state */
1305 1313
@@ -1315,15 +1323,14 @@ xfs_imap(
1315 } 1323 }
1316 1324
1317 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1325 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1318 error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); 1326 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1319 if (error) { 1327 if (error) {
1320 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1328 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1321 "xfs_inobt_lookup_le() failed"); 1329 "xfs_inobt_lookup() failed");
1322 goto error0; 1330 goto error0;
1323 } 1331 }
1324 1332
1325 error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, 1333 error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
1326 &chunk_free, &i);
1327 if (error) { 1334 if (error) {
1328 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1335 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1329 "xfs_inobt_get_rec() failed"); 1336 "xfs_inobt_get_rec() failed");
@@ -1341,7 +1348,7 @@ xfs_imap(
1341 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1348 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1342 if (error) 1349 if (error)
1343 return error; 1350 return error;
1344 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); 1351 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
1345 offset_agbno = agbno - chunk_agbno; 1352 offset_agbno = agbno - chunk_agbno;
1346 } 1353 }
1347 1354
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index aeee8278f92c..bb5385475e1f 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -150,23 +150,15 @@ xfs_ialloc_pagi_init(
150 xfs_agnumber_t agno); /* allocation group number */ 150 xfs_agnumber_t agno); /* allocation group number */
151 151
152/* 152/*
153 * Lookup the first record greater than or equal to ino 153 * Lookup a record by ino in the btree given by cur.
154 * in the btree given by cur.
155 */ 154 */
156int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, 155int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
157 __int32_t fcnt, xfs_inofree_t free, int *stat); 156 xfs_lookup_t dir, int *stat);
158
159/*
160 * Lookup the first record less than or equal to ino
161 * in the btree given by cur.
162 */
163int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
164 __int32_t fcnt, xfs_inofree_t free, int *stat);
165 157
166/* 158/*
167 * Get the data from the pointed-to record. 159 * Get the data from the pointed-to record.
168 */ 160 */
169extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, 161extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
170 __int32_t *fcnt, xfs_inofree_t *free, int *stat); 162 xfs_inobt_rec_incore_t *rec, int *stat);
171 163
172#endif /* __XFS_IALLOC_H__ */ 164#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index ecbf8b4d2e2e..80e526489be5 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -82,7 +82,6 @@ xfs_inode_alloc(
82 memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); 82 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
83 ip->i_flags = 0; 83 ip->i_flags = 0;
84 ip->i_update_core = 0; 84 ip->i_update_core = 0;
85 ip->i_update_size = 0;
86 ip->i_delayed_blks = 0; 85 ip->i_delayed_blks = 0;
87 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 86 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
88 ip->i_size = 0; 87 ip->i_size = 0;
@@ -456,32 +455,6 @@ out_error_or_again:
456 return error; 455 return error;
457} 456}
458 457
459
460/*
461 * Look for the inode corresponding to the given ino in the hash table.
462 * If it is there and its i_transp pointer matches tp, return it.
463 * Otherwise, return NULL.
464 */
465xfs_inode_t *
466xfs_inode_incore(xfs_mount_t *mp,
467 xfs_ino_t ino,
468 xfs_trans_t *tp)
469{
470 xfs_inode_t *ip;
471 xfs_perag_t *pag;
472
473 pag = xfs_get_perag(mp, ino);
474 read_lock(&pag->pag_ici_lock);
475 ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino));
476 read_unlock(&pag->pag_ici_lock);
477 xfs_put_perag(mp, pag);
478
479 /* the returned inode must match the transaction */
480 if (ip && (ip->i_transp != tp))
481 return NULL;
482 return ip;
483}
484
485/* 458/*
486 * Decrement reference count of an inode structure and unlock it. 459 * Decrement reference count of an inode structure and unlock it.
487 * 460 *
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index da428b3fe0f5..c1dc7ef5a1d8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -651,7 +651,7 @@ xfs_iformat_btree(
651 return 0; 651 return 0;
652} 652}
653 653
654void 654STATIC void
655xfs_dinode_from_disk( 655xfs_dinode_from_disk(
656 xfs_icdinode_t *to, 656 xfs_icdinode_t *to,
657 xfs_dinode_t *from) 657 xfs_dinode_t *from)
@@ -1247,7 +1247,7 @@ xfs_isize_check(
1247 * In that case the pages will still be in memory, but the inode size 1247 * In that case the pages will still be in memory, but the inode size
1248 * will never have been updated. 1248 * will never have been updated.
1249 */ 1249 */
1250xfs_fsize_t 1250STATIC xfs_fsize_t
1251xfs_file_last_byte( 1251xfs_file_last_byte(
1252 xfs_inode_t *ip) 1252 xfs_inode_t *ip)
1253{ 1253{
@@ -3837,7 +3837,7 @@ xfs_iext_inline_to_direct(
3837/* 3837/*
3838 * Resize an extent indirection array to new_size bytes. 3838 * Resize an extent indirection array to new_size bytes.
3839 */ 3839 */
3840void 3840STATIC void
3841xfs_iext_realloc_indirect( 3841xfs_iext_realloc_indirect(
3842 xfs_ifork_t *ifp, /* inode fork pointer */ 3842 xfs_ifork_t *ifp, /* inode fork pointer */
3843 int new_size) /* new indirection array size */ 3843 int new_size) /* new indirection array size */
@@ -3862,7 +3862,7 @@ xfs_iext_realloc_indirect(
3862/* 3862/*
3863 * Switch from indirection array to linear (direct) extent allocations. 3863 * Switch from indirection array to linear (direct) extent allocations.
3864 */ 3864 */
3865void 3865STATIC void
3866xfs_iext_indirect_to_direct( 3866xfs_iext_indirect_to_direct(
3867 xfs_ifork_t *ifp) /* inode fork pointer */ 3867 xfs_ifork_t *ifp) /* inode fork pointer */
3868{ 3868{
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 65f24a3cc992..0b38b9a869ec 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -261,7 +261,6 @@ typedef struct xfs_inode {
261 /* Miscellaneous state. */ 261 /* Miscellaneous state. */
262 unsigned short i_flags; /* see defined flags below */ 262 unsigned short i_flags; /* see defined flags below */
263 unsigned char i_update_core; /* timestamps/size is dirty */ 263 unsigned char i_update_core; /* timestamps/size is dirty */
264 unsigned char i_update_size; /* di_size field is dirty */
265 unsigned int i_delayed_blks; /* count of delay alloc blks */ 264 unsigned int i_delayed_blks; /* count of delay alloc blks */
266 265
267 xfs_icdinode_t i_d; /* most of ondisk inode */ 266 xfs_icdinode_t i_d; /* most of ondisk inode */
@@ -468,8 +467,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
468/* 467/*
469 * xfs_iget.c prototypes. 468 * xfs_iget.c prototypes.
470 */ 469 */
471xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
472 struct xfs_trans *);
473int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 470int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
474 uint, uint, xfs_inode_t **, xfs_daddr_t); 471 uint, uint, xfs_inode_t **, xfs_daddr_t);
475void xfs_iput(xfs_inode_t *, uint); 472void xfs_iput(xfs_inode_t *, uint);
@@ -504,7 +501,6 @@ void xfs_ipin(xfs_inode_t *);
504void xfs_iunpin(xfs_inode_t *); 501void xfs_iunpin(xfs_inode_t *);
505int xfs_iflush(xfs_inode_t *, uint); 502int xfs_iflush(xfs_inode_t *, uint);
506void xfs_ichgtime(xfs_inode_t *, int); 503void xfs_ichgtime(xfs_inode_t *, int);
507xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
508void xfs_lock_inodes(xfs_inode_t **, int, uint); 504void xfs_lock_inodes(xfs_inode_t **, int, uint);
509void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 505void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
510 506
@@ -572,8 +568,6 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
572 struct xfs_buf **, uint); 568 struct xfs_buf **, uint);
573int xfs_iread(struct xfs_mount *, struct xfs_trans *, 569int xfs_iread(struct xfs_mount *, struct xfs_trans *,
574 struct xfs_inode *, xfs_daddr_t, uint); 570 struct xfs_inode *, xfs_daddr_t, uint);
575void xfs_dinode_from_disk(struct xfs_icdinode *,
576 struct xfs_dinode *);
577void xfs_dinode_to_disk(struct xfs_dinode *, 571void xfs_dinode_to_disk(struct xfs_dinode *,
578 struct xfs_icdinode *); 572 struct xfs_icdinode *);
579void xfs_idestroy_fork(struct xfs_inode *, int); 573void xfs_idestroy_fork(struct xfs_inode *, int);
@@ -592,8 +586,6 @@ void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
592void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); 586void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
593void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); 587void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
594void xfs_iext_realloc_direct(xfs_ifork_t *, int); 588void xfs_iext_realloc_direct(xfs_ifork_t *, int);
595void xfs_iext_realloc_indirect(xfs_ifork_t *, int);
596void xfs_iext_indirect_to_direct(xfs_ifork_t *);
597void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); 589void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
598void xfs_iext_inline_to_direct(xfs_ifork_t *, int); 590void xfs_iext_inline_to_direct(xfs_ifork_t *, int);
599void xfs_iext_destroy(xfs_ifork_t *); 591void xfs_iext_destroy(xfs_ifork_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 977c4aec587e..47d5b663c37e 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -263,14 +263,6 @@ xfs_inode_item_format(
263 } 263 }
264 264
265 /* 265 /*
266 * We don't have to worry about re-ordering here because
267 * the update_size field is protected by the inode lock
268 * and we have that held in exclusive mode.
269 */
270 if (ip->i_update_size)
271 ip->i_update_size = 0;
272
273 /*
274 * Make sure to get the latest atime from the Linux inode. 266 * Make sure to get the latest atime from the Linux inode.
275 */ 267 */
276 xfs_synchronize_atime(ip); 268 xfs_synchronize_atime(ip);
@@ -712,8 +704,6 @@ xfs_inode_item_unlock(
712 * Clear out the fields of the inode log item particular 704 * Clear out the fields of the inode log item particular
713 * to the current transaction. 705 * to the current transaction.
714 */ 706 */
715 iip->ili_ilock_recur = 0;
716 iip->ili_iolock_recur = 0;
717 iip->ili_flags = 0; 707 iip->ili_flags = 0;
718 708
719 /* 709 /*
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index a52ac125f055..65bae4c9b8bf 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -137,8 +137,6 @@ typedef struct xfs_inode_log_item {
137 struct xfs_inode *ili_inode; /* inode ptr */ 137 struct xfs_inode *ili_inode; /* inode ptr */
138 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ 138 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
139 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ 139 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
140 unsigned short ili_ilock_recur; /* lock recursion count */
141 unsigned short ili_iolock_recur; /* lock recursion count */
142 unsigned short ili_flags; /* misc flags */ 140 unsigned short ili_flags; /* misc flags */
143 unsigned short ili_logged; /* flushed logged data */ 141 unsigned short ili_logged; /* flushed logged data */
144 unsigned int ili_last_fields; /* fields when flushed */ 142 unsigned int ili_last_fields; /* fields when flushed */
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h
index 7a28191cb0de..b8e4ee4e89a4 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/xfs_inum.h
@@ -72,7 +72,6 @@ struct xfs_mount;
72 72
73#if XFS_BIG_INUMS 73#if XFS_BIG_INUMS
74#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) 74#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL))
75#define XFS_INO64_OFFSET ((xfs_ino_t)(1ULL << 32))
76#else 75#else
77#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) 76#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL))
78#endif 77#endif
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index aeb2d2221c7d..b68f9107e26c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -39,7 +39,7 @@
39#include "xfs_error.h" 39#include "xfs_error.h"
40#include "xfs_btree.h" 40#include "xfs_btree.h"
41 41
42int 42STATIC int
43xfs_internal_inum( 43xfs_internal_inum(
44 xfs_mount_t *mp, 44 xfs_mount_t *mp,
45 xfs_ino_t ino) 45 xfs_ino_t ino)
@@ -353,9 +353,6 @@ xfs_bulkstat(
353 int end_of_ag; /* set if we've seen the ag end */ 353 int end_of_ag; /* set if we've seen the ag end */
354 int error; /* error code */ 354 int error; /* error code */
355 int fmterror;/* bulkstat formatter result */ 355 int fmterror;/* bulkstat formatter result */
356 __int32_t gcnt; /* current btree rec's count */
357 xfs_inofree_t gfree; /* current btree rec's free mask */
358 xfs_agino_t gino; /* current btree rec's start inode */
359 int i; /* loop index */ 356 int i; /* loop index */
360 int icount; /* count of inodes good in irbuf */ 357 int icount; /* count of inodes good in irbuf */
361 size_t irbsize; /* size of irec buffer in bytes */ 358 size_t irbsize; /* size of irec buffer in bytes */
@@ -442,40 +439,43 @@ xfs_bulkstat(
442 * we need to get the remainder of the chunk we're in. 439 * we need to get the remainder of the chunk we're in.
443 */ 440 */
444 if (agino > 0) { 441 if (agino > 0) {
442 xfs_inobt_rec_incore_t r;
443
445 /* 444 /*
446 * Lookup the inode chunk that this inode lives in. 445 * Lookup the inode chunk that this inode lives in.
447 */ 446 */
448 error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp); 447 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE,
448 &tmp);
449 if (!error && /* no I/O error */ 449 if (!error && /* no I/O error */
450 tmp && /* lookup succeeded */ 450 tmp && /* lookup succeeded */
451 /* got the record, should always work */ 451 /* got the record, should always work */
452 !(error = xfs_inobt_get_rec(cur, &gino, &gcnt, 452 !(error = xfs_inobt_get_rec(cur, &r, &i)) &&
453 &gfree, &i)) &&
454 i == 1 && 453 i == 1 &&
455 /* this is the right chunk */ 454 /* this is the right chunk */
456 agino < gino + XFS_INODES_PER_CHUNK && 455 agino < r.ir_startino + XFS_INODES_PER_CHUNK &&
457 /* lastino was not last in chunk */ 456 /* lastino was not last in chunk */
458 (chunkidx = agino - gino + 1) < 457 (chunkidx = agino - r.ir_startino + 1) <
459 XFS_INODES_PER_CHUNK && 458 XFS_INODES_PER_CHUNK &&
460 /* there are some left allocated */ 459 /* there are some left allocated */
461 xfs_inobt_maskn(chunkidx, 460 xfs_inobt_maskn(chunkidx,
462 XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) { 461 XFS_INODES_PER_CHUNK - chunkidx) &
462 ~r.ir_free) {
463 /* 463 /*
464 * Grab the chunk record. Mark all the 464 * Grab the chunk record. Mark all the
465 * uninteresting inodes (because they're 465 * uninteresting inodes (because they're
466 * before our start point) free. 466 * before our start point) free.
467 */ 467 */
468 for (i = 0; i < chunkidx; i++) { 468 for (i = 0; i < chunkidx; i++) {
469 if (XFS_INOBT_MASK(i) & ~gfree) 469 if (XFS_INOBT_MASK(i) & ~r.ir_free)
470 gcnt++; 470 r.ir_freecount++;
471 } 471 }
472 gfree |= xfs_inobt_maskn(0, chunkidx); 472 r.ir_free |= xfs_inobt_maskn(0, chunkidx);
473 irbp->ir_startino = gino; 473 irbp->ir_startino = r.ir_startino;
474 irbp->ir_freecount = gcnt; 474 irbp->ir_freecount = r.ir_freecount;
475 irbp->ir_free = gfree; 475 irbp->ir_free = r.ir_free;
476 irbp++; 476 irbp++;
477 agino = gino + XFS_INODES_PER_CHUNK; 477 agino = r.ir_startino + XFS_INODES_PER_CHUNK;
478 icount = XFS_INODES_PER_CHUNK - gcnt; 478 icount = XFS_INODES_PER_CHUNK - r.ir_freecount;
479 } else { 479 } else {
480 /* 480 /*
481 * If any of those tests failed, bump the 481 * If any of those tests failed, bump the
@@ -493,7 +493,7 @@ xfs_bulkstat(
493 /* 493 /*
494 * Start of ag. Lookup the first inode chunk. 494 * Start of ag. Lookup the first inode chunk.
495 */ 495 */
496 error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp); 496 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
497 icount = 0; 497 icount = 0;
498 } 498 }
499 /* 499 /*
@@ -501,6 +501,8 @@ xfs_bulkstat(
501 * until we run out of inodes or space in the buffer. 501 * until we run out of inodes or space in the buffer.
502 */ 502 */
503 while (irbp < irbufend && icount < ubcount) { 503 while (irbp < irbufend && icount < ubcount) {
504 xfs_inobt_rec_incore_t r;
505
504 /* 506 /*
505 * Loop as long as we're unable to read the 507 * Loop as long as we're unable to read the
506 * inode btree. 508 * inode btree.
@@ -510,51 +512,55 @@ xfs_bulkstat(
510 if (XFS_AGINO_TO_AGBNO(mp, agino) >= 512 if (XFS_AGINO_TO_AGBNO(mp, agino) >=
511 be32_to_cpu(agi->agi_length)) 513 be32_to_cpu(agi->agi_length))
512 break; 514 break;
513 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, 515 error = xfs_inobt_lookup(cur, agino,
514 &tmp); 516 XFS_LOOKUP_GE, &tmp);
515 cond_resched(); 517 cond_resched();
516 } 518 }
517 /* 519 /*
518 * If ran off the end of the ag either with an error, 520 * If ran off the end of the ag either with an error,
519 * or the normal way, set end and stop collecting. 521 * or the normal way, set end and stop collecting.
520 */ 522 */
521 if (error || 523 if (error) {
522 (error = xfs_inobt_get_rec(cur, &gino, &gcnt,
523 &gfree, &i)) ||
524 i == 0) {
525 end_of_ag = 1; 524 end_of_ag = 1;
526 break; 525 break;
527 } 526 }
527
528 error = xfs_inobt_get_rec(cur, &r, &i);
529 if (error || i == 0) {
530 end_of_ag = 1;
531 break;
532 }
533
528 /* 534 /*
529 * If this chunk has any allocated inodes, save it. 535 * If this chunk has any allocated inodes, save it.
530 * Also start read-ahead now for this chunk. 536 * Also start read-ahead now for this chunk.
531 */ 537 */
532 if (gcnt < XFS_INODES_PER_CHUNK) { 538 if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
533 /* 539 /*
534 * Loop over all clusters in the next chunk. 540 * Loop over all clusters in the next chunk.
535 * Do a readahead if there are any allocated 541 * Do a readahead if there are any allocated
536 * inodes in that cluster. 542 * inodes in that cluster.
537 */ 543 */
538 for (agbno = XFS_AGINO_TO_AGBNO(mp, gino), 544 agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);
539 chunkidx = 0; 545 for (chunkidx = 0;
540 chunkidx < XFS_INODES_PER_CHUNK; 546 chunkidx < XFS_INODES_PER_CHUNK;
541 chunkidx += nicluster, 547 chunkidx += nicluster,
542 agbno += nbcluster) { 548 agbno += nbcluster) {
543 if (xfs_inobt_maskn(chunkidx, 549 if (xfs_inobt_maskn(chunkidx, nicluster)
544 nicluster) & ~gfree) 550 & ~r.ir_free)
545 xfs_btree_reada_bufs(mp, agno, 551 xfs_btree_reada_bufs(mp, agno,
546 agbno, nbcluster); 552 agbno, nbcluster);
547 } 553 }
548 irbp->ir_startino = gino; 554 irbp->ir_startino = r.ir_startino;
549 irbp->ir_freecount = gcnt; 555 irbp->ir_freecount = r.ir_freecount;
550 irbp->ir_free = gfree; 556 irbp->ir_free = r.ir_free;
551 irbp++; 557 irbp++;
552 icount += XFS_INODES_PER_CHUNK - gcnt; 558 icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
553 } 559 }
554 /* 560 /*
555 * Set agino to after this chunk and bump the cursor. 561 * Set agino to after this chunk and bump the cursor.
556 */ 562 */
557 agino = gino + XFS_INODES_PER_CHUNK; 563 agino = r.ir_startino + XFS_INODES_PER_CHUNK;
558 error = xfs_btree_increment(cur, 0, &tmp); 564 error = xfs_btree_increment(cur, 0, &tmp);
559 cond_resched(); 565 cond_resched();
560 } 566 }
@@ -820,9 +826,7 @@ xfs_inumbers(
820 int bufidx; 826 int bufidx;
821 xfs_btree_cur_t *cur; 827 xfs_btree_cur_t *cur;
822 int error; 828 int error;
823 __int32_t gcnt; 829 xfs_inobt_rec_incore_t r;
824 xfs_inofree_t gfree;
825 xfs_agino_t gino;
826 int i; 830 int i;
827 xfs_ino_t ino; 831 xfs_ino_t ino;
828 int left; 832 int left;
@@ -855,7 +859,8 @@ xfs_inumbers(
855 continue; 859 continue;
856 } 860 }
857 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); 861 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
858 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); 862 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
863 &tmp);
859 if (error) { 864 if (error) {
860 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 865 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
861 cur = NULL; 866 cur = NULL;
@@ -870,9 +875,8 @@ xfs_inumbers(
870 continue; 875 continue;
871 } 876 }
872 } 877 }
873 if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, 878 error = xfs_inobt_get_rec(cur, &r, &i);
874 &i)) || 879 if (error || i == 0) {
875 i == 0) {
876 xfs_buf_relse(agbp); 880 xfs_buf_relse(agbp);
877 agbp = NULL; 881 agbp = NULL;
878 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 882 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
@@ -881,10 +885,12 @@ xfs_inumbers(
881 agino = 0; 885 agino = 0;
882 continue; 886 continue;
883 } 887 }
884 agino = gino + XFS_INODES_PER_CHUNK - 1; 888 agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
885 buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); 889 buffer[bufidx].xi_startino =
886 buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; 890 XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
887 buffer[bufidx].xi_allocmask = ~gfree; 891 buffer[bufidx].xi_alloccount =
892 XFS_INODES_PER_CHUNK - r.ir_freecount;
893 buffer[bufidx].xi_allocmask = ~r.ir_free;
888 bufidx++; 894 bufidx++;
889 left--; 895 left--;
890 if (bufidx == bcount) { 896 if (bufidx == bcount) {
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 1fb04e7deb61..20792bf45946 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -99,11 +99,6 @@ xfs_bulkstat_one(
99 void *dibuff, 99 void *dibuff,
100 int *stat); 100 int *stat);
101 101
102int
103xfs_internal_inum(
104 xfs_mount_t *mp,
105 xfs_ino_t ino);
106
107typedef int (*inumbers_fmt_pf)( 102typedef int (*inumbers_fmt_pf)(
108 void __user *ubuffer, /* buffer to write to */ 103 void __user *ubuffer, /* buffer to write to */
109 const xfs_inogrp_t *buffer, /* buffer to read from */ 104 const xfs_inogrp_t *buffer, /* buffer to read from */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index bcad5f4c1fd1..679c7c4926a2 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -451,8 +451,6 @@ extern int xlog_find_tail(xlog_t *log,
451extern int xlog_recover(xlog_t *log); 451extern int xlog_recover(xlog_t *log);
452extern int xlog_recover_finish(xlog_t *log); 452extern int xlog_recover_finish(xlog_t *log);
453extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 453extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
454extern void xlog_recover_process_iunlinks(xlog_t *log);
455
456extern struct xfs_buf *xlog_get_bp(xlog_t *, int); 454extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
457extern void xlog_put_bp(struct xfs_buf *); 455extern void xlog_put_bp(struct xfs_buf *);
458 456
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 47da2fb45377..1099395d7d6c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3263,7 +3263,7 @@ xlog_recover_process_one_iunlink(
3263 * freeing of the inode and its removal from the list must be 3263 * freeing of the inode and its removal from the list must be
3264 * atomic. 3264 * atomic.
3265 */ 3265 */
3266void 3266STATIC void
3267xlog_recover_process_iunlinks( 3267xlog_recover_process_iunlinks(
3268 xlog_t *log) 3268 xlog_t *log)
3269{ 3269{
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5c6f092659c1..8b6c9e807efb 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1568,7 +1568,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1568 * 1568 *
1569 * The m_sb_lock must be held when this routine is called. 1569 * The m_sb_lock must be held when this routine is called.
1570 */ 1570 */
1571int 1571STATIC int
1572xfs_mod_incore_sb_unlocked( 1572xfs_mod_incore_sb_unlocked(
1573 xfs_mount_t *mp, 1573 xfs_mount_t *mp,
1574 xfs_sb_field_t field, 1574 xfs_sb_field_t field,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a5122382afde..a6c023bc0fb2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -414,13 +414,10 @@ typedef struct xfs_mod_sb {
414 414
415extern int xfs_log_sbcount(xfs_mount_t *, uint); 415extern int xfs_log_sbcount(xfs_mount_t *, uint);
416extern int xfs_mountfs(xfs_mount_t *mp); 416extern int xfs_mountfs(xfs_mount_t *mp);
417extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
418 417
419extern void xfs_unmountfs(xfs_mount_t *); 418extern void xfs_unmountfs(xfs_mount_t *);
420extern int xfs_unmountfs_writesb(xfs_mount_t *); 419extern int xfs_unmountfs_writesb(xfs_mount_t *);
421extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 420extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
422extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
423 int64_t, int);
424extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, 421extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
425 uint, int); 422 uint, int);
426extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); 423extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index afee7eb24323..4b0613d99faa 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -564,35 +564,6 @@ xfs_mru_cache_lookup(
564} 564}
565 565
566/* 566/*
567 * To look up an element using its key, but leave its location in the internal
568 * lists alone, call xfs_mru_cache_peek(). If the element isn't found, this
569 * function returns NULL.
570 *
571 * See the comments above the declaration of the xfs_mru_cache_lookup() function
572 * for important locking information pertaining to this call.
573 */
574void *
575xfs_mru_cache_peek(
576 xfs_mru_cache_t *mru,
577 unsigned long key)
578{
579 xfs_mru_cache_elem_t *elem;
580
581 ASSERT(mru && mru->lists);
582 if (!mru || !mru->lists)
583 return NULL;
584
585 spin_lock(&mru->lock);
586 elem = radix_tree_lookup(&mru->store, key);
587 if (!elem)
588 spin_unlock(&mru->lock);
589 else
590 __release(mru_lock); /* help sparse not be stupid */
591
592 return elem ? elem->value : NULL;
593}
594
595/*
596 * To release the internal data structure spinlock after having performed an 567 * To release the internal data structure spinlock after having performed an
597 * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() 568 * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done()
598 * with the data store pointer. 569 * with the data store pointer.
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index dd58ea1bbebe..5d439f34b0c9 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -49,7 +49,6 @@ int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
49void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); 49void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
50void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); 50void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
51void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); 51void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
52void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
53void xfs_mru_cache_done(struct xfs_mru_cache *mru); 52void xfs_mru_cache_done(struct xfs_mru_cache *mru);
54 53
55#endif /* __XFS_MRU_CACHE_H__ */ 54#endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index fea68615ed23..3f816ad7ff19 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -88,90 +88,6 @@ xfs_write_clear_setuid(
88} 88}
89 89
90/* 90/*
91 * Handle logging requirements of various synchronous types of write.
92 */
93int
94xfs_write_sync_logforce(
95 xfs_mount_t *mp,
96 xfs_inode_t *ip)
97{
98 int error = 0;
99
100 /*
101 * If we're treating this as O_DSYNC and we have not updated the
102 * size, force the log.
103 */
104 if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
105 !(ip->i_update_size)) {
106 xfs_inode_log_item_t *iip = ip->i_itemp;
107
108 /*
109 * If an allocation transaction occurred
110 * without extending the size, then we have to force
111 * the log up the proper point to ensure that the
112 * allocation is permanent. We can't count on
113 * the fact that buffered writes lock out direct I/O
114 * writes - the direct I/O write could have extended
115 * the size nontransactionally, then finished before
116 * we started. xfs_write_file will think that the file
117 * didn't grow but the update isn't safe unless the
118 * size change is logged.
119 *
120 * Force the log if we've committed a transaction
121 * against the inode or if someone else has and
122 * the commit record hasn't gone to disk (e.g.
123 * the inode is pinned). This guarantees that
124 * all changes affecting the inode are permanent
125 * when we return.
126 */
127 if (iip && iip->ili_last_lsn) {
128 error = _xfs_log_force(mp, iip->ili_last_lsn,
129 XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
130 } else if (xfs_ipincount(ip) > 0) {
131 error = _xfs_log_force(mp, (xfs_lsn_t)0,
132 XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
133 }
134
135 } else {
136 xfs_trans_t *tp;
137
138 /*
139 * O_SYNC or O_DSYNC _with_ a size update are handled
140 * the same way.
141 *
142 * If the write was synchronous then we need to make
143 * sure that the inode modification time is permanent.
144 * We'll have updated the timestamp above, so here
145 * we use a synchronous transaction to log the inode.
146 * It's not fast, but it's necessary.
147 *
148 * If this a dsync write and the size got changed
149 * non-transactionally, then we need to ensure that
150 * the size change gets logged in a synchronous
151 * transaction.
152 */
153 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
154 if ((error = xfs_trans_reserve(tp, 0,
155 XFS_SWRITE_LOG_RES(mp),
156 0, 0, 0))) {
157 /* Transaction reserve failed */
158 xfs_trans_cancel(tp, 0);
159 } else {
160 /* Transaction reserve successful */
161 xfs_ilock(ip, XFS_ILOCK_EXCL);
162 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
163 xfs_trans_ihold(tp, ip);
164 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
165 xfs_trans_set_sync(tp);
166 error = xfs_trans_commit(tp, 0);
167 xfs_iunlock(ip, XFS_ILOCK_EXCL);
168 }
169 }
170
171 return error;
172}
173
174/*
175 * Force a shutdown of the filesystem instantly while keeping 91 * Force a shutdown of the filesystem instantly while keeping
176 * the filesystem consistent. We don't do an unmount here; just shutdown 92 * the filesystem consistent. We don't do an unmount here; just shutdown
177 * the shop, make sure that absolutely nothing persistent happens to 93 * the shop, make sure that absolutely nothing persistent happens to
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f76c003ec55d..f5e4874c37d8 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -68,7 +68,6 @@ xfs_get_extsz_hint(
68 * Prototypes for functions in xfs_rw.c. 68 * Prototypes for functions in xfs_rw.c.
69 */ 69 */
70extern int xfs_write_clear_setuid(struct xfs_inode *ip); 70extern int xfs_write_clear_setuid(struct xfs_inode *ip);
71extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip);
72extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); 71extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
73extern int xfs_bioerror(struct xfs_buf *bp); 72extern int xfs_bioerror(struct xfs_buf *bp);
74extern int xfs_bioerror_relse(struct xfs_buf *bp); 73extern int xfs_bioerror_relse(struct xfs_buf *bp);
@@ -78,10 +77,4 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
78extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, 77extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
79 xfs_buf_t *bp, xfs_daddr_t blkno); 78 xfs_buf_t *bp, xfs_daddr_t blkno);
80 79
81/*
82 * Prototypes for functions in xfs_vnodeops.c.
83 */
84extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
85 int flags);
86
87#endif /* __XFS_RW_H__ */ 80#endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 775249a54f6f..ed47fc77759c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -68,7 +68,7 @@ typedef struct xfs_trans_header {
68#define XFS_TRANS_GROWFS 14 68#define XFS_TRANS_GROWFS 14
69#define XFS_TRANS_STRAT_WRITE 15 69#define XFS_TRANS_STRAT_WRITE 15
70#define XFS_TRANS_DIOSTRAT 16 70#define XFS_TRANS_DIOSTRAT 16
71#define XFS_TRANS_WRITE_SYNC 17 71/* 17 was XFS_TRANS_WRITE_SYNC */
72#define XFS_TRANS_WRITEID 18 72#define XFS_TRANS_WRITEID 18
73#define XFS_TRANS_ADDAFORK 19 73#define XFS_TRANS_ADDAFORK 19
74#define XFS_TRANS_ATTRINVAL 20 74#define XFS_TRANS_ATTRINVAL 20
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 8ee2f8c8b0a6..218829e6a152 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -307,7 +307,7 @@ xfs_trans_read_buf(
307 return (flags & XFS_BUF_TRYLOCK) ? 307 return (flags & XFS_BUF_TRYLOCK) ?
308 EAGAIN : XFS_ERROR(ENOMEM); 308 EAGAIN : XFS_ERROR(ENOMEM);
309 309
310 if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { 310 if (XFS_BUF_GETERROR(bp) != 0) {
311 xfs_ioerror_alert("xfs_trans_read_buf", mp, 311 xfs_ioerror_alert("xfs_trans_read_buf", mp,
312 bp, blkno); 312 bp, blkno);
313 error = XFS_BUF_GETERROR(bp); 313 error = XFS_BUF_GETERROR(bp);
@@ -315,7 +315,7 @@ xfs_trans_read_buf(
315 return error; 315 return error;
316 } 316 }
317#ifdef DEBUG 317#ifdef DEBUG
318 if (xfs_do_error && (bp != NULL)) { 318 if (xfs_do_error) {
319 if (xfs_error_target == target) { 319 if (xfs_error_target == target) {
320 if (((xfs_req_num++) % xfs_error_mod) == 0) { 320 if (((xfs_req_num++) % xfs_error_mod) == 0) {
321 xfs_buf_relse(bp); 321 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 23d276af2e0c..785ff101da0a 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -49,30 +49,7 @@ xfs_trans_inode_broot_debug(
49 49
50 50
51/* 51/*
52 * Get and lock the inode for the caller if it is not already 52 * Get an inode and join it to the transaction.
53 * locked within the given transaction. If it is already locked
54 * within the transaction, just increment its lock recursion count
55 * and return a pointer to it.
56 *
57 * For an inode to be locked in a transaction, the inode lock, as
58 * opposed to the io lock, must be taken exclusively. This ensures
59 * that the inode can be involved in only 1 transaction at a time.
60 * Lock recursion is handled on the io lock, but only for lock modes
61 * of equal or lesser strength. That is, you can recur on the io lock
62 * held EXCL with a SHARED request but not vice versa. Also, if
63 * the inode is already a part of the transaction then you cannot
64 * go from not holding the io lock to having it EXCL or SHARED.
65 *
66 * Use the inode cache routine xfs_inode_incore() to find the inode
67 * if it is already owned by this transaction.
68 *
69 * If we don't already own the inode, use xfs_iget() to get it.
70 * Since the inode log item structure is embedded in the incore
71 * inode structure and is initialized when the inode is brought
72 * into memory, there is nothing to do with it here.
73 *
74 * If the given transaction pointer is NULL, just call xfs_iget().
75 * This simplifies code which must handle both cases.
76 */ 53 */
77int 54int
78xfs_trans_iget( 55xfs_trans_iget(
@@ -84,62 +61,11 @@ xfs_trans_iget(
84 xfs_inode_t **ipp) 61 xfs_inode_t **ipp)
85{ 62{
86 int error; 63 int error;
87 xfs_inode_t *ip;
88
89 /*
90 * If the transaction pointer is NULL, just call the normal
91 * xfs_iget().
92 */
93 if (tp == NULL)
94 return xfs_iget(mp, NULL, ino, flags, lock_flags, ipp, 0);
95
96 /*
97 * If we find the inode in core with this transaction
98 * pointer in its i_transp field, then we know we already
99 * have it locked. In this case we just increment the lock
100 * recursion count and return the inode to the caller.
101 * Assert that the inode is already locked in the mode requested
102 * by the caller. We cannot do lock promotions yet, so
103 * die if someone gets this wrong.
104 */
105 if ((ip = xfs_inode_incore(tp->t_mountp, ino, tp)) != NULL) {
106 /*
107 * Make sure that the inode lock is held EXCL and
108 * that the io lock is never upgraded when the inode
109 * is already a part of the transaction.
110 */
111 ASSERT(ip->i_itemp != NULL);
112 ASSERT(lock_flags & XFS_ILOCK_EXCL);
113 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
114 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
115 xfs_isilocked(ip, XFS_IOLOCK_EXCL));
116 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
117 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
118 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
119 xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
120 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
121 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
122
123 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
124 ip->i_itemp->ili_iolock_recur++;
125 }
126 if (lock_flags & XFS_ILOCK_EXCL) {
127 ip->i_itemp->ili_ilock_recur++;
128 }
129 *ipp = ip;
130 return 0;
131 }
132
133 ASSERT(lock_flags & XFS_ILOCK_EXCL);
134 error = xfs_iget(tp->t_mountp, tp, ino, flags, lock_flags, &ip, 0);
135 if (error) {
136 return error;
137 }
138 ASSERT(ip != NULL);
139 64
140 xfs_trans_ijoin(tp, ip, lock_flags); 65 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0);
141 *ipp = ip; 66 if (!error && tp)
142 return 0; 67 xfs_trans_ijoin(tp, *ipp, lock_flags);
68 return error;
143} 69}
144 70
145/* 71/*
@@ -163,8 +89,6 @@ xfs_trans_ijoin(
163 xfs_inode_item_init(ip, ip->i_mount); 89 xfs_inode_item_init(ip, ip->i_mount);
164 iip = ip->i_itemp; 90 iip = ip->i_itemp;
165 ASSERT(iip->ili_flags == 0); 91 ASSERT(iip->ili_flags == 0);
166 ASSERT(iip->ili_ilock_recur == 0);
167 ASSERT(iip->ili_iolock_recur == 0);
168 92
169 /* 93 /*
170 * Get a log_item_desc to point at the new item. 94 * Get a log_item_desc to point at the new item.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 492d75bae2bf..a434f287962d 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -611,7 +611,7 @@ xfs_fsync(
611 xfs_inode_t *ip) 611 xfs_inode_t *ip)
612{ 612{
613 xfs_trans_t *tp; 613 xfs_trans_t *tp;
614 int error; 614 int error = 0;
615 int log_flushed = 0, changed = 1; 615 int log_flushed = 0, changed = 1;
616 616
617 xfs_itrace_entry(ip); 617 xfs_itrace_entry(ip);
@@ -619,14 +619,9 @@ xfs_fsync(
619 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 619 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
620 return XFS_ERROR(EIO); 620 return XFS_ERROR(EIO);
621 621
622 /* capture size updates in I/O completion before writing the inode. */
623 error = xfs_wait_on_pages(ip, 0, -1);
624 if (error)
625 return XFS_ERROR(error);
626
627 /* 622 /*
628 * We always need to make sure that the required inode state is safe on 623 * We always need to make sure that the required inode state is safe on
629 * disk. The vnode might be clean but we still might need to force the 624 * disk. The inode might be clean but we still might need to force the
630 * log because of committed transactions that haven't hit the disk yet. 625 * log because of committed transactions that haven't hit the disk yet.
631 * Likewise, there could be unflushed non-transactional changes to the 626 * Likewise, there could be unflushed non-transactional changes to the
632 * inode core that have to go to disk and this requires us to issue 627 * inode core that have to go to disk and this requires us to issue
@@ -638,7 +633,7 @@ xfs_fsync(
638 */ 633 */
639 xfs_ilock(ip, XFS_ILOCK_SHARED); 634 xfs_ilock(ip, XFS_ILOCK_SHARED);
640 635
641 if (!(ip->i_update_size || ip->i_update_core)) { 636 if (!ip->i_update_core) {
642 /* 637 /*
643 * Timestamps/size haven't changed since last inode flush or 638 * Timestamps/size haven't changed since last inode flush or
644 * inode transaction commit. That means either nothing got 639 * inode transaction commit. That means either nothing got
@@ -718,7 +713,7 @@ xfs_fsync(
718 * when the link count isn't zero and by xfs_dm_punch_hole() when 713 * when the link count isn't zero and by xfs_dm_punch_hole() when
719 * punching a hole to EOF. 714 * punching a hole to EOF.
720 */ 715 */
721int 716STATIC int
722xfs_free_eofblocks( 717xfs_free_eofblocks(
723 xfs_mount_t *mp, 718 xfs_mount_t *mp,
724 xfs_inode_t *ip, 719 xfs_inode_t *ip,
@@ -1476,8 +1471,8 @@ xfs_create(
1476 if (error == ENOSPC) { 1471 if (error == ENOSPC) {
1477 /* flush outstanding delalloc blocks and retry */ 1472 /* flush outstanding delalloc blocks and retry */
1478 xfs_flush_inodes(dp); 1473 xfs_flush_inodes(dp);
1479 error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, 1474 error = xfs_trans_reserve(tp, resblks, log_res, 0,
1480 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); 1475 XFS_TRANS_PERM_LOG_RES, log_count);
1481 } 1476 }
1482 if (error == ENOSPC) { 1477 if (error == ENOSPC) {
1483 /* No space at all so try a "no-allocation" reservation */ 1478 /* No space at all so try a "no-allocation" reservation */