Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/v9fs.c | 33
-rw-r--r--  fs/9p/v9fs_vfs.h | 1
-rw-r--r--  fs/9p/vfs_file.c | 19
-rw-r--r--  fs/9p/vfs_inode.c | 2
-rw-r--r--  fs/bio.c | 9
-rw-r--r--  fs/btrfs/file.c | 6
-rw-r--r--  fs/cachefiles/namei.c | 12
-rw-r--r--  fs/cifs/CHANGES | 5
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/cifsglob.h | 3
-rw-r--r--  fs/cifs/cifspdu.h | 6
-rw-r--r--  fs/cifs/cifsproto.h | 7
-rw-r--r--  fs/cifs/cifssmb.c | 360
-rw-r--r--  fs/cifs/connect.c | 38
-rw-r--r--  fs/cifs/inode.c | 15
-rw-r--r--  fs/cifs/misc.c | 2
-rw-r--r--  fs/cifs/readdir.c | 8
-rw-r--r--  fs/cifs/sess.c | 11
-rw-r--r--  fs/cifs/xattr.c | 8
-rw-r--r--  fs/compat_ioctl.c | 9
-rw-r--r--  fs/dlm/ast.c | 74
-rw-r--r--  fs/dlm/ast.h | 4
-rw-r--r--  fs/dlm/debug_fs.c | 2
-rw-r--r--  fs/dlm/dlm_internal.h | 10
-rw-r--r--  fs/dlm/lock.c | 120
-rw-r--r--  fs/dlm/lockspace.c | 14
-rw-r--r--  fs/dlm/user.c | 10
-rw-r--r--  fs/dlm/user.h | 4
-rw-r--r--  fs/exec.c | 20
-rw-r--r--  fs/ext4/ext4.h | 2
-rw-r--r--  fs/file.c | 2
-rw-r--r--  fs/fuse/dev.c | 30
-rw-r--r--  fs/gfs2/aops.c | 4
-rw-r--r--  fs/gfs2/bmap.c | 2
-rw-r--r--  fs/gfs2/glock.c | 75
-rw-r--r--  fs/gfs2/glock.h | 7
-rw-r--r--  fs/gfs2/glops.c | 16
-rw-r--r--  fs/gfs2/incore.h | 5
-rw-r--r--  fs/gfs2/inode.c | 6
-rw-r--r--  fs/gfs2/lock_dlm.c | 5
-rw-r--r--  fs/gfs2/lops.c | 4
-rw-r--r--  fs/gfs2/main.c | 28
-rw-r--r--  fs/gfs2/meta_io.c | 46
-rw-r--r--  fs/gfs2/meta_io.h | 12
-rw-r--r--  fs/gfs2/ops_fstype.c | 6
-rw-r--r--  fs/gfs2/super.c | 27
-rw-r--r--  fs/gfs2/sys.c | 2
-rw-r--r--  fs/gfs2/util.c | 1
-rw-r--r--  fs/gfs2/util.h | 1
-rw-r--r--  fs/namei.c | 14
-rw-r--r--  fs/nfs/Kconfig | 3
-rw-r--r--  fs/nfs/direct.c | 3
-rw-r--r--  fs/nfs/fscache.c | 9
-rw-r--r--  fs/nfs/iostat.h | 4
-rw-r--r--  fs/nfs/mount_clnt.c | 2
-rw-r--r--  fs/nfs/nfs2xdr.c | 2
-rw-r--r--  fs/nfs/nfs4xdr.c | 6
-rw-r--r--  fs/nfs/write.c | 3
-rw-r--r--  fs/nfsd/export.c | 10
-rw-r--r--  fs/nfsd/vfs.c | 3
-rw-r--r--  fs/partitions/check.c | 7
-rw-r--r--  fs/proc/array.c | 2
-rw-r--r--  fs/proc/base.c | 30
-rw-r--r--  fs/proc/kmsg.c | 14
-rw-r--r--  fs/proc/proc_devtree.c | 7
-rw-r--r--  fs/reiserfs/inode.c | 2
-rw-r--r--  fs/seq_file.c | 130
-rw-r--r--  fs/sysfs/inode.c | 35
-rw-r--r--  fs/xfs/Makefile | 2
-rw-r--r--  fs/xfs/linux-2.6/kmem.c | 56
-rw-r--r--  fs/xfs/linux-2.6/kmem.h | 21
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 11
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 320
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 52
-rw-r--r--  fs/xfs/linux-2.6/xfs_fs_subr.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 21
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.h | 12
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c | 62
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.h | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 169
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 186
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h | 81
-rw-r--r--  fs/xfs/linux-2.6/xfs_xattr.c | 27
-rw-r--r--  fs/xfs/quota/xfs_dquot.c | 47
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c | 99
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.h | 4
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 40
-rw-r--r--  fs/xfs/quota/xfs_qm_bhv.c | 2
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 4
-rw-r--r--  fs/xfs/quota/xfs_trans_dquot.c | 49
-rw-r--r--  fs/xfs/xfs_acl.h | 4
-rw-r--r--  fs/xfs/xfs_ag.h | 16
-rw-r--r--  fs/xfs/xfs_alloc.c | 96
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 9
-rw-r--r--  fs/xfs/xfs_attr.c | 52
-rw-r--r--  fs/xfs/xfs_attr.h | 3
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 30
-rw-r--r--  fs/xfs/xfs_attr_sf.h | 2
-rw-r--r--  fs/xfs/xfs_bmap.c | 17
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 2
-rw-r--r--  fs/xfs/xfs_bmap_btree.h | 1
-rw-r--r--  fs/xfs/xfs_btree.c | 4
-rw-r--r--  fs/xfs/xfs_buf_item.c | 72
-rw-r--r--  fs/xfs/xfs_da_btree.c | 4
-rw-r--r--  fs/xfs/xfs_da_btree.h | 5
-rw-r--r--  fs/xfs/xfs_dfrag.c | 43
-rw-r--r--  fs/xfs/xfs_dfrag.h | 3
-rw-r--r--  fs/xfs/xfs_dir2.c | 8
-rw-r--r--  fs/xfs/xfs_dir2.h | 4
-rw-r--r--  fs/xfs/xfs_dir2_block.c | 9
-rw-r--r--  fs/xfs/xfs_dir2_leaf.c | 2
-rw-r--r--  fs/xfs/xfs_dir2_node.c | 2
-rw-r--r--  fs/xfs/xfs_dir2_node.h | 2
-rw-r--r--  fs/xfs/xfs_dir2_sf.c | 2
-rw-r--r--  fs/xfs/xfs_extfree_item.c | 4
-rw-r--r--  fs/xfs/xfs_filestream.c | 42
-rw-r--r--  fs/xfs/xfs_filestream.h | 28
-rw-r--r--  fs/xfs/xfs_fsops.c | 42
-rw-r--r--  fs/xfs/xfs_ialloc.c | 62
-rw-r--r--  fs/xfs/xfs_iget.c | 10
-rw-r--r--  fs/xfs/xfs_inode.c | 126
-rw-r--r--  fs/xfs/xfs_inode.h | 11
-rw-r--r--  fs/xfs/xfs_inode_item.c | 129
-rw-r--r--  fs/xfs/xfs_inode_item.h | 6
-rw-r--r--  fs/xfs/xfs_itable.c | 12
-rw-r--r--  fs/xfs/xfs_log.c | 383
-rw-r--r--  fs/xfs/xfs_log.h | 19
-rw-r--r--  fs/xfs/xfs_log_priv.h | 5
-rw-r--r--  fs/xfs/xfs_log_recover.c | 222
-rw-r--r--  fs/xfs/xfs_log_recover.h | 23
-rw-r--r--  fs/xfs/xfs_mount.c | 181
-rw-r--r--  fs/xfs/xfs_mount.h | 29
-rw-r--r--  fs/xfs/xfs_mru_cache.c | 2
-rw-r--r--  fs/xfs/xfs_mru_cache.h | 1
-rw-r--r--  fs/xfs/xfs_quota.h | 9
-rw-r--r--  fs/xfs/xfs_rw.c | 155
-rw-r--r--  fs/xfs/xfs_rw.h | 4
-rw-r--r--  fs/xfs/xfs_trans.c | 7
-rw-r--r--  fs/xfs/xfs_trans.h | 3
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 34
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 27
-rw-r--r--  fs/xfs/xfs_types.h | 4
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 33
-rw-r--r--  fs/xfs/xfs_vnodeops.h | 10
147 files changed, 2507 insertions(+), 2168 deletions(-)
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index cf62b05e296a..7d6c2139891d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -84,7 +84,7 @@ static const match_table_t tokens = {
 
 static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 {
-	char *options;
+	char *options, *tmp_options;
 	substring_t args[MAX_OPT_ARGS];
 	char *p;
 	int option = 0;
@@ -102,9 +102,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 	if (!opts)
 		return 0;
 
-	options = kstrdup(opts, GFP_KERNEL);
-	if (!options)
+	tmp_options = kstrdup(opts, GFP_KERNEL);
+	if (!tmp_options) {
+		ret = -ENOMEM;
 		goto fail_option_alloc;
+	}
+	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
@@ -159,8 +162,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 			break;
 		case Opt_cache:
 			s = match_strdup(&args[0]);
-			if (!s)
-				goto fail_option_alloc;
+			if (!s) {
+				ret = -ENOMEM;
+				P9_DPRINTK(P9_DEBUG_ERROR,
+					"problem allocating copy of cache arg\n");
+				goto free_and_return;
+			}
 
 			if (strcmp(s, "loose") == 0)
 				v9ses->cache = CACHE_LOOSE;
@@ -173,8 +180,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 
 		case Opt_access:
 			s = match_strdup(&args[0]);
-			if (!s)
-				goto fail_option_alloc;
+			if (!s) {
+				ret = -ENOMEM;
+				P9_DPRINTK(P9_DEBUG_ERROR,
+					"problem allocating copy of access arg\n");
+				goto free_and_return;
+			}
 
 			v9ses->flags &= ~V9FS_ACCESS_MASK;
 			if (strcmp(s, "user") == 0)
@@ -194,13 +205,11 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 			continue;
 		}
 	}
-	kfree(options);
-	return ret;
 
+free_and_return:
+	kfree(tmp_options);
 fail_option_alloc:
-	P9_DPRINTK(P9_DEBUG_ERROR,
-		   "failed to allocate copy of option argument\n");
-	return -ENOMEM;
+	return ret;
 }
 
 /**
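
Note on the hunk above: strsep() advances the pointer it is given, so after the parse loop `options` no longer points at the start of the kstrdup() allocation. The patch keeps `tmp_options` around solely so the final kfree() sees the original pointer. A minimal userspace sketch of the same pattern, assuming libc strdup()/strsep() in place of the kernel helpers and made-up option handling:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_opts(const char *opts)
{
	char *tmp_options, *options, *p;

	tmp_options = strdup(opts);	/* keep this pointer for free() */
	if (!tmp_options)
		return -ENOMEM;
	options = tmp_options;		/* strsep() will advance this copy */

	while ((p = strsep(&options, ",")) != NULL) {
		if (*p == '\0')
			continue;
		printf("token: %s\n", p);
	}

	/* free(options) here would be wrong: strsep() left it NULL */
	free(tmp_options);
	return 0;
}

Calling parse_opts("cache=loose,access=user") prints each token and frees the buffer exactly once.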
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 3a7560e35865..ed835836e0dc 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -60,3 +60,4 @@ void v9fs_dentry_release(struct dentry *);
 int v9fs_uflags2omode(int uflags, int extended);
 
 ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
+void v9fs_blank_wstat(struct p9_wstat *wstat);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3902bf43a088..74a0461a9ac0 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -257,6 +257,23 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	return total;
 }
 
+static int v9fs_file_fsync(struct file *filp, struct dentry *dentry,
+			   int datasync)
+{
+	struct p9_fid *fid;
+	struct p9_wstat wstat;
+	int retval;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp,
+		   dentry, datasync);
+
+	fid = filp->private_data;
+	v9fs_blank_wstat(&wstat);
+
+	retval = p9_client_wstat(fid, &wstat);
+	return retval;
+}
+
 static const struct file_operations v9fs_cached_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = do_sync_read,
@@ -266,6 +283,7 @@ static const struct file_operations v9fs_cached_file_operations = {
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock,
 	.mmap = generic_file_readonly_mmap,
+	.fsync = v9fs_file_fsync,
 };
 
 const struct file_operations v9fs_file_operations = {
@@ -276,4 +294,5 @@ const struct file_operations v9fs_file_operations = {
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock,
 	.mmap = generic_file_readonly_mmap,
+	.fsync = v9fs_file_fsync,
 };
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9d03d1ebca6f..a407fa3388c0 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -176,7 +176,7 @@ int v9fs_uflags2omode(int uflags, int extended)
  *
  */
 
-static void
+void
 v9fs_blank_wstat(struct p9_wstat *wstat)
 {
 	wstat->type = ~0;
diff --git a/fs/bio.c b/fs/bio.c
index 88094afc29ea..dc17afd672e3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -507,10 +507,8 @@ int bio_get_nr_vecs(struct block_device *bdev)
 	int nr_pages;
 
 	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages > queue_max_phys_segments(q))
-		nr_pages = queue_max_phys_segments(q);
-	if (nr_pages > queue_max_hw_segments(q))
-		nr_pages = queue_max_hw_segments(q);
+	if (nr_pages > queue_max_segments(q))
+		nr_pages = queue_max_segments(q);
 
 	return nr_pages;
 }
@@ -575,8 +573,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	 * make this too complex.
 	 */
 
-	while (bio->bi_phys_segments >= queue_max_phys_segments(q)
-	       || bio->bi_phys_segments >= queue_max_hw_segments(q)) {
+	while (bio->bi_phys_segments >= queue_max_segments(q)) {
 
 		if (retried_segments)
 			return 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9d0809629967..6ed434ac037f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -720,13 +720,15 @@ again:
 				     inode->i_ino, orig_offset);
 		BUG_ON(ret);
 	}
-	fi = btrfs_item_ptr(leaf, path->slots[0],
-			    struct btrfs_file_extent_item);
 	if (del_nr == 0) {
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
 		btrfs_mark_buffer_dirty(leaf);
 	} else {
+		fi = btrfs_item_ptr(leaf, del_slot - 1,
+				    struct btrfs_file_extent_item);
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
 		btrfs_set_file_extent_num_bytes(leaf, fi,
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 14ac4806e291..eeb4986ea7db 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -348,7 +348,17 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 	dir = dget_parent(object->dentry);
 
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
-	ret = cachefiles_bury_object(cache, dir, object->dentry);
+
+	/* we need to check that our parent is _still_ our parent - it may have
+	 * been renamed */
+	if (dir == object->dentry->d_parent) {
+		ret = cachefiles_bury_object(cache, dir, object->dentry);
+	} else {
+		/* it got moved, presumably by cachefilesd culling it, so it's
+		 * no longer in the key path and we can ignore it */
+		mutex_unlock(&dir->d_inode->i_mutex);
+		ret = 0;
+	}
 
 	dput(dir);
 	_leave(" = %d", ret);
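
Note on the hunk above: the parent was sampled with dget_parent() before its i_mutex was taken, so the parent/child relationship has to be re-verified once the lock is held. A generic pthreads sketch of that check-after-lock discipline, with a hypothetical node structure standing in for the dentry tree:

#include <pthread.h>

struct node {
	struct node *parent;
	pthread_mutex_t lock;
};

/* Unlink child from under dir, tolerating a concurrent move. */
static int bury(struct node *dir, struct node *child)
{
	pthread_mutex_lock(&dir->lock);
	/* re-check under the lock: child may have been reparented between
	   sampling dir and acquiring dir->lock */
	if (child->parent != dir) {
		pthread_mutex_unlock(&dir->lock);
		return 0;	/* someone else moved it; nothing to do */
	}
	/* ... unlink child here, still holding dir->lock ... */
	pthread_mutex_unlock(&dir->lock);
	return 0;
}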
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 7b2600b380d7..bc0025cdd1c9 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,8 @@
+Version 1.62
+------------
+Add sockopt=TCP_NODELAY mount option. EA (xattr) routines hardened
+to more strictly handle corrupt frames.
+
 Version 1.61
 ------------
 Fix append problem to Samba servers (files opened with O_APPEND could
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index ac2b24c192f8..78c1b86d55f6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.61"
+#define CIFS_VERSION   "1.62"
 #endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4b35f7ec0583..a1c817eb291a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -149,6 +149,7 @@ struct TCP_Server_Info {
 	bool svlocal:1;			/* local server or remote */
 	bool noblocksnd;		/* use blocking sendmsg */
 	bool noautotune;		/* do not autotune send buf sizes */
+	bool tcp_nodelay;
 	atomic_t inFlight;  /* number of requests on the wire to server */
 #ifdef CONFIG_CIFS_STATS2
 	atomic_t inSend; /* requests trying to send */
@@ -204,7 +205,7 @@ struct cifsUidInfo {
 struct cifsSesInfo {
 	struct list_head smb_ses_list;
 	struct list_head tcon_list;
-	struct semaphore sesSem;
+	struct mutex session_mutex;
 #if 0
 	struct cifsUidInfo *uidInfo;	/* pointer to user info */
 #endif
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 3877737f96a6..14d036d8db11 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -415,10 +415,10 @@ struct smb_hdr {
 	__u8 WordCount;
 } __attribute__((packed));
 /* given a pointer to an smb_hdr retrieve the value of byte count */
-#define BCC(smb_var) (*(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
-#define BCC_LE(smb_var) (*(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
+#define BCC(smb_var) (*(__u16 *)((char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount)))
+#define BCC_LE(smb_var) (*(__le16 *)((char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount)))
 /* given a pointer to an smb_hdr retrieve the pointer to the byte area */
-#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
+#define pByteArea(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount) + 2)
 
 /*
  * Computer Name Length (since Netbios name was length 16 with last byte 0x20)
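
Note on the hunk above: the added parentheses are standard macro hygiene, since an unparenthesized argument is substituted textually. A toy stand-in macro shows what goes wrong; the struct and numbers here are invented for the demo:

#include <stdio.h>

struct hdr { int wc; };

#define PAYLOAD_BAD(h)	((char *)h + 2 * h->wc)		/* unsafe */
#define PAYLOAD_OK(h)	((char *)(h) + 2 * (h)->wc)	/* safe */

int main(void)
{
	struct hdr a[2] = { { 1 }, { 4 } };
	struct hdr *p = a;

	/* PAYLOAD_BAD(p + 1) would expand to (char *)p + 1 + 2 * p + 1->wc,
	   which is not even valid C; the parenthesized form does what the
	   caller meant. */
	printf("%p\n", (void *)PAYLOAD_OK(p + 1));
	return 0;
}

With BCC() now usable on expressions such as &pSMBr->hdr, the new bounds check added to cifssmb.c below can call it directly.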
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 5646727e33f5..88e2bc44ac58 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -363,13 +363,10 @@ extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
 		__u32 filter, struct file *file, int multishot,
 		const struct nls_table *nls_codepage);
 extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
-		const unsigned char *searchName, char *EAData,
+		const unsigned char *searchName,
+		const unsigned char *ea_name, char *EAData,
 		size_t bufsize, const struct nls_table *nls_codepage,
 		int remap_special_chars);
-extern ssize_t CIFSSMBQueryEA(const int xid, struct cifsTconInfo *tcon,
-	const unsigned char *searchName, const unsigned char *ea_name,
-	unsigned char *ea_value, size_t buf_size,
-	const struct nls_table *nls_codepage, int remap_special_chars);
 extern int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon,
 		const char *fileName, const char *ea_name,
 		const void *ea_value, const __u16 ea_value_len,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 941441d3e386..9d17df3e0768 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -170,19 +170,19 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
 	 * need to prevent multiple threads trying to simultaneously
 	 * reconnect the same SMB session
 	 */
-	down(&ses->sesSem);
+	mutex_lock(&ses->session_mutex);
 	if (ses->need_reconnect)
 		rc = cifs_setup_session(0, ses, nls_codepage);
 
 	/* do we need to reconnect tcon? */
 	if (rc || !tcon->need_reconnect) {
-		up(&ses->sesSem);
+		mutex_unlock(&ses->session_mutex);
 		goto out;
 	}
 
 	mark_open_files_invalid(tcon);
 	rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage);
-	up(&ses->sesSem);
+	mutex_unlock(&ses->session_mutex);
 	cFYI(1, ("reconnect tcon rc = %d", rc));
 
 	if (rc)
@@ -700,13 +700,13 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
 	if (!ses || !ses->server)
 		return -EIO;
 
-	down(&ses->sesSem);
+	mutex_lock(&ses->session_mutex);
 	if (ses->need_reconnect)
 		goto session_already_dead; /* no need to send SMBlogoff if uid
 					      already closed due to reconnect */
 	rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB);
 	if (rc) {
-		up(&ses->sesSem);
+		mutex_unlock(&ses->session_mutex);
 		return rc;
 	}
 
@@ -721,7 +721,7 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
 	pSMB->AndXCommand = 0xFF;
 	rc = SendReceiveNoRsp(xid, ses, (struct smb_hdr *) pSMB, 0);
 session_already_dead:
-	up(&ses->sesSem);
+	mutex_unlock(&ses->session_mutex);
 
 	/* if session dead then we do not need to do ulogoff,
 		since server closed smb session, no sense reporting
@@ -5269,22 +5269,34 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
 	cifs_buf_release(pSMB);
 	return rc;
 }
+
 #ifdef CONFIG_CIFS_XATTR
+/*
+ * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
+ * function used by listxattr and getxattr type calls. When ea_name is set,
+ * it looks for that attribute name and stuffs that value into the EAData
+ * buffer. When ea_name is NULL, it stuffs a list of attribute names into the
+ * buffer. In both cases, the return value is either the length of the
+ * resulting data or a negative error code. If EAData is a NULL pointer then
+ * the data isn't copied to it, but the length is returned.
+ */
 ssize_t
 CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
-	       const unsigned char *searchName,
+	       const unsigned char *searchName, const unsigned char *ea_name,
 	       char *EAData, size_t buf_size,
 	       const struct nls_table *nls_codepage, int remap)
 {
 	/* BB assumes one setup word */
 	TRANSACTION2_QPI_REQ *pSMB = NULL;
 	TRANSACTION2_QPI_RSP *pSMBr = NULL;
 	int rc = 0;
 	int bytes_returned;
-	int name_len;
+	int list_len;
+	struct fealist *ea_response_data;
 	struct fea *temp_fea;
 	char *temp_ptr;
-	__u16 params, byte_count;
+	char *end_of_smb;
+	__u16 params, byte_count, data_offset;
 
 	cFYI(1, ("In Query All EAs path %s", searchName));
 QAllEAsRetry:
@@ -5294,22 +5306,22 @@ QAllEAsRetry:
 		return rc;
 
 	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
-		name_len =
+		list_len =
 		    cifsConvertToUCS((__le16 *) pSMB->FileName, searchName,
 				     PATH_MAX, nls_codepage, remap);
-		name_len++;	/* trailing null */
-		name_len *= 2;
+		list_len++;	/* trailing null */
+		list_len *= 2;
 	} else {	/* BB improve the check for buffer overruns BB */
-		name_len = strnlen(searchName, PATH_MAX);
-		name_len++;	/* trailing null */
-		strncpy(pSMB->FileName, searchName, name_len);
+		list_len = strnlen(searchName, PATH_MAX);
+		list_len++;	/* trailing null */
+		strncpy(pSMB->FileName, searchName, list_len);
 	}
 
-	params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */;
+	params = 2 /* level */ + 4 /* reserved */ + list_len /* includes NUL */;
 	pSMB->TotalDataCount = 0;
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	/* BB find exact max SMB PDU from sess structure BB */
-	pSMB->MaxDataCount = cpu_to_le16(4000);
+	pSMB->MaxDataCount = cpu_to_le16(CIFSMaxBufSize);
 	pSMB->MaxSetupCount = 0;
 	pSMB->Reserved = 0;
 	pSMB->Flags = 0;
@@ -5334,237 +5346,117 @@ QAllEAsRetry:
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
 	if (rc) {
 		cFYI(1, ("Send error in QueryAllEAs = %d", rc));
-	} else {		/* decode response */
-		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
+		goto QAllEAsOut;
+	}
 
-		/* BB also check enough total bytes returned */
-		/* BB we need to improve the validity checking
-		   of these trans2 responses */
-		if (rc || (pSMBr->ByteCount < 4))
-			rc = -EIO;	/* bad smb */
-	   /* else if (pFindData){
-		memcpy((char *) pFindData,
-		       (char *) &pSMBr->hdr.Protocol +
-		       data_offset, kl);
-		}*/ else {
-			/* check that length of list is not more than bcc */
-			/* check that each entry does not go beyond length
-			   of list */
-			/* check that each element of each entry does not
-			   go beyond end of list */
-			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
-			struct fealist *ea_response_data;
-			rc = 0;
-			/* validate_trans2_offsets() */
-			/* BB check if start of smb + data_offset > &bcc+ bcc */
-			ea_response_data = (struct fealist *)
-				(((char *) &pSMBr->hdr.Protocol) +
-				data_offset);
-			name_len = le32_to_cpu(ea_response_data->list_len);
-			cFYI(1, ("ea length %d", name_len));
-			if (name_len <= 8) {
-			/* returned EA size zeroed at top of function */
-				cFYI(1, ("empty EA list returned from server"));
-			} else {
-				/* account for ea list len */
-				name_len -= 4;
-				temp_fea = ea_response_data->list;
-				temp_ptr = (char *)temp_fea;
-				while (name_len > 0) {
-					__u16 value_len;
-					name_len -= 4;
-					temp_ptr += 4;
-					rc += temp_fea->name_len;
-				/* account for prefix user. and trailing null */
-					rc = rc + 5 + 1;
-					if (rc < (int)buf_size) {
-						memcpy(EAData, "user.", 5);
-						EAData += 5;
-						memcpy(EAData, temp_ptr,
-						       temp_fea->name_len);
-						EAData += temp_fea->name_len;
-						/* null terminate name */
-						*EAData = 0;
-						EAData = EAData + 1;
-					} else if (buf_size == 0) {
-						/* skip copy - calc size only */
-					} else {
-						/* stop before overrun buffer */
-						rc = -ERANGE;
-						break;
-					}
-					name_len -= temp_fea->name_len;
-					temp_ptr += temp_fea->name_len;
-					/* account for trailing null */
-					name_len--;
-					temp_ptr++;
-					value_len =
-						le16_to_cpu(temp_fea->value_len);
-					name_len -= value_len;
-					temp_ptr += value_len;
-					/* BB check that temp_ptr is still
-					   within the SMB BB*/
-
-					/* no trailing null to account for
-					   in value len */
-					/* go on to next EA */
-					temp_fea = (struct fea *)temp_ptr;
-				}
-			}
-		}
+	/* BB also check enough total bytes returned */
+	/* BB we need to improve the validity checking
+	   of these trans2 responses */
+
+	rc = validate_t2((struct smb_t2_rsp *)pSMBr);
+	if (rc || (pSMBr->ByteCount < 4)) {
+		rc = -EIO;	/* bad smb */
+		goto QAllEAsOut;
 	}
-	cifs_buf_release(pSMB);
-	if (rc == -EAGAIN)
-		goto QAllEAsRetry;
 
-	return (ssize_t)rc;
-}
+	/* check that length of list is not more than bcc */
+	/* check that each entry does not go beyond length
+	   of list */
+	/* check that each element of each entry does not
+	   go beyond end of list */
+	/* validate_trans2_offsets() */
+	/* BB check if start of smb + data_offset > &bcc+ bcc */
 
-ssize_t CIFSSMBQueryEA(const int xid, struct cifsTconInfo *tcon,
-		const unsigned char *searchName, const unsigned char *ea_name,
-		unsigned char *ea_value, size_t buf_size,
-		const struct nls_table *nls_codepage, int remap)
-{
-	TRANSACTION2_QPI_REQ *pSMB = NULL;
-	TRANSACTION2_QPI_RSP *pSMBr = NULL;
-	int rc = 0;
-	int bytes_returned;
-	int name_len;
-	struct fea *temp_fea;
-	char *temp_ptr;
-	__u16 params, byte_count;
+	data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
+	ea_response_data = (struct fealist *)
+				(((char *) &pSMBr->hdr.Protocol) + data_offset);
 
-	cFYI(1, ("In Query EA path %s", searchName));
-QEARetry:
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
-	if (rc)
-		return rc;
+	list_len = le32_to_cpu(ea_response_data->list_len);
+	cFYI(1, ("ea length %d", list_len));
+	if (list_len <= 8) {
+		cFYI(1, ("empty EA list returned from server"));
+		goto QAllEAsOut;
+	}
 
-	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
-		name_len =
-		    cifsConvertToUCS((__le16 *) pSMB->FileName, searchName,
-				     PATH_MAX, nls_codepage, remap);
-		name_len++;	/* trailing null */
-		name_len *= 2;
-	} else {	/* BB improve the check for buffer overruns BB */
-		name_len = strnlen(searchName, PATH_MAX);
-		name_len++;	/* trailing null */
-		strncpy(pSMB->FileName, searchName, name_len);
+	/* make sure list_len doesn't go past end of SMB */
+	end_of_smb = (char *)pByteArea(&pSMBr->hdr) + BCC(&pSMBr->hdr);
+	if ((char *)ea_response_data + list_len > end_of_smb) {
+		cFYI(1, ("EA list appears to go beyond SMB"));
+		rc = -EIO;
+		goto QAllEAsOut;
 	}
 
-	params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */;
-	pSMB->TotalDataCount = 0;
-	pSMB->MaxParameterCount = cpu_to_le16(2);
-	/* BB find exact max SMB PDU from sess structure BB */
-	pSMB->MaxDataCount = cpu_to_le16(4000);
-	pSMB->MaxSetupCount = 0;
-	pSMB->Reserved = 0;
-	pSMB->Flags = 0;
-	pSMB->Timeout = 0;
-	pSMB->Reserved2 = 0;
-	pSMB->ParameterOffset = cpu_to_le16(offsetof(
-		struct smb_com_transaction2_qpi_req, InformationLevel) - 4);
-	pSMB->DataCount = 0;
-	pSMB->DataOffset = 0;
-	pSMB->SetupCount = 1;
-	pSMB->Reserved3 = 0;
-	pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_PATH_INFORMATION);
-	byte_count = params + 1 /* pad */ ;
-	pSMB->TotalParameterCount = cpu_to_le16(params);
-	pSMB->ParameterCount = pSMB->TotalParameterCount;
-	pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
-	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
-	pSMB->ByteCount = cpu_to_le16(byte_count);
+	/* account for ea list len */
+	list_len -= 4;
+	temp_fea = ea_response_data->list;
+	temp_ptr = (char *)temp_fea;
+	while (list_len > 0) {
+		unsigned int name_len;
+		__u16 value_len;
+
+		list_len -= 4;
+		temp_ptr += 4;
+		/* make sure we can read name_len and value_len */
+		if (list_len < 0) {
+			cFYI(1, ("EA entry goes beyond length of list"));
+			rc = -EIO;
+			goto QAllEAsOut;
+		}
 
-	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
-			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-	if (rc) {
-		cFYI(1, ("Send error in Query EA = %d", rc));
-	} else {		/* decode response */
-		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
+		name_len = temp_fea->name_len;
+		value_len = le16_to_cpu(temp_fea->value_len);
+		list_len -= name_len + 1 + value_len;
+		if (list_len < 0) {
+			cFYI(1, ("EA entry goes beyond length of list"));
+			rc = -EIO;
+			goto QAllEAsOut;
+		}
 
-		/* BB also check enough total bytes returned */
-		/* BB we need to improve the validity checking
-		   of these trans2 responses */
-		if (rc || (pSMBr->ByteCount < 4))
-			rc = -EIO;	/* bad smb */
-	   /* else if (pFindData){
-		memcpy((char *) pFindData,
-		       (char *) &pSMBr->hdr.Protocol +
-		       data_offset, kl);
-		}*/ else {
-			/* check that length of list is not more than bcc */
-			/* check that each entry does not go beyond length
-			   of list */
-			/* check that each element of each entry does not
-			   go beyond end of list */
-			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
-			struct fealist *ea_response_data;
-			rc = -ENODATA;
-			/* validate_trans2_offsets() */
-			/* BB check if start of smb + data_offset > &bcc+ bcc*/
-			ea_response_data = (struct fealist *)
-				(((char *) &pSMBr->hdr.Protocol) +
-				data_offset);
-			name_len = le32_to_cpu(ea_response_data->list_len);
-			cFYI(1, ("ea length %d", name_len));
-			if (name_len <= 8) {
-			/* returned EA size zeroed at top of function */
-				cFYI(1, ("empty EA list returned from server"));
-			} else {
-				/* account for ea list len */
-				name_len -= 4;
-				temp_fea = ea_response_data->list;
-				temp_ptr = (char *)temp_fea;
-				/* loop through checking if we have a matching
-				   name and then return the associated value */
-				while (name_len > 0) {
-					__u16 value_len;
-					name_len -= 4;
-					temp_ptr += 4;
-					value_len =
-						le16_to_cpu(temp_fea->value_len);
-				/* BB validate that value_len falls within SMB,
-				   even though maximum for name_len is 255 */
-					if (memcmp(temp_fea->name, ea_name,
-						  temp_fea->name_len) == 0) {
-						/* found a match */
-						rc = value_len;
-				/* account for prefix user. and trailing null */
-						if (rc <= (int)buf_size) {
-							memcpy(ea_value,
-								temp_fea->name+temp_fea->name_len+1,
-								rc);
-							/* ea values, unlike ea
-							   names, are not null
-							   terminated */
-						} else if (buf_size == 0) {
-						/* skip copy - calc size only */
-						} else {
-						/* stop before overrun buffer */
-							rc = -ERANGE;
-						}
-						break;
-					}
-					name_len -= temp_fea->name_len;
-					temp_ptr += temp_fea->name_len;
-					/* account for trailing null */
-					name_len--;
-					temp_ptr++;
-					name_len -= value_len;
-					temp_ptr += value_len;
-					/* No trailing null to account for in
-					   value_len. Go on to next EA */
-					temp_fea = (struct fea *)temp_ptr;
-				}
+		if (ea_name) {
+			if (strncmp(ea_name, temp_ptr, name_len) == 0) {
+				temp_ptr += name_len + 1;
+				rc = value_len;
+				if (buf_size == 0)
+					goto QAllEAsOut;
+				if ((size_t)value_len > buf_size) {
+					rc = -ERANGE;
+					goto QAllEAsOut;
+				}
+				memcpy(EAData, temp_ptr, value_len);
+				goto QAllEAsOut;
+			}
+		} else {
+			/* account for prefix user. and trailing null */
+			rc += (5 + 1 + name_len);
+			if (rc < (int) buf_size) {
+				memcpy(EAData, "user.", 5);
+				EAData += 5;
+				memcpy(EAData, temp_ptr, name_len);
+				EAData += name_len;
+				/* null terminate name */
+				*EAData = 0;
+				++EAData;
+			} else if (buf_size == 0) {
+				/* skip copy - calc size only */
+			} else {
+				/* stop before overrun buffer */
+				rc = -ERANGE;
+				break;
 			}
 		}
+		temp_ptr += name_len + 1 + value_len;
+		temp_fea = (struct fea *)temp_ptr;
 	}
+
+	/* didn't find the named attribute */
+	if (ea_name)
+		rc = -ENODATA;
+
+QAllEAsOut:
 	cifs_buf_release(pSMB);
 	if (rc == -EAGAIN)
-		goto QEARetry;
+		goto QAllEAsRetry;
 
 	return (ssize_t)rc;
 }
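
Note on the rewritten function above: every length field taken from the wire is folded into a signed running total (list_len) and checked before any bytes are trusted. A self-contained sketch of that habit over a toy record layout: one byte of name length, two little-endian bytes of value length, then name, NUL, value. The layout is illustrative, not the SMB FEA encoding:

#include <stdint.h>
#include <stdio.h>

static int walk_records(const uint8_t *buf, size_t buf_size)
{
	long remaining = (long)buf_size;	/* signed, like list_len */
	const uint8_t *p = buf;

	while (remaining > 0) {
		if (remaining < 3)		/* header readable at all? */
			return -1;
		unsigned int name_len = p[0];
		unsigned int value_len = p[1] | (p[2] << 8);
		remaining -= 3;
		p += 3;

		/* subtract first, then test: a lying length goes negative */
		remaining -= (long)(name_len + 1 + value_len);
		if (remaining < 0)
			return -1;

		printf("name: %.*s (%u value bytes)\n",
		       (int)name_len, (const char *)p, value_len);
		p += name_len + 1 + value_len;
	}
	return 0;
}

A buffer claiming a 200-byte value inside a 20-byte list fails the second check instead of walking past the end, which is exactly what the -EIO paths above guard against.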
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3bbcaa716b3c..45eb6cba793f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -98,7 +98,7 @@ struct smb_vol {
 	bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
 	unsigned int rsize;
 	unsigned int wsize;
-	unsigned int sockopt;
+	bool sockopt_tcp_nodelay:1;
 	unsigned short int port;
 	char *prepath;
 };
@@ -1142,9 +1142,11 @@ cifs_parse_mount_options(char *options, const char *devname,
 					simple_strtoul(value, &value, 0);
 			}
 		} else if (strnicmp(data, "sockopt", 5) == 0) {
-			if (value && *value) {
-				vol->sockopt =
-					simple_strtoul(value, &value, 0);
+			if (!value || !*value) {
+				cERROR(1, ("no socket option specified"));
+				continue;
+			} else if (strnicmp(value, "TCP_NODELAY", 11) == 0) {
+				vol->sockopt_tcp_nodelay = 1;
 			}
 		} else if (strnicmp(data, "netbiosname", 4) == 0) {
 			if (!value || !*value || (*value == ' ')) {
@@ -1514,6 +1516,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 
 	tcp_ses->noblocksnd = volume_info->noblocksnd;
 	tcp_ses->noautotune = volume_info->noautotune;
+	tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
 	atomic_set(&tcp_ses->inFlight, 0);
 	init_waitqueue_head(&tcp_ses->response_q);
 	init_waitqueue_head(&tcp_ses->request_q);
@@ -1764,6 +1767,7 @@ static int
 ipv4_connect(struct TCP_Server_Info *server)
 {
 	int rc = 0;
+	int val;
 	bool connected = false;
 	__be16 orig_port = 0;
 	struct socket *socket = server->ssocket;
@@ -1845,6 +1849,14 @@ ipv4_connect(struct TCP_Server_Info *server)
 		socket->sk->sk_rcvbuf = 140 * 1024;
 	}
 
+	if (server->tcp_nodelay) {
+		val = 1;
+		rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
+				       (char *)&val, sizeof(val));
+		if (rc)
+			cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
+	}
+
 	cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
 		 socket->sk->sk_sndbuf,
 		 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo));
@@ -1916,6 +1928,7 @@ static int
 ipv6_connect(struct TCP_Server_Info *server)
 {
 	int rc = 0;
+	int val;
 	bool connected = false;
 	__be16 orig_port = 0;
 	struct socket *socket = server->ssocket;
@@ -1987,6 +2000,15 @@ ipv6_connect(struct TCP_Server_Info *server)
 	 */
 	socket->sk->sk_rcvtimeo = 7 * HZ;
 	socket->sk->sk_sndtimeo = 5 * HZ;
+
+	if (server->tcp_nodelay) {
+		val = 1;
+		rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
+				       (char *)&val, sizeof(val));
+		if (rc)
+			cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
+	}
+
 	server->ssocket = socket;
 
 	return rc;
@@ -2366,13 +2388,13 @@ try_mount_again:
 		 */
 		cifs_put_tcp_session(srvTcp);
 
-		down(&pSesInfo->sesSem);
+		mutex_lock(&pSesInfo->session_mutex);
 		if (pSesInfo->need_reconnect) {
 			cFYI(1, ("Session needs reconnect"));
 			rc = cifs_setup_session(xid, pSesInfo,
 						cifs_sb->local_nls);
 		}
-		up(&pSesInfo->sesSem);
+		mutex_unlock(&pSesInfo->session_mutex);
 	} else if (!rc) {
 		cFYI(1, ("Existing smb sess not found"));
 		pSesInfo = sesInfoAlloc();
@@ -2415,12 +2437,12 @@ try_mount_again:
 		}
 		pSesInfo->linux_uid = volume_info->linux_uid;
 		pSesInfo->overrideSecFlg = volume_info->secFlg;
-		down(&pSesInfo->sesSem);
+		mutex_lock(&pSesInfo->session_mutex);
 
 		/* BB FIXME need to pass vol->secFlgs BB */
 		rc = cifs_setup_session(xid, pSesInfo,
 					cifs_sb->local_nls);
-		up(&pSesInfo->sesSem);
+		mutex_unlock(&pSesInfo->session_mutex);
 	}
 
 	/* search for existing tcon to this server share */
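
Note on the TCP_NODELAY plumbing above: kernel_setsockopt() is the in-kernel twin of the ordinary setsockopt(2); the option itself just disables Nagle's algorithm so small SMB frames go out immediately instead of being coalesced. A minimal userspace illustration:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int val = 1;

	if (fd < 0)
		return 1;
	/* send small writes immediately instead of coalescing them */
	if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)))
		perror("setsockopt(TCP_NODELAY)");
	return 0;
}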
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index cf18ee765590..8bdbc818164c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -111,6 +111,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 
 	cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
 
+	cifs_i->server_eof = fattr->cf_eof;
 	/*
 	 * Can't safely change the file size here if the client is writing to
 	 * it due to potential races.
@@ -366,7 +367,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
 	char ea_value[4];
 	__u32 mode;
 
-	rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS",
+	rc = CIFSSMBQAllEAs(xid, cifs_sb->tcon, path, "SETFILEBITS",
 			    ea_value, 4 /* size of buf */, cifs_sb->local_nls,
 			    cifs_sb->mnt_cifs_flags &
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1762,8 +1763,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 					     CIFS_MOUNT_MAP_SPECIAL_CHR);
 	}
 
-	if (!rc)
+	if (!rc) {
 		rc = inode_setattr(inode, attrs);
+
+		/* force revalidate when any of these times are set since some
+		   of the fs types (eg ext3, fat) do not have fine enough
+		   time granularity to match protocol, and we do not have a
+		   a way (yet) to query the server fs's time granularity (and
+		   whether it rounds times down).
+		*/
+		if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)))
+			cifsInode->time = 0;
+	}
 out:
 	kfree(args);
 	kfree(full_path);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index d27d4ec6579b..d1474996a812 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -79,7 +79,7 @@ sesInfoAlloc(void)
 		++ret_buf->ses_count;
 		INIT_LIST_HEAD(&ret_buf->smb_ses_list);
 		INIT_LIST_HEAD(&ret_buf->tcon_list);
-		init_MUTEX(&ret_buf->sesSem);
+		mutex_init(&ret_buf->session_mutex);
 	}
 	return ret_buf;
 }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f84062f9a985..c343b14ba2d3 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -77,6 +77,11 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
 
 	cFYI(1, ("For %s", name->name));
 
+	if (parent->d_op && parent->d_op->d_hash)
+		parent->d_op->d_hash(parent, name);
+	else
+		name->hash = full_name_hash(name->name, name->len);
+
 	dentry = d_lookup(parent, name);
 	if (dentry) {
 		/* FIXME: check for inode number changes? */
@@ -666,12 +671,11 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
 					   min(len, max_len), nlt,
 					   cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
+		pqst->len -= nls_nullsize(nlt);
 	} else {
 		pqst->name = filename;
 		pqst->len = len;
 	}
-	pqst->hash = full_name_hash(pqst->name, pqst->len);
-/*	cFYI(1, ("filldir on %s",pqst->name)); */
 	return rc;
 }
 
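
Note on the readdir hunks above: a qstr's hash must be computed the same way the filesystem's ->d_hash op would compute it, or d_lookup() probes the wrong hash chain and misses cached dentries (think case-insensitive filesystems). A toy userspace analogy, with invented hash functions:

#include <ctype.h>
#include <stdio.h>

static unsigned int generic_hash(const char *s)
{
	unsigned int h = 0;
	while (*s)
		h = h * 31 + (unsigned char)*s++;
	return h;
}

static unsigned int folding_hash(const char *s)	/* case-insensitive "fs" */
{
	unsigned int h = 0;
	while (*s)
		h = h * 31 + (unsigned char)tolower((unsigned char)*s++);
	return h;
}

int main(void)
{
	unsigned int stored = folding_hash("README");	/* hash at insert time */

	/* probing with the generic hash, like skipping ->d_hash, misses */
	printf("generic probe %s\n",
	       generic_hash("readme") == stored ? "hits" : "misses");
	printf("folding probe %s\n",
	       folding_hash("readme") == stored ? "hits" : "misses");
	return 0;
}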
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7085a6275c4c..aaa9c1c5a5bd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -223,9 +223,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 		/* null user mount */
 		*bcc_ptr = 0;
 		*(bcc_ptr+1) = 0;
-	} else { /* 300 should be long enough for any conceivable user name */
+	} else {
 		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
-					  300, nls_cp);
+					  MAX_USERNAME_SIZE, nls_cp);
 	}
 	bcc_ptr += 2 * bytes_ret;
 	bcc_ptr += 2; /* account for null termination */
@@ -246,11 +246,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 	/* copy user */
 	if (ses->userName == NULL) {
 		/* BB what about null user mounts - check that we do this BB */
-	} else { /* 300 should be long enough for any conceivable user name */
-		strncpy(bcc_ptr, ses->userName, 300);
+	} else {
+		strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE);
 	}
-	/* BB improve check for overflow */
-	bcc_ptr += strnlen(ses->userName, 300);
+	bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
 	*bcc_ptr = 0;
 	bcc_ptr++; /* account for null termination */
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index a75afa3dd9e1..3e2ef0de1209 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -244,7 +244,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 		/* revalidate/getattr then populate from inode */
 	} /* BB add else when above is implemented */
 	ea_name += 5; /* skip past user. prefix */
-	rc = CIFSSMBQueryEA(xid, pTcon, full_path, ea_name, ea_value,
+	rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
 		buf_size, cifs_sb->local_nls,
 		cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 	} else if (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4) == 0) {
@@ -252,7 +252,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 			goto get_ea_exit;
 
 		ea_name += 4; /* skip past os2. prefix */
-		rc = CIFSSMBQueryEA(xid, pTcon, full_path, ea_name, ea_value,
+		rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
 			buf_size, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 	} else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
@@ -364,8 +364,8 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
 		/* if proc/fs/cifs/streamstoxattr is set then
 			search server for EAs or streams to
 			returns as xattrs */
-		rc = CIFSSMBQAllEAs(xid, pTcon, full_path, data, buf_size,
-				cifs_sb->local_nls,
+		rc = CIFSSMBQAllEAs(xid, pTcon, full_path, NULL, data,
+				buf_size, cifs_sb->local_nls,
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c5c45de1a2ee..0ca9ec4a79c3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -301,6 +301,12 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
 	u32 data;
 	void __user *dxferp;
 	int err;
+	int interface_id;
+
+	if (get_user(interface_id, &sgio32->interface_id))
+		return -EFAULT;
+	if (interface_id != 'S')
+		return sys_ioctl(fd, cmd, (unsigned long)sgio32);
 
 	if (get_user(iovec_count, &sgio32->iovec_count))
 		return -EFAULT;
@@ -936,6 +942,7 @@ COMPATIBLE_IOCTL(TCSETSF)
 COMPATIBLE_IOCTL(TIOCLINUX)
 COMPATIBLE_IOCTL(TIOCSBRK)
 COMPATIBLE_IOCTL(TIOCCBRK)
+COMPATIBLE_IOCTL(TIOCGSID)
 COMPATIBLE_IOCTL(TIOCGICOUNT)
 /* Little t */
 COMPATIBLE_IOCTL(TIOCGETD)
@@ -1038,6 +1045,8 @@ COMPATIBLE_IOCTL(FIOQSIZE)
 #ifdef CONFIG_BLOCK
 /* loop */
 IGNORE_IOCTL(LOOP_CLR_FD)
+/* md calls this on random blockdevs */
+IGNORE_IOCTL(RAID_VERSION)
 /* SG stuff */
 COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
 COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index dc2ad6008b2d..4314f0d48d85 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2010 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -33,10 +33,10 @@ void dlm_del_ast(struct dlm_lkb *lkb)
 	spin_unlock(&ast_queue_lock);
 }
 
-void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
+void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode)
 {
 	if (lkb->lkb_flags & DLM_IFL_USER) {
-		dlm_user_add_ast(lkb, type, bastmode);
+		dlm_user_add_ast(lkb, type, mode);
 		return;
 	}
 
@@ -44,10 +44,21 @@ void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
 	if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) {
 		kref_get(&lkb->lkb_ref);
 		list_add_tail(&lkb->lkb_astqueue, &ast_queue);
+		lkb->lkb_ast_first = type;
 	}
+
+	/* sanity check, this should not happen */
+
+	if ((type == AST_COMP) && (lkb->lkb_ast_type & AST_COMP))
+		log_print("repeat cast %d castmode %d lock %x %s",
+			  mode, lkb->lkb_castmode,
+			  lkb->lkb_id, lkb->lkb_resource->res_name);
+
 	lkb->lkb_ast_type |= type;
-	if (bastmode)
-		lkb->lkb_bastmode = bastmode;
+	if (type == AST_BAST)
+		lkb->lkb_bastmode = mode;
+	else
+		lkb->lkb_castmode = mode;
 	spin_unlock(&ast_queue_lock);
 
 	set_bit(WAKE_ASTS, &astd_wakeflags);
@@ -59,9 +70,9 @@ static void process_asts(void)
 	struct dlm_ls *ls = NULL;
 	struct dlm_rsb *r = NULL;
 	struct dlm_lkb *lkb;
-	void (*cast) (void *astparam);
-	void (*bast) (void *astparam, int mode);
-	int type = 0, bastmode;
+	void (*castfn) (void *astparam);
+	void (*bastfn) (void *astparam, int mode);
+	int type, first, bastmode, castmode, do_bast, do_cast, last_castmode;
 
 repeat:
 	spin_lock(&ast_queue_lock);
@@ -75,17 +86,48 @@ repeat:
 		list_del(&lkb->lkb_astqueue);
 		type = lkb->lkb_ast_type;
 		lkb->lkb_ast_type = 0;
+		first = lkb->lkb_ast_first;
+		lkb->lkb_ast_first = 0;
 		bastmode = lkb->lkb_bastmode;
-
+		castmode = lkb->lkb_castmode;
+		castfn = lkb->lkb_astfn;
+		bastfn = lkb->lkb_bastfn;
 		spin_unlock(&ast_queue_lock);
-		cast = lkb->lkb_astfn;
-		bast = lkb->lkb_bastfn;
-
-		if ((type & AST_COMP) && cast)
-			cast(lkb->lkb_astparam);
 
-		if ((type & AST_BAST) && bast)
-			bast(lkb->lkb_astparam, bastmode);
+		do_cast = (type & AST_COMP) && castfn;
+		do_bast = (type & AST_BAST) && bastfn;
+
+		/* Skip a bast if its blocking mode is compatible with the
+		   granted mode of the preceding cast. */
+
+		if (do_bast) {
+			if (first == AST_COMP)
+				last_castmode = castmode;
+			else
+				last_castmode = lkb->lkb_castmode_done;
+			if (dlm_modes_compat(bastmode, last_castmode))
+				do_bast = 0;
+		}
+
+		if (first == AST_COMP) {
+			if (do_cast)
+				castfn(lkb->lkb_astparam);
+			if (do_bast)
+				bastfn(lkb->lkb_astparam, bastmode);
+		} else if (first == AST_BAST) {
+			if (do_bast)
+				bastfn(lkb->lkb_astparam, bastmode);
+			if (do_cast)
+				castfn(lkb->lkb_astparam);
+		} else {
+			log_error(ls, "bad ast_first %d ast_type %d",
+				  first, type);
+		}
+
+		if (do_cast)
+			lkb->lkb_castmode_done = castmode;
+		if (do_bast)
+			lkb->lkb_bastmode_done = bastmode;
 
 		/* this removes the reference added by dlm_add_ast
 		   and may result in the lkb being freed */
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index 1b5fc5f428fd..bcb1aaba519d 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2010 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -13,7 +13,7 @@
 #ifndef __ASTD_DOT_H__
 #define __ASTD_DOT_H__
 
-void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode);
+void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode);
 void dlm_del_ast(struct dlm_lkb *lkb);
 
 void dlm_astd_wake(void);
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 375a2359b3bf..29d6139c35fc 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -256,7 +256,7 @@ static int print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
 		   lkb->lkb_status,
 		   lkb->lkb_grmode,
 		   lkb->lkb_rqmode,
-		   lkb->lkb_highbast,
+		   lkb->lkb_bastmode,
 		   rsb_lookup,
 		   lkb->lkb_wait_type,
 		   lkb->lkb_lvbseq,
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 826d3dc6e0ab..f632b58cd222 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -232,11 +232,17 @@ struct dlm_lkb {
232 int8_t lkb_status; /* granted, waiting, convert */ 232 int8_t lkb_status; /* granted, waiting, convert */
233 int8_t lkb_rqmode; /* requested lock mode */ 233 int8_t lkb_rqmode; /* requested lock mode */
234 int8_t lkb_grmode; /* granted lock mode */ 234 int8_t lkb_grmode; /* granted lock mode */
235 int8_t lkb_bastmode; /* requested mode */
236 int8_t lkb_highbast; /* highest mode bast sent for */ 235 int8_t lkb_highbast; /* highest mode bast sent for */
236
237 int8_t lkb_wait_type; /* type of reply waiting for */ 237 int8_t lkb_wait_type; /* type of reply waiting for */
238 int8_t lkb_wait_count; 238 int8_t lkb_wait_count;
239 int8_t lkb_ast_type; /* type of ast queued for */ 239 int8_t lkb_ast_type; /* type of ast queued for */
240 int8_t lkb_ast_first; /* type of first ast queued */
241
242 int8_t lkb_bastmode; /* req mode of queued bast */
243 int8_t lkb_castmode; /* gr mode of queued cast */
244 int8_t lkb_bastmode_done; /* last delivered bastmode */
245 int8_t lkb_castmode_done; /* last delivered castmode */
240 246
241 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ 247 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
242 struct list_head lkb_statequeue; /* rsb g/c/w list */ 248 struct list_head lkb_statequeue; /* rsb g/c/w list */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 9c0c1db1e105..46ffd3eeaaf7 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -307,7 +307,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
307 lkb->lkb_lksb->sb_status = rv; 307 lkb->lkb_lksb->sb_status = rv;
308 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; 308 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
309 309
310 dlm_add_ast(lkb, AST_COMP, 0); 310 dlm_add_ast(lkb, AST_COMP, lkb->lkb_grmode);
311} 311}
312 312
313static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) 313static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -320,10 +320,12 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
320{ 320{
321 lkb->lkb_time_bast = ktime_get(); 321 lkb->lkb_time_bast = ktime_get();
322 322
323 if (is_master_copy(lkb)) 323 if (is_master_copy(lkb)) {
324 lkb->lkb_bastmode = rqmode; /* printed by debugfs */
324 send_bast(r, lkb, rqmode); 325 send_bast(r, lkb, rqmode);
325 else 326 } else {
326 dlm_add_ast(lkb, AST_BAST, rqmode); 327 dlm_add_ast(lkb, AST_BAST, rqmode);
328 }
327} 329}
328 330
329/* 331/*
@@ -2280,20 +2282,30 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
2280 if (can_be_queued(lkb)) { 2282 if (can_be_queued(lkb)) {
2281 error = -EINPROGRESS; 2283 error = -EINPROGRESS;
2282 add_lkb(r, lkb, DLM_LKSTS_WAITING); 2284 add_lkb(r, lkb, DLM_LKSTS_WAITING);
2283 send_blocking_asts(r, lkb);
2284 add_timeout(lkb); 2285 add_timeout(lkb);
2285 goto out; 2286 goto out;
2286 } 2287 }
2287 2288
2288 error = -EAGAIN; 2289 error = -EAGAIN;
2289 if (force_blocking_asts(lkb))
2290 send_blocking_asts_all(r, lkb);
2291 queue_cast(r, lkb, -EAGAIN); 2290 queue_cast(r, lkb, -EAGAIN);
2292
2293 out: 2291 out:
2294 return error; 2292 return error;
2295} 2293}
2296 2294
2295static void do_request_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
2296 int error)
2297{
2298 switch (error) {
2299 case -EAGAIN:
2300 if (force_blocking_asts(lkb))
2301 send_blocking_asts_all(r, lkb);
2302 break;
2303 case -EINPROGRESS:
2304 send_blocking_asts(r, lkb);
2305 break;
2306 }
2307}
2308
2297static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) 2309static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2298{ 2310{
2299 int error = 0; 2311 int error = 0;
@@ -2304,7 +2316,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2304 if (can_be_granted(r, lkb, 1, &deadlk)) { 2316 if (can_be_granted(r, lkb, 1, &deadlk)) {
2305 grant_lock(r, lkb); 2317 grant_lock(r, lkb);
2306 queue_cast(r, lkb, 0); 2318 queue_cast(r, lkb, 0);
2307 grant_pending_locks(r);
2308 goto out; 2319 goto out;
2309 } 2320 }
2310 2321
@@ -2334,7 +2345,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2334 if (_can_be_granted(r, lkb, 1)) { 2345 if (_can_be_granted(r, lkb, 1)) {
2335 grant_lock(r, lkb); 2346 grant_lock(r, lkb);
2336 queue_cast(r, lkb, 0); 2347 queue_cast(r, lkb, 0);
2337 grant_pending_locks(r);
2338 goto out; 2348 goto out;
2339 } 2349 }
2340 /* else fall through and move to convert queue */ 2350 /* else fall through and move to convert queue */
@@ -2344,28 +2354,47 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2344 error = -EINPROGRESS; 2354 error = -EINPROGRESS;
2345 del_lkb(r, lkb); 2355 del_lkb(r, lkb);
2346 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 2356 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
2347 send_blocking_asts(r, lkb);
2348 add_timeout(lkb); 2357 add_timeout(lkb);
2349 goto out; 2358 goto out;
2350 } 2359 }
2351 2360
2352 error = -EAGAIN; 2361 error = -EAGAIN;
2353 if (force_blocking_asts(lkb))
2354 send_blocking_asts_all(r, lkb);
2355 queue_cast(r, lkb, -EAGAIN); 2362 queue_cast(r, lkb, -EAGAIN);
2356
2357 out: 2363 out:
2358 return error; 2364 return error;
2359} 2365}
2360 2366
2367static void do_convert_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
2368 int error)
2369{
2370 switch (error) {
2371 case 0:
2372 grant_pending_locks(r);
2373 /* grant_pending_locks also sends basts */
2374 break;
2375 case -EAGAIN:
2376 if (force_blocking_asts(lkb))
2377 send_blocking_asts_all(r, lkb);
2378 break;
2379 case -EINPROGRESS:
2380 send_blocking_asts(r, lkb);
2381 break;
2382 }
2383}
2384
2361static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) 2385static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2362{ 2386{
2363 remove_lock(r, lkb); 2387 remove_lock(r, lkb);
2364 queue_cast(r, lkb, -DLM_EUNLOCK); 2388 queue_cast(r, lkb, -DLM_EUNLOCK);
2365 grant_pending_locks(r);
2366 return -DLM_EUNLOCK; 2389 return -DLM_EUNLOCK;
2367} 2390}
2368 2391
2392static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
2393 int error)
2394{
2395 grant_pending_locks(r);
2396}
2397
2369/* returns: 0 did nothing, -DLM_ECANCEL canceled lock */ 2398/* returns: 0 did nothing, -DLM_ECANCEL canceled lock */
2370 2399
2371static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) 2400static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -2375,12 +2404,18 @@ static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
2375 error = revert_lock(r, lkb); 2404 error = revert_lock(r, lkb);
2376 if (error) { 2405 if (error) {
2377 queue_cast(r, lkb, -DLM_ECANCEL); 2406 queue_cast(r, lkb, -DLM_ECANCEL);
2378 grant_pending_locks(r);
2379 return -DLM_ECANCEL; 2407 return -DLM_ECANCEL;
2380 } 2408 }
2381 return 0; 2409 return 0;
2382} 2410}
2383 2411
2412static void do_cancel_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
2413 int error)
2414{
2415 if (error)
2416 grant_pending_locks(r);
2417}
2418
2384/* 2419/*
2385 * Four stage 3 varieties: 2420 * Four stage 3 varieties:
2386 * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock() 2421 * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock()
@@ -2402,11 +2437,15 @@ static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2402 goto out; 2437 goto out;
2403 } 2438 }
2404 2439
2405 if (is_remote(r)) 2440 if (is_remote(r)) {
2406 /* receive_request() calls do_request() on remote node */ 2441 /* receive_request() calls do_request() on remote node */
2407 error = send_request(r, lkb); 2442 error = send_request(r, lkb);
2408 else 2443 } else {
2409 error = do_request(r, lkb); 2444 error = do_request(r, lkb);
2445 /* for remote locks the request_reply is sent
2446 between do_request and do_request_effects */
2447 do_request_effects(r, lkb, error);
2448 }
2410 out: 2449 out:
2411 return error; 2450 return error;
2412} 2451}
@@ -2417,11 +2456,15 @@ static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2417{ 2456{
2418 int error; 2457 int error;
2419 2458
2420 if (is_remote(r)) 2459 if (is_remote(r)) {
2421 /* receive_convert() calls do_convert() on remote node */ 2460 /* receive_convert() calls do_convert() on remote node */
2422 error = send_convert(r, lkb); 2461 error = send_convert(r, lkb);
2423 else 2462 } else {
2424 error = do_convert(r, lkb); 2463 error = do_convert(r, lkb);
2464 /* for remote locks the convert_reply is sent
2465 between do_convert and do_convert_effects */
2466 do_convert_effects(r, lkb, error);
2467 }
2425 2468
2426 return error; 2469 return error;
2427} 2470}
@@ -2432,11 +2475,15 @@ static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2432{ 2475{
2433 int error; 2476 int error;
2434 2477
2435 if (is_remote(r)) 2478 if (is_remote(r)) {
2436 /* receive_unlock() calls do_unlock() on remote node */ 2479 /* receive_unlock() calls do_unlock() on remote node */
2437 error = send_unlock(r, lkb); 2480 error = send_unlock(r, lkb);
2438 else 2481 } else {
2439 error = do_unlock(r, lkb); 2482 error = do_unlock(r, lkb);
2483 /* for remote locks the unlock_reply is sent
2484 between do_unlock and do_unlock_effects */
2485 do_unlock_effects(r, lkb, error);
2486 }
2440 2487
2441 return error; 2488 return error;
2442} 2489}
@@ -2447,11 +2494,15 @@ static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2447{ 2494{
2448 int error; 2495 int error;
2449 2496
2450 if (is_remote(r)) 2497 if (is_remote(r)) {
2451 /* receive_cancel() calls do_cancel() on remote node */ 2498 /* receive_cancel() calls do_cancel() on remote node */
2452 error = send_cancel(r, lkb); 2499 error = send_cancel(r, lkb);
2453 else 2500 } else {
2454 error = do_cancel(r, lkb); 2501 error = do_cancel(r, lkb);
2502 /* for remote locks the cancel_reply is sent
2503 between do_cancel and do_cancel_effects */
2504 do_cancel_effects(r, lkb, error);
2505 }
2455 2506
2456 return error; 2507 return error;
2457} 2508}
@@ -3191,6 +3242,7 @@ static void receive_request(struct dlm_ls *ls, struct dlm_message *ms)
3191 attach_lkb(r, lkb); 3242 attach_lkb(r, lkb);
3192 error = do_request(r, lkb); 3243 error = do_request(r, lkb);
3193 send_request_reply(r, lkb, error); 3244 send_request_reply(r, lkb, error);
3245 do_request_effects(r, lkb, error);
3194 3246
3195 unlock_rsb(r); 3247 unlock_rsb(r);
3196 put_rsb(r); 3248 put_rsb(r);
@@ -3226,15 +3278,19 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
3226 goto out; 3278 goto out;
3227 3279
3228 receive_flags(lkb, ms); 3280 receive_flags(lkb, ms);
3281
3229 error = receive_convert_args(ls, lkb, ms); 3282 error = receive_convert_args(ls, lkb, ms);
3230 if (error) 3283 if (error) {
3231 goto out_reply; 3284 send_convert_reply(r, lkb, error);
3285 goto out;
3286 }
3287
3232 reply = !down_conversion(lkb); 3288 reply = !down_conversion(lkb);
3233 3289
3234 error = do_convert(r, lkb); 3290 error = do_convert(r, lkb);
3235 out_reply:
3236 if (reply) 3291 if (reply)
3237 send_convert_reply(r, lkb, error); 3292 send_convert_reply(r, lkb, error);
3293 do_convert_effects(r, lkb, error);
3238 out: 3294 out:
3239 unlock_rsb(r); 3295 unlock_rsb(r);
3240 put_rsb(r); 3296 put_rsb(r);
@@ -3266,13 +3322,16 @@ static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
3266 goto out; 3322 goto out;
3267 3323
3268 receive_flags(lkb, ms); 3324 receive_flags(lkb, ms);
3325
3269 error = receive_unlock_args(ls, lkb, ms); 3326 error = receive_unlock_args(ls, lkb, ms);
3270 if (error) 3327 if (error) {
3271 goto out_reply; 3328 send_unlock_reply(r, lkb, error);
3329 goto out;
3330 }
3272 3331
3273 error = do_unlock(r, lkb); 3332 error = do_unlock(r, lkb);
3274 out_reply:
3275 send_unlock_reply(r, lkb, error); 3333 send_unlock_reply(r, lkb, error);
3334 do_unlock_effects(r, lkb, error);
3276 out: 3335 out:
3277 unlock_rsb(r); 3336 unlock_rsb(r);
3278 put_rsb(r); 3337 put_rsb(r);
@@ -3307,6 +3366,7 @@ static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
3307 3366
3308 error = do_cancel(r, lkb); 3367 error = do_cancel(r, lkb);
3309 send_cancel_reply(r, lkb, error); 3368 send_cancel_reply(r, lkb, error);
3369 do_cancel_effects(r, lkb, error);
3310 out: 3370 out:
3311 unlock_rsb(r); 3371 unlock_rsb(r);
3312 put_rsb(r); 3372 put_rsb(r);
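The lock.c changes above all follow one pattern: each of do_request/do_convert/do_unlock/do_cancel loses its side effects (send_blocking_asts*, grant_pending_locks) to a new do_*_effects helper, and the receive_* paths call that helper only after the reply has gone out. A minimal model of the ordering this enforces, with invented stand-in functions:

#include <stdio.h>

static int  do_operation(void)            { return 0; /* or -EINPROGRESS... */ }
static void send_reply(int error)         { printf("reply %d sent\n", error); }
static void do_operation_effects(int err) { printf("effects for %d\n", err); }

/* Master node handling a remote request (cf. receive_request() above):
 * the reply hits the wire before the side effects run, so callbacks the
 * effects generate can never overtake the requester's own reply. */
static void master_receive_op(void)
{
	int error = do_operation();
	send_reply(error);
	do_operation_effects(error);   /* basts / grant_pending_locks() */
}

/* Purely local operation (cf. _request_lock() above): no reply exists,
 * so the effects follow immediately. */
static void local_op(void)
{
	int error = do_operation();
	do_operation_effects(error);
}

int main(void)
{
	master_receive_op();
	local_op();
	return 0;
}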
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index c010ecfc0d29..26a8bd40400a 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -191,6 +191,18 @@ static int do_uevent(struct dlm_ls *ls, int in)
191 return error; 191 return error;
192} 192}
193 193
194static int dlm_uevent(struct kset *kset, struct kobject *kobj,
195 struct kobj_uevent_env *env)
196{
197 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
198
199 add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
200 return 0;
201}
202
203static struct kset_uevent_ops dlm_uevent_ops = {
204 .uevent = dlm_uevent,
205};
194 206
195int __init dlm_lockspace_init(void) 207int __init dlm_lockspace_init(void)
196{ 208{
@@ -199,7 +211,7 @@ int __init dlm_lockspace_init(void)
199 INIT_LIST_HEAD(&lslist); 211 INIT_LIST_HEAD(&lslist);
200 spin_lock_init(&lslist_lock); 212 spin_lock_init(&lslist_lock);
201 213
202 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); 214 dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
203 if (!dlm_kset) { 215 if (!dlm_kset) {
204 printk(KERN_WARNING "%s: can not create kset\n", __func__); 216 printk(KERN_WARNING "%s: can not create kset\n", __func__);
205 return -ENOMEM; 217 return -ENOMEM;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index e73a4bb572aa..a4bfd31ac45b 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. 2 * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved.
3 * 3 *
4 * This copyrighted material is made available to anyone wishing to use, 4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions 5 * modify, copy, or redistribute it subject to the terms and conditions
@@ -173,7 +173,7 @@ static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
173/* we could possibly check if the cancel of an orphan has resulted in the lkb 173/* we could possibly check if the cancel of an orphan has resulted in the lkb
174 being removed and then remove that lkb from the orphans list and free it */ 174 being removed and then remove that lkb from the orphans list and free it */
175 175
176void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode) 176void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode)
177{ 177{
178 struct dlm_ls *ls; 178 struct dlm_ls *ls;
179 struct dlm_user_args *ua; 179 struct dlm_user_args *ua;
@@ -206,8 +206,10 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
206 206
207 ast_type = lkb->lkb_ast_type; 207 ast_type = lkb->lkb_ast_type;
208 lkb->lkb_ast_type |= type; 208 lkb->lkb_ast_type |= type;
209 if (bastmode) 209 if (type == AST_BAST)
210 lkb->lkb_bastmode = bastmode; 210 lkb->lkb_bastmode = mode;
211 else
212 lkb->lkb_castmode = mode;
211 213
212 if (!ast_type) { 214 if (!ast_type) {
213 kref_get(&lkb->lkb_ref); 215 kref_get(&lkb->lkb_ref);
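On the user.c hunk just above: the third argument used to be a bastmode that doubled as a "this is a bast" flag when non-zero. Dispatching on the ast type instead lets completions record the granted mode as well (including DLM_LOCK_NL, whose numeric value is 0), which the new delivery logic in ast.c relies on. An illustrative sketch, not the kernel code:

#include <stdio.h>

enum { AST_COMP = 1, AST_BAST = 2 };

struct modes { int bastmode, castmode; };

/* The single 'mode' argument is overloaded, so dispatch on the ast type
 * rather than on the value: a granted mode of 0 (NL) is then recorded
 * just as well as any other. */
static void record_mode(struct modes *m, int type, int mode)
{
	if (type == AST_BAST)
		m->bastmode = mode;   /* requested mode of the blocked lock */
	else
		m->castmode = mode;   /* mode this completion granted */
}

int main(void)
{
	struct modes m = {0};

	record_mode(&m, AST_COMP, 0);   /* NL grant, still recorded */
	record_mode(&m, AST_BAST, 5);   /* EX bast */
	printf("cast %d bast %d\n", m.castmode, m.bastmode);
	return 0;
}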
diff --git a/fs/dlm/user.h b/fs/dlm/user.h
index 1c9686492286..f196091dd7ff 100644
--- a/fs/dlm/user.h
+++ b/fs/dlm/user.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. 2 * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved.
3 * 3 *
4 * This copyrighted material is made available to anyone wishing to use, 4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions 5 * modify, copy, or redistribute it subject to the terms and conditions
@@ -9,7 +9,7 @@
9#ifndef __USER_DOT_H__ 9#ifndef __USER_DOT_H__
10#define __USER_DOT_H__ 10#define __USER_DOT_H__
11 11
12void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode); 12void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode);
13int dlm_user_init(void); 13int dlm_user_init(void);
14void dlm_user_exit(void); 14void dlm_user_exit(void);
15int dlm_device_deregister(struct dlm_ls *ls); 15int dlm_device_deregister(struct dlm_ls *ls);
diff --git a/fs/exec.c b/fs/exec.c
index 0790a107ff7e..cce6bbdbdbb1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -571,6 +571,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
571 struct vm_area_struct *prev = NULL; 571 struct vm_area_struct *prev = NULL;
572 unsigned long vm_flags; 572 unsigned long vm_flags;
573 unsigned long stack_base; 573 unsigned long stack_base;
574 unsigned long stack_size;
575 unsigned long stack_expand;
576 unsigned long rlim_stack;
574 577
575#ifdef CONFIG_STACK_GROWSUP 578#ifdef CONFIG_STACK_GROWSUP
576 /* Limit stack size to 1GB */ 579 /* Limit stack size to 1GB */
@@ -627,10 +630,23 @@ int setup_arg_pages(struct linux_binprm *bprm,
627 goto out_unlock; 630 goto out_unlock;
628 } 631 }
629 632
633 stack_expand = EXTRA_STACK_VM_PAGES * PAGE_SIZE;
634 stack_size = vma->vm_end - vma->vm_start;
635 /*
636 * Align this down to a page boundary as expand_stack
637 * will align it up.
638 */
639 rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
630#ifdef CONFIG_STACK_GROWSUP 640#ifdef CONFIG_STACK_GROWSUP
631 stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE; 641 if (stack_size + stack_expand > rlim_stack)
642 stack_base = vma->vm_start + rlim_stack;
643 else
644 stack_base = vma->vm_end + stack_expand;
632#else 645#else
633 stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE; 646 if (stack_size + stack_expand > rlim_stack)
647 stack_base = vma->vm_end - rlim_stack;
648 else
649 stack_base = vma->vm_start - stack_expand;
634#endif 650#endif
635 ret = expand_stack(vma, stack_base); 651 ret = expand_stack(vma, stack_base);
636 if (ret) 652 if (ret)
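The exec.c hunk above stops setup_arg_pages() from unconditionally expanding the argument-page VMA by EXTRA_STACK_VM_PAGES: if the current size plus that slack would exceed RLIMIT_STACK, the expansion is clamped to the rlimit instead, so exec no longer fails outright under a tiny stack limit. A worked example of the grows-down arithmetic; the addresses are illustrative, and 20 for EXTRA_STACK_VM_PAGES matches fs/exec.c of this era.

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define EXTRA_STACK_VM_PAGES 20UL   /* fs/exec.c's value at the time */

/* Grows-down case of the hunk above: choose how far the stack VMA may
 * be expanded before expand_stack() runs. */
static unsigned long pick_stack_base(unsigned long vm_start,
				     unsigned long vm_end,
				     unsigned long rlim)
{
	unsigned long stack_expand = EXTRA_STACK_VM_PAGES * PAGE_SIZE;
	unsigned long stack_size = vm_end - vm_start;
	unsigned long rlim_stack = rlim & PAGE_MASK;   /* page-align down */

	if (stack_size + stack_expand > rlim_stack)
		return vm_end - rlim_stack;    /* clamp to RLIMIT_STACK */
	return vm_start - stack_expand;        /* the usual 80KiB of slack */
}

int main(void)
{
	/* 16KiB of argument pages, 8MiB rlimit: room for the full slack. */
	printf("%#lx\n", pick_stack_base(0xbfffc000UL, 0xc0000000UL, 8UL << 20));
	/* Same VMA, 64KiB rlimit: expanding by 80KiB would overshoot; clamp. */
	printf("%#lx\n", pick_stack_base(0xbfffc000UL, 0xc0000000UL, 64UL << 10));
	return 0;
}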
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 874d169a193e..4cedc91ec59d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1014,7 +1014,7 @@ struct ext4_sb_info {
1014 atomic_t s_lock_busy; 1014 atomic_t s_lock_busy;
1015 1015
1016 /* locality groups */ 1016 /* locality groups */
1017 struct ext4_locality_group *s_locality_groups; 1017 struct ext4_locality_group __percpu *s_locality_groups;
1018 1018
1019 /* for write statistics */ 1019 /* for write statistics */
1020 unsigned long s_sectors_written_start; 1020 unsigned long s_sectors_written_start;
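The one-line ext4.h change (and the matching nfs/iostat.h change later in this series) only adds the __percpu sparse annotation; the generated code is unchanged. Sparse ("make C=1") then warns when such a pointer is dereferenced directly instead of through the per-cpu accessors. A minimal kernel-style sketch of the idiom, names invented:

#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/errno.h>

struct io_stats {
	unsigned long events;
};

static struct io_stats __percpu *stats;   /* annotation lives in the type */

static int __init stats_init(void)
{
	stats = alloc_percpu(struct io_stats);   /* returns a __percpu pointer */
	return stats ? 0 : -ENOMEM;
}

static void count_event(void)
{
	this_cpu_inc(stats->events);   /* the accessor strips the annotation */
}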
diff --git a/fs/file.c b/fs/file.c
index 87e129030ab1..38039af67663 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -478,7 +478,7 @@ repeat:
478 error = fd; 478 error = fd;
479#if 1 479#if 1
480 /* Sanity check */ 480 /* Sanity check */
481 if (rcu_dereference(fdt->fd[fd]) != NULL) { 481 if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
482 printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); 482 printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
483 rcu_assign_pointer(fdt->fd[fd], NULL); 483 rcu_assign_pointer(fdt->fd[fd], NULL);
484 } 484 }
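On the fs/file.c change: with CONFIG_PROVE_RCU, rcu_dereference() complains unless the caller is inside an RCU read-side section, but alloc_fd() runs under files->file_lock, which is what actually keeps the slot stable; rcu_dereference_raw() documents that and avoids the false positive. A kernel-style sketch of the two idioms, types invented:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct slot_table {
	void *slot;
	spinlock_t lock;   /* update side: protects slot */
};

/* Reader: a proper RCU read-side section, which PROVE_RCU can verify. */
static int slot_present(struct slot_table *t)
{
	int ret;

	rcu_read_lock();
	ret = rcu_dereference(t->slot) != NULL;
	rcu_read_unlock();
	return ret;
}

/* Writer-side peek, caller holds t->lock: the lock, not rcu_read_lock(),
 * keeps the slot stable, so the unchecked form is the honest annotation
 * here (or rcu_dereference_check() with a lockdep expression). */
static int slot_present_locked(struct slot_table *t)
{
	return rcu_dereference_raw(t->slot) != NULL;
}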
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 51d9e33d634f..eb7e9423691f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -865,13 +865,10 @@ static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
865 865
866 down_read(&fc->killsb); 866 down_read(&fc->killsb);
867 err = -ENOENT; 867 err = -ENOENT;
868 if (!fc->sb) 868 if (fc->sb) {
869 goto err_unlock; 869 err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
870 870 outarg.off, outarg.len);
871 err = fuse_reverse_inval_inode(fc->sb, outarg.ino, 871 }
872 outarg.off, outarg.len);
873
874err_unlock:
875 up_read(&fc->killsb); 872 up_read(&fc->killsb);
876 return err; 873 return err;
877 874
@@ -884,10 +881,15 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
884 struct fuse_copy_state *cs) 881 struct fuse_copy_state *cs)
885{ 882{
886 struct fuse_notify_inval_entry_out outarg; 883 struct fuse_notify_inval_entry_out outarg;
887 int err = -EINVAL; 884 int err = -ENOMEM;
888 char buf[FUSE_NAME_MAX+1]; 885 char *buf;
889 struct qstr name; 886 struct qstr name;
890 887
888 buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
889 if (!buf)
890 goto err;
891
892 err = -EINVAL;
891 if (size < sizeof(outarg)) 893 if (size < sizeof(outarg))
892 goto err; 894 goto err;
893 895
@@ -910,16 +912,14 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
910 912
911 down_read(&fc->killsb); 913 down_read(&fc->killsb);
912 err = -ENOENT; 914 err = -ENOENT;
913 if (!fc->sb) 915 if (fc->sb)
914 goto err_unlock; 916 err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
915
916 err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
917
918err_unlock:
919 up_read(&fc->killsb); 917 up_read(&fc->killsb);
918 kfree(buf);
920 return err; 919 return err;
921 920
922err: 921err:
922 kfree(buf);
923 fuse_copy_finish(cs); 923 fuse_copy_finish(cs);
924 return err; 924 return err;
925} 925}
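The fuse_notify_inval_entry() change above is a stack-size fix: FUSE_NAME_MAX is 1024, so the old on-stack buffer consumed about 1KiB of kernel stack; the buffer now comes from kzalloc() and both exit labels free it. The shape of that allocate/fail/free pattern, as a standalone userspace sketch (calloc standing in for kzalloc, error values invented):

#include <stdlib.h>
#include <string.h>

#define NAME_MAX_LEN 1024   /* FUSE_NAME_MAX; too big for an on-stack array */

static int handle_name(const char *src, size_t len)
{
	char *buf;
	int err = -1;                   /* stands in for -ENOMEM */

	buf = calloc(1, NAME_MAX_LEN + 1);
	if (!buf)
		goto err;

	err = -2;                       /* stands in for -ENAMETOOLONG */
	if (len > NAME_MAX_LEN)
		goto err_free;

	memcpy(buf, src, len);          /* buf stays NUL-terminated */
	/* ... resolve the name ... */
	err = 0;

err_free:
	free(buf);                      /* every path releases the buffer */
err:
	return err;
}

int main(void)
{
	return handle_name("victim", 6) ? 1 : 0;
}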
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 7b8da9415267..0c1d0b82dcf1 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1061,8 +1061,8 @@ out:
1061 1061
1062int gfs2_releasepage(struct page *page, gfp_t gfp_mask) 1062int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1063{ 1063{
1064 struct inode *aspace = page->mapping->host; 1064 struct address_space *mapping = page->mapping;
1065 struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; 1065 struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
1066 struct buffer_head *bh, *head; 1066 struct buffer_head *bh, *head;
1067 struct gfs2_bufdata *bd; 1067 struct gfs2_bufdata *bd;
1068 1068
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6d47379e794b..583e823307ae 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -541,7 +541,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
541 *ptr++ = cpu_to_be64(bn++); 541 *ptr++ = cpu_to_be64(bn++);
542 break; 542 break;
543 } 543 }
544 } while (state != ALLOC_DATA); 544 } while ((state != ALLOC_DATA) || !dblock);
545 545
546 ip->i_height = height; 546 ip->i_height = height;
547 gfs2_add_inode_blocks(&ip->i_inode, alloced); 547 gfs2_add_inode_blocks(&ip->i_inode, alloced);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42663325931..454d4b4eb36b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -19,7 +19,6 @@
19#include <linux/list.h> 19#include <linux/list.h>
20#include <linux/wait.h> 20#include <linux/wait.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/rwsem.h>
23#include <asm/uaccess.h> 22#include <asm/uaccess.h>
24#include <linux/seq_file.h> 23#include <linux/seq_file.h>
25#include <linux/debugfs.h> 24#include <linux/debugfs.h>
@@ -60,7 +59,6 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
60#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) 59#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
61static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 60static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
62 61
63static DECLARE_RWSEM(gfs2_umount_flush_sem);
64static struct dentry *gfs2_root; 62static struct dentry *gfs2_root;
65static struct workqueue_struct *glock_workqueue; 63static struct workqueue_struct *glock_workqueue;
66struct workqueue_struct *gfs2_delete_workqueue; 64struct workqueue_struct *gfs2_delete_workqueue;
@@ -154,12 +152,14 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
154static void glock_free(struct gfs2_glock *gl) 152static void glock_free(struct gfs2_glock *gl)
155{ 153{
156 struct gfs2_sbd *sdp = gl->gl_sbd; 154 struct gfs2_sbd *sdp = gl->gl_sbd;
157 struct inode *aspace = gl->gl_aspace; 155 struct address_space *mapping = gfs2_glock2aspace(gl);
156 struct kmem_cache *cachep = gfs2_glock_cachep;
158 157
159 if (aspace) 158 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
160 gfs2_aspace_put(aspace);
161 trace_gfs2_glock_put(gl); 159 trace_gfs2_glock_put(gl);
162 sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); 160 if (mapping)
161 cachep = gfs2_glock_aspace_cachep;
162 sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
163} 163}
164 164
165/** 165/**
@@ -712,7 +712,6 @@ static void glock_work_func(struct work_struct *work)
712 finish_xmote(gl, gl->gl_reply); 712 finish_xmote(gl, gl->gl_reply);
713 drop_ref = 1; 713 drop_ref = 1;
714 } 714 }
715 down_read(&gfs2_umount_flush_sem);
716 spin_lock(&gl->gl_spin); 715 spin_lock(&gl->gl_spin);
717 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 716 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
718 gl->gl_state != LM_ST_UNLOCKED && 717 gl->gl_state != LM_ST_UNLOCKED &&
@@ -725,7 +724,6 @@ static void glock_work_func(struct work_struct *work)
725 } 724 }
726 run_queue(gl, 0); 725 run_queue(gl, 0);
727 spin_unlock(&gl->gl_spin); 726 spin_unlock(&gl->gl_spin);
728 up_read(&gfs2_umount_flush_sem);
729 if (!delay || 727 if (!delay ||
730 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 728 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
731 gfs2_glock_put(gl); 729 gfs2_glock_put(gl);
@@ -750,10 +748,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
750 const struct gfs2_glock_operations *glops, int create, 748 const struct gfs2_glock_operations *glops, int create,
751 struct gfs2_glock **glp) 749 struct gfs2_glock **glp)
752{ 750{
751 struct super_block *s = sdp->sd_vfs;
753 struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type }; 752 struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type };
754 struct gfs2_glock *gl, *tmp; 753 struct gfs2_glock *gl, *tmp;
755 unsigned int hash = gl_hash(sdp, &name); 754 unsigned int hash = gl_hash(sdp, &name);
756 int error; 755 struct address_space *mapping;
757 756
758 read_lock(gl_lock_addr(hash)); 757 read_lock(gl_lock_addr(hash));
759 gl = search_bucket(hash, sdp, &name); 758 gl = search_bucket(hash, sdp, &name);
@@ -765,7 +764,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
765 if (!create) 764 if (!create)
766 return -ENOENT; 765 return -ENOENT;
767 766
768 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); 767 if (glops->go_flags & GLOF_ASPACE)
768 gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL);
769 else
770 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
769 if (!gl) 771 if (!gl)
770 return -ENOMEM; 772 return -ENOMEM;
771 773
@@ -784,18 +786,18 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
784 gl->gl_tchange = jiffies; 786 gl->gl_tchange = jiffies;
785 gl->gl_object = NULL; 787 gl->gl_object = NULL;
786 gl->gl_sbd = sdp; 788 gl->gl_sbd = sdp;
787 gl->gl_aspace = NULL;
788 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 789 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
789 INIT_WORK(&gl->gl_delete, delete_work_func); 790 INIT_WORK(&gl->gl_delete, delete_work_func);
790 791
791 /* If this glock protects actual on-disk data or metadata blocks, 792 mapping = gfs2_glock2aspace(gl);
792 create a VFS inode to manage the pages/buffers holding them. */ 793 if (mapping) {
793 if (glops == &gfs2_inode_glops || glops == &gfs2_rgrp_glops) { 794 mapping->a_ops = &gfs2_meta_aops;
794 gl->gl_aspace = gfs2_aspace_get(sdp); 795 mapping->host = s->s_bdev->bd_inode;
795 if (!gl->gl_aspace) { 796 mapping->flags = 0;
796 error = -ENOMEM; 797 mapping_set_gfp_mask(mapping, GFP_NOFS);
797 goto fail; 798 mapping->assoc_mapping = NULL;
798 } 799 mapping->backing_dev_info = s->s_bdi;
800 mapping->writeback_index = 0;
799 } 801 }
800 802
801 write_lock(gl_lock_addr(hash)); 803 write_lock(gl_lock_addr(hash));
@@ -812,10 +814,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
812 *glp = gl; 814 *glp = gl;
813 815
814 return 0; 816 return 0;
815
816fail:
817 kmem_cache_free(gfs2_glock_cachep, gl);
818 return error;
819} 817}
820 818
821/** 819/**
@@ -1510,35 +1508,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1510 1508
1511void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1509void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1512{ 1510{
1513 unsigned long t;
1514 unsigned int x; 1511 unsigned int x;
1515 int cont;
1516 1512
1517 t = jiffies; 1513 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1518 1514 examine_bucket(clear_glock, sdp, x);
1519 for (;;) {
1520 cont = 0;
1521 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
1522 if (examine_bucket(clear_glock, sdp, x))
1523 cont = 1;
1524 }
1525
1526 if (!cont)
1527 break;
1528
1529 if (time_after_eq(jiffies,
1530 t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
1531 fs_warn(sdp, "Unmount seems to be stalled. "
1532 "Dumping lock state...\n");
1533 gfs2_dump_lockstate(sdp);
1534 t = jiffies;
1535 }
1536
1537 down_write(&gfs2_umount_flush_sem);
1538 invalidate_inodes(sdp->sd_vfs);
1539 up_write(&gfs2_umount_flush_sem);
1540 msleep(10);
1541 }
1542 flush_workqueue(glock_workqueue); 1515 flush_workqueue(glock_workqueue);
1543 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); 1516 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
1544 gfs2_dump_lockstate(sdp); 1517 gfs2_dump_lockstate(sdp);
@@ -1685,7 +1658,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1685 dtime *= 1000000/HZ; /* demote time in uSec */ 1658 dtime *= 1000000/HZ; /* demote time in uSec */
1686 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1659 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1687 dtime = 0; 1660 dtime = 0;
1688 gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu a:%d r:%d\n", 1661 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
1689 state2str(gl->gl_state), 1662 state2str(gl->gl_state),
1690 gl->gl_name.ln_type, 1663 gl->gl_name.ln_type,
1691 (unsigned long long)gl->gl_name.ln_number, 1664 (unsigned long long)gl->gl_name.ln_number,
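The glock.c rework above is the heart of this gfs2 series: instead of allocating a whole struct inode per glock just to get an address_space for cached metadata, glocks that need one (GLOF_ASPACE) come from a second slab whose objects are a glock with an address_space laid out immediately behind it, so gfs2_glock2aspace() reduces to pointer arithmetic. A toy model of the co-allocation with invented types; like the real code, it assumes the trailing struct needs no stricter alignment than the leading one.

#include <stdio.h>
#include <stdlib.h>

#define GLOF_ASPACE 1

struct toy_mapping { long nrpages; };
struct toy_glock   { int go_flags; /* toy_mapping follows in memory */ };

static struct toy_mapping *glock2aspace(struct toy_glock *gl)
{
	if (gl->go_flags & GLOF_ASPACE)
		return (struct toy_mapping *)(gl + 1);   /* the patch's trick */
	return NULL;
}

int main(void)
{
	struct toy_glock *gl;

	gl = calloc(1, sizeof(struct toy_glock) + sizeof(struct toy_mapping));
	if (!gl)
		return 1;
	gl->go_flags = GLOF_ASPACE;
	printf("mapping sits %td bytes past the glock\n",
	       (char *)glock2aspace(gl) - (char *)gl);  /* sizeof(toy_glock) */
	free(gl);
	return 0;
}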
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index c0262faf4725..2bda1911b156 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -180,6 +180,13 @@ static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
180 return gl->gl_state == LM_ST_SHARED; 180 return gl->gl_state == LM_ST_SHARED;
181} 181}
182 182
183static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
184{
185 if (gl->gl_ops->go_flags & GLOF_ASPACE)
186 return (struct address_space *)(gl + 1);
187 return NULL;
188}
189
183int gfs2_glock_get(struct gfs2_sbd *sdp, 190int gfs2_glock_get(struct gfs2_sbd *sdp,
184 u64 number, const struct gfs2_glock_operations *glops, 191 u64 number, const struct gfs2_glock_operations *glops,
185 int create, struct gfs2_glock **glp); 192 int create, struct gfs2_glock **glp);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 78554acc0605..38e3749d476c 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -87,7 +87,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
87 87
88static void rgrp_go_sync(struct gfs2_glock *gl) 88static void rgrp_go_sync(struct gfs2_glock *gl)
89{ 89{
90 struct address_space *metamapping = gl->gl_aspace->i_mapping; 90 struct address_space *metamapping = gfs2_glock2aspace(gl);
91 int error; 91 int error;
92 92
93 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 93 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
@@ -113,7 +113,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
113 113
114static void rgrp_go_inval(struct gfs2_glock *gl, int flags) 114static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
115{ 115{
116 struct address_space *mapping = gl->gl_aspace->i_mapping; 116 struct address_space *mapping = gfs2_glock2aspace(gl);
117 117
118 BUG_ON(!(flags & DIO_METADATA)); 118 BUG_ON(!(flags & DIO_METADATA));
119 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 119 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
@@ -134,7 +134,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
134static void inode_go_sync(struct gfs2_glock *gl) 134static void inode_go_sync(struct gfs2_glock *gl)
135{ 135{
136 struct gfs2_inode *ip = gl->gl_object; 136 struct gfs2_inode *ip = gl->gl_object;
137 struct address_space *metamapping = gl->gl_aspace->i_mapping; 137 struct address_space *metamapping = gfs2_glock2aspace(gl);
138 int error; 138 int error;
139 139
140 if (ip && !S_ISREG(ip->i_inode.i_mode)) 140 if (ip && !S_ISREG(ip->i_inode.i_mode))
@@ -183,7 +183,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
183 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 183 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
184 184
185 if (flags & DIO_METADATA) { 185 if (flags & DIO_METADATA) {
186 struct address_space *mapping = gl->gl_aspace->i_mapping; 186 struct address_space *mapping = gfs2_glock2aspace(gl);
187 truncate_inode_pages(mapping, 0); 187 truncate_inode_pages(mapping, 0);
188 if (ip) { 188 if (ip) {
189 set_bit(GIF_INVALID, &ip->i_flags); 189 set_bit(GIF_INVALID, &ip->i_flags);
@@ -282,7 +282,8 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
282 282
283static int rgrp_go_demote_ok(const struct gfs2_glock *gl) 283static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
284{ 284{
285 return !gl->gl_aspace->i_mapping->nrpages; 285 const struct address_space *mapping = (const struct address_space *)(gl + 1);
286 return !mapping->nrpages;
286} 287}
287 288
288/** 289/**
@@ -387,8 +388,7 @@ static void iopen_go_callback(struct gfs2_glock *gl)
387 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; 388 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
388 389
389 if (gl->gl_demote_state == LM_ST_UNLOCKED && 390 if (gl->gl_demote_state == LM_ST_UNLOCKED &&
390 gl->gl_state == LM_ST_SHARED && 391 gl->gl_state == LM_ST_SHARED && ip) {
391 ip && test_bit(GIF_USER, &ip->i_flags)) {
392 gfs2_glock_hold(gl); 392 gfs2_glock_hold(gl);
393 if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 393 if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
394 gfs2_glock_put_nolock(gl); 394 gfs2_glock_put_nolock(gl);
@@ -407,6 +407,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
407 .go_dump = inode_go_dump, 407 .go_dump = inode_go_dump,
408 .go_type = LM_TYPE_INODE, 408 .go_type = LM_TYPE_INODE,
409 .go_min_hold_time = HZ / 5, 409 .go_min_hold_time = HZ / 5,
410 .go_flags = GLOF_ASPACE,
410}; 411};
411 412
412const struct gfs2_glock_operations gfs2_rgrp_glops = { 413const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -418,6 +419,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
418 .go_dump = gfs2_rgrp_dump, 419 .go_dump = gfs2_rgrp_dump,
419 .go_type = LM_TYPE_RGRP, 420 .go_type = LM_TYPE_RGRP,
420 .go_min_hold_time = HZ / 5, 421 .go_min_hold_time = HZ / 5,
422 .go_flags = GLOF_ASPACE,
421}; 423};
422 424
423const struct gfs2_glock_operations gfs2_trans_glops = { 425const struct gfs2_glock_operations gfs2_trans_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index bc0ad158e6b4..b8025e51cabf 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -162,6 +162,8 @@ struct gfs2_glock_operations {
162 void (*go_callback) (struct gfs2_glock *gl); 162 void (*go_callback) (struct gfs2_glock *gl);
163 const int go_type; 163 const int go_type;
164 const unsigned long go_min_hold_time; 164 const unsigned long go_min_hold_time;
165 const unsigned long go_flags;
166#define GLOF_ASPACE 1
165}; 167};
166 168
167enum { 169enum {
@@ -225,7 +227,6 @@ struct gfs2_glock {
225 227
226 struct gfs2_sbd *gl_sbd; 228 struct gfs2_sbd *gl_sbd;
227 229
228 struct inode *gl_aspace;
229 struct list_head gl_ail_list; 230 struct list_head gl_ail_list;
230 atomic_t gl_ail_count; 231 atomic_t gl_ail_count;
231 struct delayed_work gl_work; 232 struct delayed_work gl_work;
@@ -258,7 +259,6 @@ enum {
258 GIF_INVALID = 0, 259 GIF_INVALID = 0,
259 GIF_QD_LOCKED = 1, 260 GIF_QD_LOCKED = 1,
260 GIF_SW_PAGED = 3, 261 GIF_SW_PAGED = 3,
261 GIF_USER = 4, /* user inode, not metadata addr space */
262}; 262};
263 263
264 264
@@ -451,7 +451,6 @@ struct gfs2_tune {
451 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ 451 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
452 unsigned int gt_new_files_jdata; 452 unsigned int gt_new_files_jdata;
453 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 453 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
454 unsigned int gt_stall_secs; /* Detects trouble! */
455 unsigned int gt_complain_secs; 454 unsigned int gt_complain_secs;
456 unsigned int gt_statfs_quantum; 455 unsigned int gt_statfs_quantum;
457 unsigned int gt_statfs_slow; 456 unsigned int gt_statfs_slow;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6e220f4eee7d..b1bf2694fb2b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -45,7 +45,7 @@ static int iget_test(struct inode *inode, void *opaque)
45 struct gfs2_inode *ip = GFS2_I(inode); 45 struct gfs2_inode *ip = GFS2_I(inode);
46 u64 *no_addr = opaque; 46 u64 *no_addr = opaque;
47 47
48 if (ip->i_no_addr == *no_addr && test_bit(GIF_USER, &ip->i_flags)) 48 if (ip->i_no_addr == *no_addr)
49 return 1; 49 return 1;
50 50
51 return 0; 51 return 0;
@@ -58,7 +58,6 @@ static int iget_set(struct inode *inode, void *opaque)
58 58
59 inode->i_ino = (unsigned long)*no_addr; 59 inode->i_ino = (unsigned long)*no_addr;
60 ip->i_no_addr = *no_addr; 60 ip->i_no_addr = *no_addr;
61 set_bit(GIF_USER, &ip->i_flags);
62 return 0; 61 return 0;
63} 62}
64 63
@@ -84,7 +83,7 @@ static int iget_skip_test(struct inode *inode, void *opaque)
84 struct gfs2_inode *ip = GFS2_I(inode); 83 struct gfs2_inode *ip = GFS2_I(inode);
85 struct gfs2_skip_data *data = opaque; 84 struct gfs2_skip_data *data = opaque;
86 85
87 if (ip->i_no_addr == data->no_addr && test_bit(GIF_USER, &ip->i_flags)){ 86 if (ip->i_no_addr == data->no_addr) {
88 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ 87 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
89 data->skipped = 1; 88 data->skipped = 1;
90 return 0; 89 return 0;
@@ -103,7 +102,6 @@ static int iget_skip_set(struct inode *inode, void *opaque)
103 return 1; 102 return 1;
104 inode->i_ino = (unsigned long)(data->no_addr); 103 inode->i_ino = (unsigned long)(data->no_addr);
105 ip->i_no_addr = data->no_addr; 104 ip->i_no_addr = data->no_addr;
106 set_bit(GIF_USER, &ip->i_flags);
107 return 0; 105 return 0;
108} 106}
109 107
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0e5e0e7022e5..569b46240f61 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -30,7 +30,10 @@ static void gdlm_ast(void *arg)
30 30
31 switch (gl->gl_lksb.sb_status) { 31 switch (gl->gl_lksb.sb_status) {
32 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ 32 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
33 kmem_cache_free(gfs2_glock_cachep, gl); 33 if (gl->gl_ops->go_flags & GLOF_ASPACE)
34 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
35 else
36 kmem_cache_free(gfs2_glock_cachep, gl);
34 if (atomic_dec_and_test(&sdp->sd_glock_disposal)) 37 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
35 wake_up(&sdp->sd_glock_wait); 38 wake_up(&sdp->sd_glock_wait);
36 return; 39 return;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index de97632ba32f..adc260fbea90 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -528,9 +528,9 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
528 gfs2_pin(sdp, bd->bd_bh); 528 gfs2_pin(sdp, bd->bd_bh);
529 tr->tr_num_databuf_new++; 529 tr->tr_num_databuf_new++;
530 sdp->sd_log_num_databuf++; 530 sdp->sd_log_num_databuf++;
531 list_add(&le->le_list, &sdp->sd_log_le_databuf); 531 list_add_tail(&le->le_list, &sdp->sd_log_le_databuf);
532 } else { 532 } else {
533 list_add(&le->le_list, &sdp->sd_log_le_ordered); 533 list_add_tail(&le->le_list, &sdp->sd_log_le_ordered);
534 } 534 }
535out: 535out:
536 gfs2_log_unlock(sdp); 536 gfs2_log_unlock(sdp);
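The lops.c change swaps list_add() for list_add_tail(), so log elements are walked in the order they were submitted (FIFO) rather than reversed. A standalone illustration of why that matters, using a minimal invented list:

#include <stdio.h>

struct node { int v; struct node *next; };

/* Minimal stand-ins: head insertion vs. tail insertion. */
static void add_head(struct node **l, struct node *n)
{
	n->next = *l;
	*l = n;
}

static void add_tail(struct node **l, struct node *n)
{
	struct node **p = l;

	while (*p)
		p = &(*p)->next;
	n->next = NULL;
	*p = n;
}

int main(void)
{
	struct node a = {1}, b = {2}, c = {3}, *l = NULL;
	struct node d = {1}, e = {2}, f = {3}, *m = NULL;
	struct node *it;

	add_head(&l, &a); add_head(&l, &b); add_head(&l, &c);
	for (it = l; it; it = it->next)
		printf("%d ", it->v);            /* 3 2 1: reversed */
	putchar('\n');

	add_tail(&m, &d); add_tail(&m, &e); add_tail(&m, &f);
	for (it = m; it; it = it->next)
		printf("%d ", it->v);            /* 1 2 3: submission order */
	putchar('\n');
	return 0;
}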
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 5b31f7741a8f..a88fadc704bb 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -52,6 +52,22 @@ static void gfs2_init_glock_once(void *foo)
52 atomic_set(&gl->gl_ail_count, 0); 52 atomic_set(&gl->gl_ail_count, 0);
53} 53}
54 54
55static void gfs2_init_gl_aspace_once(void *foo)
56{
57 struct gfs2_glock *gl = foo;
58 struct address_space *mapping = (struct address_space *)(gl + 1);
59
60 gfs2_init_glock_once(gl);
61 memset(mapping, 0, sizeof(*mapping));
62 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
63 spin_lock_init(&mapping->tree_lock);
64 spin_lock_init(&mapping->i_mmap_lock);
65 INIT_LIST_HEAD(&mapping->private_list);
66 spin_lock_init(&mapping->private_lock);
67 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
68 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
69}
70
55/** 71/**
56 * init_gfs2_fs - Register GFS2 as a filesystem 72 * init_gfs2_fs - Register GFS2 as a filesystem
57 * 73 *
@@ -78,6 +94,14 @@ static int __init init_gfs2_fs(void)
78 if (!gfs2_glock_cachep) 94 if (!gfs2_glock_cachep)
79 goto fail; 95 goto fail;
80 96
97 gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock (aspace)",
98 sizeof(struct gfs2_glock) +
99 sizeof(struct address_space),
100 0, 0, gfs2_init_gl_aspace_once);
101
102 if (!gfs2_glock_aspace_cachep)
103 goto fail;
104
81 gfs2_inode_cachep = kmem_cache_create("gfs2_inode", 105 gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
82 sizeof(struct gfs2_inode), 106 sizeof(struct gfs2_inode),
83 0, SLAB_RECLAIM_ACCOUNT| 107 0, SLAB_RECLAIM_ACCOUNT|
@@ -144,6 +168,9 @@ fail:
144 if (gfs2_inode_cachep) 168 if (gfs2_inode_cachep)
145 kmem_cache_destroy(gfs2_inode_cachep); 169 kmem_cache_destroy(gfs2_inode_cachep);
146 170
171 if (gfs2_glock_aspace_cachep)
172 kmem_cache_destroy(gfs2_glock_aspace_cachep);
173
147 if (gfs2_glock_cachep) 174 if (gfs2_glock_cachep)
148 kmem_cache_destroy(gfs2_glock_cachep); 175 kmem_cache_destroy(gfs2_glock_cachep);
149 176
@@ -169,6 +196,7 @@ static void __exit exit_gfs2_fs(void)
169 kmem_cache_destroy(gfs2_rgrpd_cachep); 196 kmem_cache_destroy(gfs2_rgrpd_cachep);
170 kmem_cache_destroy(gfs2_bufdata_cachep); 197 kmem_cache_destroy(gfs2_bufdata_cachep);
171 kmem_cache_destroy(gfs2_inode_cachep); 198 kmem_cache_destroy(gfs2_inode_cachep);
199 kmem_cache_destroy(gfs2_glock_aspace_cachep);
172 kmem_cache_destroy(gfs2_glock_cachep); 200 kmem_cache_destroy(gfs2_glock_cachep);
173 201
174 gfs2_sys_uninit(); 202 gfs2_sys_uninit();
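On the main.c hunk: the constructor passed to kmem_cache_create() runs when a slab page is populated, once per object, not on every kmem_cache_alloc(), and objects must be freed back in constructor-equivalent state. That is why the embedded mapping's locks, trees, and list heads are set up in gfs2_init_gl_aspace_once() rather than on each allocation. A generic kernel-style sketch of the pattern, names invented:

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/errno.h>

struct widget {
	spinlock_t lock;
	struct list_head anchors;
};

/* Only state that users leave intact across kmem_cache_free() may be
 * initialized here -- it will NOT be re-run on the next allocation. */
static void widget_ctor(void *obj)
{
	struct widget *w = obj;

	spin_lock_init(&w->lock);
	INIT_LIST_HEAD(&w->anchors);
}

static struct kmem_cache *widget_cachep;

static int __init widget_init(void)
{
	widget_cachep = kmem_cache_create("widget", sizeof(struct widget),
					  0, 0, widget_ctor);
	return widget_cachep ? 0 : -ENOMEM;
}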
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 6f68a5f18eb8..0bb12c80937a 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -93,49 +93,13 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
93 return err; 93 return err;
94} 94}
95 95
96static const struct address_space_operations aspace_aops = { 96const struct address_space_operations gfs2_meta_aops = {
97 .writepage = gfs2_aspace_writepage, 97 .writepage = gfs2_aspace_writepage,
98 .releasepage = gfs2_releasepage, 98 .releasepage = gfs2_releasepage,
99 .sync_page = block_sync_page, 99 .sync_page = block_sync_page,
100}; 100};
101 101
102/** 102/**
103 * gfs2_aspace_get - Create and initialize a struct inode structure
104 * @sdp: the filesystem the aspace is in
105 *
106 * Right now a struct inode is just a struct inode. Maybe Linux
107 * will supply a more lightweight address space construct (that works)
108 * in the future.
109 *
110 * Make sure pages/buffers in this aspace aren't in high memory.
111 *
112 * Returns: the aspace
113 */
114
115struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
116{
117 struct inode *aspace;
118 struct gfs2_inode *ip;
119
120 aspace = new_inode(sdp->sd_vfs);
121 if (aspace) {
122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
123 aspace->i_mapping->a_ops = &aspace_aops;
124 aspace->i_size = MAX_LFS_FILESIZE;
125 ip = GFS2_I(aspace);
126 clear_bit(GIF_USER, &ip->i_flags);
127 insert_inode_hash(aspace);
128 }
129 return aspace;
130}
131
132void gfs2_aspace_put(struct inode *aspace)
133{
134 remove_inode_hash(aspace);
135 iput(aspace);
136}
137
138/**
139 * gfs2_meta_sync - Sync all buffers associated with a glock 103 * gfs2_meta_sync - Sync all buffers associated with a glock
140 * @gl: The glock 104 * @gl: The glock
141 * 105 *
@@ -143,7 +107,7 @@ void gfs2_aspace_put(struct inode *aspace)
143 107
144void gfs2_meta_sync(struct gfs2_glock *gl) 108void gfs2_meta_sync(struct gfs2_glock *gl)
145{ 109{
146 struct address_space *mapping = gl->gl_aspace->i_mapping; 110 struct address_space *mapping = gfs2_glock2aspace(gl);
147 int error; 111 int error;
148 112
149 filemap_fdatawrite(mapping); 113 filemap_fdatawrite(mapping);
@@ -164,7 +128,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl)
164 128
165struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) 129struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
166{ 130{
167 struct address_space *mapping = gl->gl_aspace->i_mapping; 131 struct address_space *mapping = gfs2_glock2aspace(gl);
168 struct gfs2_sbd *sdp = gl->gl_sbd; 132 struct gfs2_sbd *sdp = gl->gl_sbd;
169 struct page *page; 133 struct page *page;
170 struct buffer_head *bh; 134 struct buffer_head *bh;
@@ -344,8 +308,10 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
344 308
345void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) 309void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
346{ 310{
347 struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host); 311 struct address_space *mapping = bh->b_page->mapping;
312 struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
348 struct gfs2_bufdata *bd = bh->b_private; 313 struct gfs2_bufdata *bd = bh->b_private;
314
349 if (test_clear_buffer_pinned(bh)) { 315 if (test_clear_buffer_pinned(bh)) {
350 list_del_init(&bd->bd_le.le_list); 316 list_del_init(&bd->bd_le.le_list);
351 if (meta) { 317 if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index de270c2f9b63..6a1d9ba16411 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -37,8 +37,16 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
37 0, from_head - to_head); 37 0, from_head - to_head);
38} 38}
39 39
40struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); 40extern const struct address_space_operations gfs2_meta_aops;
41void gfs2_aspace_put(struct inode *aspace); 41
42static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping)
43{
44 struct inode *inode = mapping->host;
45 if (mapping->a_ops == &gfs2_meta_aops)
46 return (((struct gfs2_glock *)mapping) - 1)->gl_sbd;
47 else
48 return inode->i_sb->s_fs_info;
49}
42 50
43void gfs2_meta_sync(struct gfs2_glock *gl); 51void gfs2_meta_sync(struct gfs2_glock *gl);
44 52
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 8a102f731003..a054b526dc08 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -65,7 +65,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
65 gt->gt_quota_scale_den = 1; 65 gt->gt_quota_scale_den = 1;
66 gt->gt_new_files_jdata = 0; 66 gt->gt_new_files_jdata = 0;
67 gt->gt_max_readahead = 1 << 18; 67 gt->gt_max_readahead = 1 << 18;
68 gt->gt_stall_secs = 600;
69 gt->gt_complain_secs = 10; 68 gt->gt_complain_secs = 10;
70} 69}
71 70
@@ -725,7 +724,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
725 goto fail; 724 goto fail;
726 } 725 }
727 726
728 error = -EINVAL; 727 error = -EUSERS;
729 if (!gfs2_jindex_size(sdp)) { 728 if (!gfs2_jindex_size(sdp)) {
730 fs_err(sdp, "no journals!\n"); 729 fs_err(sdp, "no journals!\n");
731 goto fail_jindex; 730 goto fail_jindex;
@@ -1241,10 +1240,9 @@ fail_sb:
1241fail_locking: 1240fail_locking:
1242 init_locking(sdp, &mount_gh, UNDO); 1241 init_locking(sdp, &mount_gh, UNDO);
1243fail_lm: 1242fail_lm:
1243 invalidate_inodes(sb);
1244 gfs2_gl_hash_clear(sdp); 1244 gfs2_gl_hash_clear(sdp);
1245 gfs2_lm_unmount(sdp); 1245 gfs2_lm_unmount(sdp);
1246 while (invalidate_inodes(sb))
1247 yield();
1248fail_sys: 1246fail_sys:
1249 gfs2_sys_fs_del(sdp); 1247 gfs2_sys_fs_del(sdp);
1250fail: 1248fail:
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b9dd3da22c0a..e5e22629da67 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -722,8 +722,7 @@ static int gfs2_write_inode(struct inode *inode, int sync)
722 int ret = 0; 722 int ret = 0;
723 723
724 /* Check this is a "normal" inode, etc */ 724 /* Check this is a "normal" inode, etc */
725 if (!test_bit(GIF_USER, &ip->i_flags) || 725 if (current->flags & PF_MEMALLOC)
726 (current->flags & PF_MEMALLOC))
727 return 0; 726 return 0;
728 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 727 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
729 if (ret) 728 if (ret)
@@ -860,6 +859,7 @@ restart:
860 gfs2_clear_rgrpd(sdp); 859 gfs2_clear_rgrpd(sdp);
861 gfs2_jindex_free(sdp); 860 gfs2_jindex_free(sdp);
862 /* Take apart glock structures and buffer lists */ 861 /* Take apart glock structures and buffer lists */
862 invalidate_inodes(sdp->sd_vfs);
863 gfs2_gl_hash_clear(sdp); 863 gfs2_gl_hash_clear(sdp);
864 /* Unmount the locking protocol */ 864 /* Unmount the locking protocol */
865 gfs2_lm_unmount(sdp); 865 gfs2_lm_unmount(sdp);
@@ -1194,7 +1194,7 @@ static void gfs2_drop_inode(struct inode *inode)
1194{ 1194{
1195 struct gfs2_inode *ip = GFS2_I(inode); 1195 struct gfs2_inode *ip = GFS2_I(inode);
1196 1196
1197 if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { 1197 if (inode->i_nlink) {
1198 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; 1198 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
1199 if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) 1199 if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
1200 clear_nlink(inode); 1200 clear_nlink(inode);
@@ -1212,18 +1212,12 @@ static void gfs2_clear_inode(struct inode *inode)
1212{ 1212{
1213 struct gfs2_inode *ip = GFS2_I(inode); 1213 struct gfs2_inode *ip = GFS2_I(inode);
1214 1214
1215 /* This tells us its a "real" inode and not one which only 1215 ip->i_gl->gl_object = NULL;
1216 * serves to contain an address space (see rgrp.c, meta_io.c) 1216 gfs2_glock_put(ip->i_gl);
1217 * which therefore doesn't have its own glocks. 1217 ip->i_gl = NULL;
1218 */ 1218 if (ip->i_iopen_gh.gh_gl) {
1219 if (test_bit(GIF_USER, &ip->i_flags)) { 1219 ip->i_iopen_gh.gh_gl->gl_object = NULL;
1220 ip->i_gl->gl_object = NULL; 1220 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1221 gfs2_glock_put(ip->i_gl);
1222 ip->i_gl = NULL;
1223 if (ip->i_iopen_gh.gh_gl) {
1224 ip->i_iopen_gh.gh_gl->gl_object = NULL;
1225 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1226 }
1227 } 1221 }
1228} 1222}
1229 1223
@@ -1358,9 +1352,6 @@ static void gfs2_delete_inode(struct inode *inode)
1358 struct gfs2_holder gh; 1352 struct gfs2_holder gh;
1359 int error; 1353 int error;
1360 1354
1361 if (!test_bit(GIF_USER, &ip->i_flags))
1362 goto out;
1363
1364 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1355 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1365 if (unlikely(error)) { 1356 if (unlikely(error)) {
1366 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1357 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 0dc34621f6a6..a0db1c94317d 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -478,7 +478,6 @@ TUNE_ATTR(complain_secs, 0);
478TUNE_ATTR(statfs_slow, 0); 478TUNE_ATTR(statfs_slow, 0);
479TUNE_ATTR(new_files_jdata, 0); 479TUNE_ATTR(new_files_jdata, 0);
480TUNE_ATTR(quota_simul_sync, 1); 480TUNE_ATTR(quota_simul_sync, 1);
481TUNE_ATTR(stall_secs, 1);
482TUNE_ATTR(statfs_quantum, 1); 481TUNE_ATTR(statfs_quantum, 1);
483TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); 482TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
484 483
@@ -491,7 +490,6 @@ static struct attribute *tune_attrs[] = {
491 &tune_attr_complain_secs.attr, 490 &tune_attr_complain_secs.attr,
492 &tune_attr_statfs_slow.attr, 491 &tune_attr_statfs_slow.attr,
493 &tune_attr_quota_simul_sync.attr, 492 &tune_attr_quota_simul_sync.attr,
494 &tune_attr_stall_secs.attr,
495 &tune_attr_statfs_quantum.attr, 493 &tune_attr_statfs_quantum.attr,
496 &tune_attr_quota_scale.attr, 494 &tune_attr_quota_scale.attr,
497 &tune_attr_new_files_jdata.attr, 495 &tune_attr_new_files_jdata.attr,
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f6a7efa34eb9..226f2bfbf16a 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -21,6 +21,7 @@
21#include "util.h" 21#include "util.h"
22 22
23struct kmem_cache *gfs2_glock_cachep __read_mostly; 23struct kmem_cache *gfs2_glock_cachep __read_mostly;
24struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
24struct kmem_cache *gfs2_inode_cachep __read_mostly; 25struct kmem_cache *gfs2_inode_cachep __read_mostly;
25struct kmem_cache *gfs2_bufdata_cachep __read_mostly; 26struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; 27struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 33e96b0ce9ab..b432e04600de 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -145,6 +145,7 @@ gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
145 145
146 146
147extern struct kmem_cache *gfs2_glock_cachep; 147extern struct kmem_cache *gfs2_glock_cachep;
148extern struct kmem_cache *gfs2_glock_aspace_cachep;
148extern struct kmem_cache *gfs2_inode_cachep; 149extern struct kmem_cache *gfs2_inode_cachep;
149extern struct kmem_cache *gfs2_bufdata_cachep; 150extern struct kmem_cache *gfs2_bufdata_cachep;
150extern struct kmem_cache *gfs2_rgrpd_cachep; 151extern struct kmem_cache *gfs2_rgrpd_cachep;
diff --git a/fs/namei.c b/fs/namei.c
index d62fdc875f22..a4855af776a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -823,6 +823,17 @@ fail:
823} 823}
824 824
825/* 825/*
826 * This is a temporary kludge to deal with "automount" symlinks; proper
827 * solution is to trigger them on follow_mount(), so that do_lookup()
828 * would DTRT. To be killed before 2.6.34-final.
829 */
830static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
831{
832 return inode && unlikely(inode->i_op->follow_link) &&
833 ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
834}
835
836/*
826 * Name resolution. 837 * Name resolution.
827 * This is the basic name resolution function, turning a pathname into 838 * This is the basic name resolution function, turning a pathname into
828 * the final dentry. We expect 'base' to be positive and a directory. 839 * the final dentry. We expect 'base' to be positive and a directory.
@@ -942,8 +953,7 @@ last_component:
942 if (err) 953 if (err)
943 break; 954 break;
944 inode = next.dentry->d_inode; 955 inode = next.dentry->d_inode;
945 if ((lookup_flags & LOOKUP_FOLLOW) 956 if (follow_on_final(inode, lookup_flags)) {
946 && inode && inode->i_op->follow_link) {
947 err = do_follow_link(&next, nd); 957 err = do_follow_link(&next, nd);
948 if (err) 958 if (err)
949 goto return_err; 959 goto return_err;
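follow_on_final() above makes the last-component decision explicit: a trailing symlink is followed when the caller asked for it (LOOKUP_FOLLOW) or when it resolves to a directory, the automount case the comment calls a temporary kludge. A standalone model of the predicate; the flag value 1 matches include/linux/namei.h of this era, the rest is invented.

#include <stdio.h>

#define LOOKUP_FOLLOW 1   /* as in include/linux/namei.h at the time */

/* has_follow_link stands in for inode->i_op->follow_link != NULL,
 * mode_is_dir for S_ISDIR(inode->i_mode). */
static int follow_on_final(int has_follow_link, int mode_is_dir,
			   unsigned flags)
{
	return has_follow_link && ((flags & LOOKUP_FOLLOW) || mode_is_dir);
}

int main(void)
{
	/* Caller set LOOKUP_FOLLOW: followed, as before. */
	printf("%d\n", follow_on_final(1, 0, LOOKUP_FOLLOW));  /* 1 */
	/* "Automount" symlink to a directory, no LOOKUP_FOLLOW:
	 * still followed -- the temporary kludge. */
	printf("%d\n", follow_on_final(1, 1, 0));              /* 1 */
	/* Plain symlink, no LOOKUP_FOLLOW: not followed. */
	printf("%d\n", follow_on_final(1, 0, 0));              /* 0 */
	return 0;
}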
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 59e5673b4597..a43d07e7b924 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -95,8 +95,7 @@ config ROOT_NFS
 	  Most people say N here.
 
 config NFS_FSCACHE
-	bool "Provide NFS client caching support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "Provide NFS client caching support"
 	depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
 	help
 	  Say Y here if you want NFS data to be cached locally on disc through
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e1d415e97849..0d289823e856 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -342,6 +342,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->res.fattr = &data->fattr;
 		data->res.eof = 0;
 		data->res.count = bytes;
+		nfs_fattr_init(&data->fattr);
 		msg.rpc_argp = &data->args;
 		msg.rpc_resp = &data->res;
 
@@ -575,6 +576,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
+	nfs_fattr_init(&data->fattr);
 
 	NFS_PROTO(data->inode)->commit_setup(data, &msg);
 
@@ -766,6 +768,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->res.fattr = &data->fattr;
 		data->res.count = bytes;
 		data->res.verf = &data->verf;
+		nfs_fattr_init(&data->fattr);
 
 		task_setup_data.task = &data->task;
 		task_setup_data.callback_data = data;
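
All three hunks add the same initialization: the preallocated nfs_fattr must be reset before each RPC so that a reply carrying no attributes is not decoded against stale state from a previous call. For reference, nfs_fattr_init() is roughly the following inline from include/linux/nfs_fs.h at this point in the tree (a sketch; the exact field set may differ by version):

	static inline void nfs_fattr_init(struct nfs_fattr *fattr)
	{
		fattr->valid = 0;		/* nothing decoded yet */
		fattr->time_start = jiffies;	/* attribute-cache timestamp */
		fattr->gencount = nfs_inc_attr_generation_counter();
	}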
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index fa588006588d..237874f1af23 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -354,12 +354,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
  */
 int nfs_fscache_release_page(struct page *page, gfp_t gfp)
 {
-	struct nfs_inode *nfsi = NFS_I(page->mapping->host);
-	struct fscache_cookie *cookie = nfsi->fscache;
-
-	BUG_ON(!cookie);
-
 	if (PageFsCache(page)) {
+		struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+		struct fscache_cookie *cookie = nfsi->fscache;
+
+		BUG_ON(!cookie);
 		dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
 			 cookie, page, nfsi);
 
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 46d779abafd3..1d8d5c813b01 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -57,12 +57,12 @@ static inline void nfs_add_fscache_stats(struct inode *inode,
 }
 #endif
 
-static inline struct nfs_iostats *nfs_alloc_iostats(void)
+static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
 {
 	return alloc_percpu(struct nfs_iostats);
 }
 
-static inline void nfs_free_iostats(struct nfs_iostats *stats)
+static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
 {
 	if (stats != NULL)
 		free_percpu(stats);
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0adefc40cc89..59047f8d7d72 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -120,7 +120,7 @@ static struct {
 	{ .status = MNT3ERR_INVAL,		.errno = -EINVAL,	},
 	{ .status = MNT3ERR_NAMETOOLONG,	.errno = -ENAMETOOLONG,	},
 	{ .status = MNT3ERR_NOTSUPP,		.errno = -ENOTSUPP,	},
-	{ .status = MNT3ERR_SERVERFAULT,	.errno = -ESERVERFAULT,	},
+	{ .status = MNT3ERR_SERVERFAULT,	.errno = -EREMOTEIO,	},
 };
 
 struct mountres {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5e078b222b4e..7bc2da8efd4a 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -699,7 +699,7 @@ static struct {
 	{ NFSERR_BAD_COOKIE,	-EBADCOOKIE	},
 	{ NFSERR_NOTSUPP,	-ENOTSUPP	},
 	{ NFSERR_TOOSMALL,	-ETOOSMALL	},
-	{ NFSERR_SERVERFAULT,	-ESERVERFAULT	},
+	{ NFSERR_SERVERFAULT,	-EREMOTEIO	},
 	{ NFSERR_BADTYPE,	-EBADTYPE	},
 	{ NFSERR_JUKEBOX,	-EJUKEBOX	},
 	{ -1,			-EIO		}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e437fd6a819f..5cd5184b56db 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4631,7 +4631,7 @@ static int decode_sequence(struct xdr_stream *xdr,
 	 * If the server returns different values for sessionID, slotID or
 	 * sequence number, the server is looney tunes.
 	 */
-	status = -ESERVERFAULT;
+	status = -EREMOTEIO;
 
 	if (memcmp(id.data, res->sr_session->sess_id.data,
 		NFS4_MAX_SESSIONID_LEN)) {
@@ -5774,7 +5774,7 @@ static struct {
 	{ NFS4ERR_BAD_COOKIE,	-EBADCOOKIE	},
 	{ NFS4ERR_NOTSUPP,	-ENOTSUPP	},
 	{ NFS4ERR_TOOSMALL,	-ETOOSMALL	},
-	{ NFS4ERR_SERVERFAULT,	-ESERVERFAULT	},
+	{ NFS4ERR_SERVERFAULT,	-EREMOTEIO	},
 	{ NFS4ERR_BADTYPE,	-EBADTYPE	},
 	{ NFS4ERR_LOCKED,	-EAGAIN		},
 	{ NFS4ERR_SYMLINK,	-ELOOP		},
@@ -5801,7 +5801,7 @@ nfs4_stat_to_errno(int stat)
 	}
 	if (stat <= 10000 || stat > 10100) {
 		/* The server is looney tunes. */
-		return -ESERVERFAULT;
+		return -EREMOTEIO;
 	}
 	/* If we cannot translate the error, the recovery routines should
 	 * handle it.
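
The ESERVERFAULT-to-EREMOTEIO substitution repeats across the mount client, NFSv2 and NFSv4 tables for one reason: ESERVERFAULT lives in the kernel-internal errno range used by NFS and should never leak to userspace, while EREMOTEIO is a standard errno that applications can interpret. A sketch of the two values as defined in the errno headers (numbers follow the generic layout; treat them as illustrative):

	#define EREMOTEIO	121	/* Remote I/O error (userspace-visible) */
	#define ESERVERFAULT	526	/* kernel-internal NFS errno, >512 range */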
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7b54b8bb101f..d63d964a0392 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1598,8 +1598,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 	struct nfs_page *req;
 	int ret;
 
-	if (PageFsCache(page))
-		nfs_fscache_release_page(page, GFP_KERNEL);
+	nfs_fscache_release_page(page, GFP_KERNEL);
 
 	req = nfs_find_and_lock_request(page);
 	ret = PTR_ERR(req);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c487810a2366..a0c4016413f1 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1316,19 +1316,11 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
 
 static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp)
 {
-	struct svc_export *exp;
 	u32 fsidv[2];
 
 	mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
 
-	exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
-	/*
-	 * We shouldn't have accepting an nfsv4 request at all if we
-	 * don't have a pseudoexport!:
-	 */
-	if (IS_ERR(exp) && PTR_ERR(exp) == -ENOENT)
-		exp = ERR_PTR(-ESERVERFAULT);
-	return exp;
+	return rqst_exp_find(rqstp, FSID_NUM, fsidv);
 }
 
 /*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 97d79eff6b7f..8715d194561a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,7 +752,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 				flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_file_check(*filp, access);
+	else
+		host_err = ima_file_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 64bc8998ac9a..e8865c11777f 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -412,9 +412,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	pdev = part_to_dev(p);
 
 	p->start_sect = start;
-	p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
-	p->discard_alignment = queue_sector_discard_alignment(disk->queue,
-							      start);
+	p->alignment_offset =
+		queue_limit_alignment_offset(&disk->queue->limits, start);
+	p->discard_alignment =
+		queue_limit_discard_alignment(&disk->queue->limits, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 13b5d0708175..18e20feee251 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -270,7 +270,9 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		blocked = p->blocked;
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
+		rcu_read_lock();  /* FIXME: is this correct? */
 		qsize = atomic_read(&__task_cred(p)->user->sigpending);
+		rcu_read_unlock();
 		qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
 		unlock_task_sighand(p, &flags);
 	}
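
The FIXME aside, the locking added here follows the documented credential rule: __task_cred() returns an RCU-protected pointer, so any dereference of the result (here cred->user->sigpending) must happen inside an RCU read-side critical section. The required pattern, sketched:

	const struct cred *cred;

	rcu_read_lock();
	cred = __task_cred(p);		/* only valid inside the section */
	qsize = atomic_read(&cred->user->sigpending);
	rcu_read_unlock();		/* cred must not be used past here */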
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e42bbd843ed1..623e2ffb5d2b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1095,8 +1095,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 	if (!capable(CAP_AUDIT_CONTROL))
 		return -EPERM;
 
-	if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
+	rcu_read_lock();
+	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
+		rcu_read_unlock();
 		return -EPERM;
+	}
+	rcu_read_unlock();
 
 	if (count >= PAGE_SIZE)
 		count = PAGE_SIZE - 1;
@@ -2369,16 +2373,30 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
 	pid_t tgid = task_tgid_nr_ns(current, ns);
-	char tmp[PROC_NUMBUF];
-	if (!tgid)
-		return ERR_PTR(-ENOENT);
-	sprintf(tmp, "%d", task_tgid_nr_ns(current, ns));
-	return ERR_PTR(vfs_follow_link(nd,tmp));
+	char *name = ERR_PTR(-ENOENT);
+	if (tgid) {
+		name = __getname();
+		if (!name)
+			name = ERR_PTR(-ENOMEM);
+		else
+			sprintf(name, "%d", tgid);
+	}
+	nd_set_link(nd, name);
+	return NULL;
+}
+
+static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
+				void *cookie)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		__putname(s);
 }
 
 static const struct inode_operations proc_self_inode_operations = {
 	.readlink	= proc_self_readlink,
 	.follow_link	= proc_self_follow_link,
+	.put_link	= proc_self_put_link,
 };
 
 /*
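
The follow_link/put_link pair above replaces the old vfs_follow_link() call: follow_link publishes the target string with nd_set_link(), and the matching put_link releases it once the lookup is done; error pointers travel through nd_set_link() unchanged and are never freed. A minimal sketch of the contract (the example_* names are placeholders):

	static void *example_follow_link(struct dentry *dentry, struct nameidata *nd)
	{
		char *s = __getname();		/* PATH_MAX-sized buffer */

		if (!s)
			s = ERR_PTR(-ENOMEM);
		else
			sprintf(s, "%d", 42);	/* fill in the link target */
		nd_set_link(nd, s);
		return NULL;			/* cookie handed to put_link */
	}

	static void example_put_link(struct dentry *dentry, struct nameidata *nd,
				     void *cookie)
	{
		char *s = nd_get_link(nd);

		if (!IS_ERR(s))
			__putname(s);
	}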
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 7ca78346d3f0..cfe90a48a6e8 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -12,37 +12,37 @@
 #include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/fs.h>
+#include <linux/syslog.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
 extern wait_queue_head_t log_wait;
 
-extern int do_syslog(int type, char __user *bug, int count);
-
 static int kmsg_open(struct inode * inode, struct file * file)
 {
-	return do_syslog(1,NULL,0);
+	return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE);
 }
 
 static int kmsg_release(struct inode * inode, struct file * file)
 {
-	(void) do_syslog(0,NULL,0);
+	(void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE);
 	return 0;
 }
 
 static ssize_t kmsg_read(struct file *file, char __user *buf,
 			 size_t count, loff_t *ppos)
 {
-	if ((file->f_flags & O_NONBLOCK) && !do_syslog(9, NULL, 0))
+	if ((file->f_flags & O_NONBLOCK) &&
+	    !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE))
 		return -EAGAIN;
-	return do_syslog(2, buf, count);
+	return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE);
 }
 
 static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &log_wait, wait);
-	if (do_syslog(9, NULL, 0))
+	if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
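
The raw numbers removed by this hunk map directly onto the new named constants, and the extra argument tells do_syslog() the request came from /proc/kmsg rather than the syslog(2) syscall. For reference, the relevant definitions in <linux/syslog.h> are along these lines:

	#define SYSLOG_ACTION_CLOSE		0	/* was do_syslog(0, ...) */
	#define SYSLOG_ACTION_OPEN		1	/* was do_syslog(1, ...) */
	#define SYSLOG_ACTION_READ		2	/* was do_syslog(2, ...) */
	#define SYSLOG_ACTION_SIZE_UNREAD	9	/* was do_syslog(9, ...) */
	#define SYSLOG_FROM_FILE		1	/* caller is /proc/kmsg */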
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 123257bb356b..f8650dce74fb 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -10,16 +10,19 @@
 #include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <linux/string.h>
+#include <linux/of.h>
+#include <linux/module.h>
 #include <asm/prom.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
-#ifndef HAVE_ARCH_DEVTREE_FIXUPS
 static inline void set_node_proc_entry(struct device_node *np,
 				       struct proc_dir_entry *de)
 {
-}
+#ifdef HAVE_ARCH_DEVTREE_FIXUPS
+	np->pde = de;
 #endif
+}
 
 static struct proc_dir_entry *proc_device_tree;
 
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9087b10209e6..2df0f5c7c60b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1497,9 +1497,11 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
 
 	args.objectid = key->on_disk_key.k_objectid;
 	args.dirid = key->on_disk_key.k_dir_id;
+	reiserfs_write_unlock(s);
 	inode = iget5_locked(s, key->on_disk_key.k_objectid,
 			     reiserfs_find_actor, reiserfs_init_locked_inode,
 			     (void *)(&args));
+	reiserfs_write_lock(s);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eae7d9dbf3ff..5afd554efad3 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -674,7 +674,6 @@ struct list_head *seq_list_start(struct list_head *head, loff_t pos)
 
 	return NULL;
 }
-
 EXPORT_SYMBOL(seq_list_start);
 
 struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
@@ -684,7 +683,6 @@ struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
 
 	return seq_list_start(head, pos - 1);
 }
-
 EXPORT_SYMBOL(seq_list_start_head);
 
 struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
@@ -695,5 +693,131 @@ struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
 	++*ppos;
 	return lh == head ? NULL : lh;
 }
-
 EXPORT_SYMBOL(seq_list_next);
+
+/**
+ * seq_hlist_start - start an iteration of a hlist
+ * @head: the head of the hlist
+ * @pos: the start position of the sequence
+ *
+ * Called at seq_file->op->start().
+ */
+struct hlist_node *seq_hlist_start(struct hlist_head *head, loff_t pos)
+{
+	struct hlist_node *node;
+
+	hlist_for_each(node, head)
+		if (pos-- == 0)
+			return node;
+	return NULL;
+}
+EXPORT_SYMBOL(seq_hlist_start);
+
+/**
+ * seq_hlist_start_head - start an iteration of a hlist
+ * @head: the head of the hlist
+ * @pos: the start position of the sequence
+ *
+ * Called at seq_file->op->start(). Call this function if you want to
+ * print a header at the top of the output.
+ */
+struct hlist_node *seq_hlist_start_head(struct hlist_head *head, loff_t pos)
+{
+	if (!pos)
+		return SEQ_START_TOKEN;
+
+	return seq_hlist_start(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_hlist_start_head);
+
+/**
+ * seq_hlist_next - move to the next position of the hlist
+ * @v: the current iterator
+ * @head: the head of the hlist
+ * @ppos: the current position
+ *
+ * Called at seq_file->op->next().
+ */
+struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head,
+				  loff_t *ppos)
+{
+	struct hlist_node *node = v;
+
+	++*ppos;
+	if (v == SEQ_START_TOKEN)
+		return head->first;
+	else
+		return node->next;
+}
+EXPORT_SYMBOL(seq_hlist_next);
+
+/**
+ * seq_hlist_start_rcu - start an iteration of a hlist protected by RCU
+ * @head: the head of the hlist
+ * @pos: the start position of the sequence
+ *
+ * Called at seq_file->op->start().
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head,
+				       loff_t pos)
+{
+	struct hlist_node *node;
+
+	__hlist_for_each_rcu(node, head)
+		if (pos-- == 0)
+			return node;
+	return NULL;
+}
+EXPORT_SYMBOL(seq_hlist_start_rcu);
+
+/**
+ * seq_hlist_start_head_rcu - start an iteration of a hlist protected by RCU
+ * @head: the head of the hlist
+ * @pos: the start position of the sequence
+ *
+ * Called at seq_file->op->start(). Call this function if you want to
+ * print a header at the top of the output.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head,
+					    loff_t pos)
+{
+	if (!pos)
+		return SEQ_START_TOKEN;
+
+	return seq_hlist_start_rcu(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_hlist_start_head_rcu);
+
+/**
+ * seq_hlist_next_rcu - move to the next position of the hlist protected by RCU
+ * @v: the current iterator
+ * @head: the head of the hlist
+ * @ppos: the current position
+ *
+ * Called at seq_file->op->next().
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_next_rcu(void *v,
+				      struct hlist_head *head,
+				      loff_t *ppos)
+{
+	struct hlist_node *node = v;
+
+	++*ppos;
+	if (v == SEQ_START_TOKEN)
+		return rcu_dereference(head->first);
+	else
+		return rcu_dereference(node->next);
+}
+EXPORT_SYMBOL(seq_hlist_next_rcu);
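
A minimal seq_file backend over an hlist, showing where the new helpers slot into the start/next/stop/show operations (a sketch; the my_* names, the list and its lock are placeholders):

	struct my_entry {
		struct hlist_node node;
	};

	static HLIST_HEAD(my_list);
	static DEFINE_SPINLOCK(my_lock);

	static void *my_seq_start(struct seq_file *m, loff_t *pos)
	{
		spin_lock(&my_lock);
		return seq_hlist_start_head(&my_list, *pos); /* header at pos 0 */
	}

	static void *my_seq_next(struct seq_file *m, void *v, loff_t *pos)
	{
		return seq_hlist_next(v, &my_list, pos);
	}

	static void my_seq_stop(struct seq_file *m, void *v)
	{
		spin_unlock(&my_lock);
	}

	static int my_seq_show(struct seq_file *m, void *v)
	{
		if (v == SEQ_START_TOKEN) {
			seq_puts(m, "entries:\n");
			return 0;
		}
		seq_printf(m, "%p\n", hlist_entry(v, struct my_entry, node));
		return 0;
	}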
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 220b758523ae..6a06a1d1ea7b 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -81,24 +81,23 @@ int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr * iattr)
 		if (!sd_attrs)
 			return -ENOMEM;
 		sd->s_iattr = sd_attrs;
-	} else {
+	}
 	/* attributes were changed at least once in past */
 	iattrs = &sd_attrs->ia_iattr;
 
 	if (ia_valid & ATTR_UID)
 		iattrs->ia_uid = iattr->ia_uid;
 	if (ia_valid & ATTR_GID)
 		iattrs->ia_gid = iattr->ia_gid;
 	if (ia_valid & ATTR_ATIME)
 		iattrs->ia_atime = iattr->ia_atime;
 	if (ia_valid & ATTR_MTIME)
 		iattrs->ia_mtime = iattr->ia_mtime;
 	if (ia_valid & ATTR_CTIME)
 		iattrs->ia_ctime = iattr->ia_ctime;
 	if (ia_valid & ATTR_MODE) {
 		umode_t mode = iattr->ia_mode;
 		iattrs->ia_mode = sd->s_mode = mode;
 	}
-	}
 	return 0;
 }
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 56641fe52a23..5c5a366aa332 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,7 +16,7 @@
 # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
-EXTRA_CFLAGS +=	 -I$(src) -I$(src)/linux-2.6 -funsigned-char
+EXTRA_CFLAGS +=	 -I$(src) -I$(src)/linux-2.6
 
 XFS_LINUX := linux-2.6
 
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 2d3f90afe5f1..bc7405585def 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -16,7 +16,6 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <linux/mm.h>
-#include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/blkdev.h>
@@ -24,8 +23,25 @@
 #include "time.h"
 #include "kmem.h"
 
-#define MAX_VMALLOCS	6
-#define MAX_SLAB_SIZE	0x20000
+/*
+ * Greedy allocation.  May fail and may return vmalloced memory.
+ *
+ * Must be freed using kmem_free_large.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+	void		*ptr;
+	size_t		kmsize = maxsize;
+
+	while (!(ptr = kmem_zalloc_large(kmsize))) {
+		if ((kmsize >>= 1) <= minsize)
+			kmsize = minsize;
+	}
+	if (ptr)
+		*size = kmsize;
+	return ptr;
+}
 
 void *
 kmem_alloc(size_t size, unsigned int __nocast flags)
@@ -34,19 +50,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 	gfp_t	lflags = kmem_flags_convert(flags);
 	void	*ptr;
 
-#ifdef DEBUG
-	if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
-		printk(KERN_WARNING "Large %s attempt, size=%ld\n",
-			__func__, (long)size);
-		dump_stack();
-	}
-#endif
-
 	do {
-		if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
-			ptr = kmalloc(size, lflags);
-		else
-			ptr = __vmalloc(size, lflags, PAGE_KERNEL);
+		ptr = kmalloc(size, lflags);
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
@@ -68,27 +73,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags)
 	return ptr;
 }
 
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
-		   unsigned int __nocast flags)
-{
-	void		*ptr;
-	size_t		kmsize = maxsize;
-	unsigned int	kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP;
-
-	while (!(ptr = kmem_zalloc(kmsize, kmflags))) {
-		if ((kmsize <= minsize) && (flags & KM_NOSLEEP))
-			break;
-		if ((kmsize >>= 1) <= minsize) {
-			kmsize = minsize;
-			kmflags = flags;
-		}
-	}
-	if (ptr)
-		*size = kmsize;
-	return ptr;
-}
-
 void
 kmem_free(const void *ptr)
 {
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 179cbd630f69..f7c8f7a9ea6d 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -21,6 +21,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 /*
  * General memory allocation interfaces
@@ -30,7 +31,6 @@
 #define KM_NOSLEEP	0x0002u
 #define KM_NOFS		0x0004u
 #define KM_MAYFAIL	0x0008u
-#define KM_LARGE	0x0010u
 
 /*
  * We use a special process flag to avoid recursive callbacks into
@@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags)
 {
 	gfp_t	lflags;
 
-	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE));
+	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
 
 	if (flags & KM_NOSLEEP) {
 		lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags)
 
 extern void *kmem_alloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
 extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
 extern void  kmem_free(const void *);
 
+static inline void *kmem_zalloc_large(size_t size)
+{
+	void *ptr;
+
+	ptr = vmalloc(size);
+	if (ptr)
+		memset(ptr, 0, size);
+	return ptr;
+}
+static inline void kmem_free_large(void *ptr)
+{
+	vfree(ptr);
+}
+
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
+
 /*
  * Zone interfaces
  */
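
Taken together with the kmem.c hunks, oversized allocations now go through vmalloc explicitly instead of being promoted silently inside kmem_alloc() under KM_LARGE. A sketch of a caller using the new greedy variant (the sizes are illustrative):

	size_t	size;
	void	*buf;

	/* ask for 16 pages, halving on failure, settling for one page */
	buf = kmem_zalloc_greedy(&size, PAGE_SIZE, PAGE_SIZE * 16);
	if (buf) {
		/* ... use buf[0 .. size) ... */
		kmem_free_large(buf);	/* vmalloc-backed: not kmem_free() */
	}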
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 883ca5ab8af5..bf85bbe4a9ae 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -106,7 +106,7 @@ xfs_get_acl(struct inode *inode, int type)
 	struct posix_acl *acl;
 	struct xfs_acl *xfs_acl;
 	int len = sizeof(struct xfs_acl);
-	char *ea_name;
+	unsigned char *ea_name;
 	int error;
 
 	acl = get_cached_acl(inode, type);
@@ -133,7 +133,8 @@ xfs_get_acl(struct inode *inode, int type)
 	if (!xfs_acl)
 		return ERR_PTR(-ENOMEM);
 
-	error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+	error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+							&len, ATTR_ROOT);
 	if (error) {
 		/*
 		 * If the attribute doesn't exist make sure we have a negative
@@ -162,7 +163,7 @@ STATIC int
 xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
 	struct xfs_inode *ip = XFS_I(inode);
-	char *ea_name;
+	unsigned char *ea_name;
 	int error;
 
 	if (S_ISLNK(inode->i_mode))
@@ -194,7 +195,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		(sizeof(struct xfs_acl_entry) *
 		(XFS_ACL_MAX_ENTRIES - acl->a_count));
 
-	error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+	error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
 			len, ATTR_ROOT);
 
 	kfree(xfs_acl);
@@ -262,7 +263,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
 }
 
 static int
-xfs_acl_exists(struct inode *inode, char *name)
+xfs_acl_exists(struct inode *inode, unsigned char *name)
 {
 	int len = sizeof(struct xfs_acl);
 
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 77b8be81c769..6f76ba85f193 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,6 +33,7 @@
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
 #include <linux/freezer.h>
+#include <linux/list_sort.h>
 
 #include "xfs_sb.h"
 #include "xfs_inum.h"
@@ -76,6 +77,27 @@ struct workqueue_struct *xfsconvertd_workqueue;
 #define xfs_buf_deallocate(bp) \
 	kmem_zone_free(xfs_buf_zone, (bp));
 
+static inline int
+xfs_buf_is_vmapped(
+	struct xfs_buf	*bp)
+{
+	/*
+	 * Return true if the buffer is vmapped.
+	 *
+	 * The XBF_MAPPED flag is set if the buffer should be mapped, but the
+	 * code is clever enough to know it doesn't have to map a single page,
+	 * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
+	 */
+	return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
+}
+
+static inline int
+xfs_buf_vmap_len(
+	struct xfs_buf	*bp)
+{
+	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+}
+
 /*
  * Page Region interfaces.
  *
@@ -314,7 +336,7 @@ xfs_buf_free(
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
-		if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
+		if (xfs_buf_is_vmapped(bp))
 			free_address(bp->b_addr - bp->b_offset);
 
 		for (i = 0; i < bp->b_page_count; i++) {
@@ -1051,22 +1073,30 @@ xfs_buf_ioerror(
 }
 
 int
-xfs_bawrite(
-	void			*mp,
+xfs_bwrite(
+	struct xfs_mount	*mp,
 	struct xfs_buf		*bp)
 {
-	trace_xfs_buf_bawrite(bp, _RET_IP_);
+	int			iowait = (bp->b_flags & XBF_ASYNC) == 0;
+	int			error = 0;
 
-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+	bp->b_strat = xfs_bdstrat_cb;
+	bp->b_mount = mp;
+	bp->b_flags |= XBF_WRITE;
+	if (!iowait)
+		bp->b_flags |= _XBF_RUN_QUEUES;
 
 	xfs_buf_delwri_dequeue(bp);
+	xfs_buf_iostrategy(bp);
 
-	bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
-	bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
+	if (iowait) {
+		error = xfs_buf_iowait(bp);
+		if (error)
+			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		xfs_buf_relse(bp);
+	}
 
-	bp->b_mount = mp;
-	bp->b_strat = xfs_bdstrat_cb;
-	return xfs_bdstrat_cb(bp);
+	return error;
 }
 
 void
@@ -1085,6 +1115,126 @@ xfs_bdwrite(
 	xfs_buf_delwri_queue(bp, 1);
 }
 
+/*
+ * Called when we want to stop a buffer from getting written or read.
+ * We attach the EIO error, muck with its flags, and call biodone
+ * so that the proper iodone callbacks get called.
+ */
+STATIC int
+xfs_bioerror(
+	xfs_buf_t *bp)
+{
+#ifdef XFSERRORDEBUG
+	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
+#endif
+
+	/*
+	 * No need to wait until the buffer is unpinned, we aren't flushing it.
+	 */
+	XFS_BUF_ERROR(bp, EIO);
+
+	/*
+	 * We're calling biodone, so delete XBF_DONE flag.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_UNDONE(bp);
+	XFS_BUF_STALE(bp);
+
+	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+	xfs_biodone(bp);
+
+	return EIO;
+}
+
+/*
+ * Same as xfs_bioerror, except that we are releasing the buffer
+ * here ourselves, and avoiding the biodone call.
+ * This is meant for userdata errors; metadata bufs come with
+ * iodone functions attached, so that we can track down errors.
+ */
+STATIC int
+xfs_bioerror_relse(
+	struct xfs_buf	*bp)
+{
+	int64_t		fl = XFS_BUF_BFLAGS(bp);
+	/*
+	 * No need to wait until the buffer is unpinned.
+	 * We aren't flushing it.
+	 *
+	 * chunkhold expects B_DONE to be set, whether
+	 * we actually finish the I/O or not. We don't want to
+	 * change that interface.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_DONE(bp);
+	XFS_BUF_STALE(bp);
+	XFS_BUF_CLR_IODONE_FUNC(bp);
+	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+	if (!(fl & XBF_ASYNC)) {
+		/*
+		 * Mark b_error and B_ERROR _both_.
+		 * Lot's of chunkcache code assumes that.
+		 * There's no reason to mark error for
+		 * ASYNC buffers.
+		 */
+		XFS_BUF_ERROR(bp, EIO);
+		XFS_BUF_FINISH_IOWAIT(bp);
+	} else {
+		xfs_buf_relse(bp);
+	}
+
+	return EIO;
+}
+
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shutdown the filesystem.
+ */
+int
+xfs_bdstrat_cb(
+	struct xfs_buf	*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		/*
+		 * Metadata write that didn't get logged but
+		 * written delayed anyway. These aren't associated
+		 * with a transaction, and can be ignored.
+		 */
+		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
+			return xfs_bioerror_relse(bp);
+		else
+			return xfs_bioerror(bp);
+	}
+
+	xfs_buf_iorequest(bp);
+	return 0;
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem.  Typically user data goes thru this
+ * path; one of the exceptions is the superblock.
+ */
+void
+xfsbdstrat(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(mp)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		xfs_bioerror_relse(bp);
+		return;
+	}
+
+	xfs_buf_iorequest(bp);
+}
+
 STATIC void
 _xfs_buf_ioend(
 	xfs_buf_t		*bp,
@@ -1107,6 +1257,9 @@ xfs_buf_bio_end_io(
 
 	xfs_buf_ioerror(bp, -error);
 
+	if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
+
 	do {
 		struct page	*page = bvec->bv_page;
 
@@ -1216,6 +1369,10 @@ next_chunk:
 
 submit_io:
 	if (likely(bio->bi_size)) {
+		if (xfs_buf_is_vmapped(bp)) {
+			flush_kernel_vmap_range(bp->b_addr,
+						xfs_buf_vmap_len(bp));
+		}
 		submit_bio(rw, bio);
 		if (size)
 			goto next_chunk;
@@ -1296,7 +1453,7 @@ xfs_buf_iomove(
 	xfs_buf_t		*bp,	/* buffer to process		*/
 	size_t			boff,	/* starting buffer offset	*/
 	size_t			bsize,	/* length to copy		*/
-	caddr_t			data,	/* data address			*/
+	void			*data,	/* data address			*/
 	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
 {
 	size_t			bend, cpoff, csize;
@@ -1378,8 +1535,8 @@ xfs_alloc_bufhash(
 
 	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
 	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
-	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
-					sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
+	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
+					 sizeof(xfs_bufhash_t));
 	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
 		spin_lock_init(&btp->bt_hash[i].bh_lock);
 		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
@@ -1390,7 +1547,7 @@ STATIC void
 xfs_free_bufhash(
 	xfs_buftarg_t		*btp)
 {
-	kmem_free(btp->bt_hash);
+	kmem_free_large(btp->bt_hash);
 	btp->bt_hash = NULL;
 }
 
@@ -1595,6 +1752,11 @@ xfs_buf_delwri_queue(
 		list_del(&bp->b_list);
 	}
 
+	if (list_empty(dwq)) {
+		/* start xfsbufd as it is about to have something to do */
+		wake_up_process(bp->b_target->bt_task);
+	}
+
 	bp->b_flags |= _XBF_DELWRI_Q;
 	list_add_tail(&bp->b_list, dwq);
 	bp->b_queuetime = jiffies;
@@ -1626,6 +1788,35 @@ xfs_buf_delwri_dequeue(
 	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
 }
 
+/*
+ * If a delwri buffer needs to be pushed before it has aged out, then promote
+ * it to the head of the delwri queue so that it will be flushed on the next
+ * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
+ * than the age currently needed to flush the buffer. Hence the next time the
+ * xfsbufd sees it is guaranteed to be considered old enough to flush.
+ */
+void
+xfs_buf_delwri_promote(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
+
+	ASSERT(bp->b_flags & XBF_DELWRI);
+	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+	/*
+	 * Check the buffer age before locking the delayed write queue as we
+	 * don't need to promote buffers that are already past the flush age.
+	 */
+	if (bp->b_queuetime < jiffies - age)
+		return;
+	bp->b_queuetime = jiffies - age;
+	spin_lock(&btp->bt_delwrite_lock);
+	list_move(&bp->b_list, &btp->bt_delwrite_queue);
+	spin_unlock(&btp->bt_delwrite_lock);
+}
+
 STATIC void
 xfs_buf_runall_queues(
 	struct workqueue_struct	*queue)
@@ -1644,6 +1835,8 @@ xfsbufd_wakeup(
 	list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
 		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
 			continue;
+		if (list_empty(&btp->bt_delwrite_queue))
+			continue;
 		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
 		wake_up_process(btp->bt_task);
 	}
@@ -1694,20 +1887,53 @@ xfs_buf_delwri_split(
 
 }
 
+/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+ */
+static int
+xfs_buf_cmp(
+	void		*priv,
+	struct list_head *a,
+	struct list_head *b)
+{
+	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
+	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
+	xfs_daddr_t	diff;
+
+	diff = ap->b_bn - bp->b_bn;
+	if (diff < 0)
+		return -1;
+	if (diff > 0)
+		return 1;
+	return 0;
+}
+
+void
+xfs_buf_delwri_sort(
+	xfs_buftarg_t	*target,
+	struct list_head *list)
+{
+	list_sort(NULL, list, xfs_buf_cmp);
+}
+
 STATIC int
 xfsbufd(
 	void		*data)
 {
-	struct list_head tmp;
-	xfs_buftarg_t	*target = (xfs_buftarg_t *)data;
-	int		count;
-	xfs_buf_t	*bp;
+	xfs_buftarg_t	*target = (xfs_buftarg_t *)data;
 
 	current->flags |= PF_MEMALLOC;
 
 	set_freezable();
 
 	do {
+		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
+		int	count = 0;
+		struct list_head tmp;
+
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
 			refrigerator();
@@ -1715,17 +1941,16 @@ xfsbufd(
 			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
 		}
 
-		schedule_timeout_interruptible(
-			xfs_buf_timer_centisecs * msecs_to_jiffies(10));
+		/* sleep for a long time if there is nothing to do. */
+		if (list_empty(&target->bt_delwrite_queue))
+			tout = MAX_SCHEDULE_TIMEOUT;
+		schedule_timeout_interruptible(tout);
 
-		xfs_buf_delwri_split(target, &tmp,
-				xfs_buf_age_centisecs * msecs_to_jiffies(10));
-
-		count = 0;
+		xfs_buf_delwri_split(target, &tmp, age);
+		list_sort(NULL, &tmp, xfs_buf_cmp);
 		while (!list_empty(&tmp)) {
-			bp = list_entry(tmp.next, xfs_buf_t, b_list);
-			ASSERT(target == bp->b_target);
-
+			struct xfs_buf *bp;
+			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
 			list_del_init(&bp->b_list);
 			xfs_buf_iostrategy(bp);
 			count++;
@@ -1751,42 +1976,45 @@ xfs_flush_buftarg(
 	xfs_buftarg_t	*target,
 	int		wait)
 {
-	struct list_head tmp;
-	xfs_buf_t	*bp, *n;
+	xfs_buf_t	*bp;
 	int		pincount = 0;
+	LIST_HEAD(tmp_list);
+	LIST_HEAD(wait_list);
 
 	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
 	xfs_buf_runall_queues(xfslogd_workqueue);
 
 	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp, 0);
+	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
 
 	/*
-	 * Dropped the delayed write list lock, now walk the temporary list
+	 * Dropped the delayed write list lock, now walk the temporary list.
+	 * All I/O is issued async and then if we need to wait for completion
+	 * we do that after issuing all the IO.
 	 */
-	list_for_each_entry_safe(bp, n, &tmp, b_list) {
+	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+	while (!list_empty(&tmp_list)) {
+		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
 		ASSERT(target == bp->b_target);
-		if (wait)
+		list_del_init(&bp->b_list);
+		if (wait) {
 			bp->b_flags &= ~XBF_ASYNC;
-		else
-			list_del_init(&bp->b_list);
-
+			list_add(&bp->b_list, &wait_list);
+		}
 		xfs_buf_iostrategy(bp);
 	}
 
-	if (wait)
+	if (wait) {
+		/* Expedite and wait for IO to complete. */
 		blk_run_address_space(target->bt_mapping);
+		while (!list_empty(&wait_list)) {
+			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
-	/*
-	 * Remaining list items must be flushed before returning
-	 */
-	while (!list_empty(&tmp)) {
-		bp = list_entry(tmp.next, xfs_buf_t, b_list);
-
-		list_del_init(&bp->b_list);
-		xfs_iowait(bp);
-		xfs_buf_relse(bp);
+			list_del_init(&bp->b_list);
+			xfs_iowait(bp);
+			xfs_buf_relse(bp);
+		}
 	}
 
 	return pincount;
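
The comment above xfs_buf_cmp() deserves unpacking: b_bn is a 64-bit xfs_daddr_t, and narrowing the difference of two block numbers into the 32-bit int that list_sort() expects can truncate or flip sign, corrupting the sort order. A sketch of the failure the explicit three-way compare avoids:

	xfs_daddr_t a_bn = 0x100000000LL;	/* blocks 2^32 apart */
	xfs_daddr_t b_bn = 0;

	int bad  = (int)(a_bn - b_bn);		/* truncates to 0: "equal" */
	int good = (a_bn - b_bn) > 0 ? 1 : -1;	/* orders correctly */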
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a34c7b54822d..386e7361e50e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -232,13 +232,17 @@ extern void xfs_buf_lock(xfs_buf_t *);
 extern void xfs_buf_unlock(xfs_buf_t *);
 
 /* Buffer Read and Write Routines */
-extern int xfs_bawrite(void *mp, xfs_buf_t *bp);
+extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
 extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
+
+extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
+extern int xfs_bdstrat_cb(struct xfs_buf *);
+
 extern void xfs_buf_ioend(xfs_buf_t *,	int);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
 extern int xfs_buf_iorequest(xfs_buf_t *);
 extern int xfs_buf_iowait(xfs_buf_t *);
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
 				xfs_buf_rw_t);
 
 static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
@@ -261,6 +265,7 @@ extern int xfs_buf_ispin(xfs_buf_t *);
 
 /* Delayed Write Buffer Routines */
 extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
+extern void xfs_buf_delwri_promote(xfs_buf_t *);
 
 /* Buffer Daemon Setup Routines */
 extern int xfs_buf_init(void);
@@ -270,33 +275,19 @@ extern void xfs_buf_terminate(void);
 	({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
 
 
-#define XFS_B_ASYNC		XBF_ASYNC
-#define XFS_B_DELWRI		XBF_DELWRI
-#define XFS_B_READ		XBF_READ
-#define XFS_B_WRITE		XBF_WRITE
-#define XFS_B_STALE		XBF_STALE
-
-#define XFS_BUF_TRYLOCK		XBF_TRYLOCK
-#define XFS_INCORE_TRYLOCK	XBF_TRYLOCK
-#define XFS_BUF_LOCK		XBF_LOCK
-#define XFS_BUF_MAPPED		XBF_MAPPED
-
-#define BUF_BUSY		XBF_DONT_BLOCK
-
 #define XFS_BUF_BFLAGS(bp)	((bp)->b_flags)
 #define XFS_BUF_ZEROFLAGS(bp)	((bp)->b_flags &= \
 		~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
 
-#define XFS_BUF_STALE(bp)	((bp)->b_flags |= XFS_B_STALE)
-#define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XFS_B_STALE)
-#define XFS_BUF_ISSTALE(bp)	((bp)->b_flags & XFS_B_STALE)
+#define XFS_BUF_STALE(bp)	((bp)->b_flags |= XBF_STALE)
+#define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XBF_STALE)
+#define XFS_BUF_ISSTALE(bp)	((bp)->b_flags & XBF_STALE)
 #define XFS_BUF_SUPER_STALE(bp)	do {				\
 					XFS_BUF_STALE(bp);	\
 					xfs_buf_delwri_dequeue(bp);	\
 					XFS_BUF_DONE(bp);	\
 				} while (0)
 
-#define XFS_BUF_MANAGE		XBF_FS_MANAGED
 #define XFS_BUF_UNMANAGE(bp)	((bp)->b_flags &= ~XBF_FS_MANAGED)
 
 #define XFS_BUF_DELAYWRITE(bp)		((bp)->b_flags |= XBF_DELWRI)
@@ -385,31 +376,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
 
 #define xfs_biomove(bp, off, len, data, rw) \
 	    xfs_buf_iomove((bp), (off), (len), (data), \
-		((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)
+		((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
 
 #define xfs_biozero(bp, off, len) \
 	    xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
 
-
-static inline int XFS_bwrite(xfs_buf_t *bp)
-{
-	int	iowait = (bp->b_flags & XBF_ASYNC) == 0;
-	int	error = 0;
-
-	if (!iowait)
-		bp->b_flags |= _XBF_RUN_QUEUES;
-
-	xfs_buf_delwri_dequeue(bp);
-	xfs_buf_iostrategy(bp);
-	if (iowait) {
-		error = xfs_buf_iowait(bp);
-		xfs_buf_relse(bp);
-	}
-	return error;
-}
-
-#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
-
 #define xfs_iowait(bp)	xfs_buf_iowait(bp)
 
 #define xfs_baread(target, rablkno, ralen)  \
@@ -424,6 +395,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
 extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
+
 #ifdef CONFIG_KDB_MODULES
 extern struct list_head *xfs_get_buftarg_list(void);
 #endif
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 7501b85fd860..b6918d76bc7b 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -79,7 +79,7 @@ xfs_flush_pages(
79 xfs_iflags_clear(ip, XFS_ITRUNCATED); 79 xfs_iflags_clear(ip, XFS_ITRUNCATED);
80 ret = -filemap_fdatawrite(mapping); 80 ret = -filemap_fdatawrite(mapping);
81 } 81 }
82 if (flags & XFS_B_ASYNC) 82 if (flags & XBF_ASYNC)
83 return ret; 83 return ret;
84 ret2 = xfs_wait_on_pages(ip, first, last); 84 ret2 = xfs_wait_on_pages(ip, first, last);
85 if (!ret) 85 if (!ret)
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a034cf624437..4ea1ee18aded 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -447,12 +447,12 @@ xfs_attrlist_by_handle(
 int
 xfs_attrmulti_attr_get(
 	struct inode		*inode,
-	char			*name,
-	char			__user *ubuf,
+	unsigned char		*name,
+	unsigned char		__user *ubuf,
 	__uint32_t		*len,
 	__uint32_t		flags)
 {
-	char			*kbuf;
+	unsigned char		*kbuf;
 	int			error = EFAULT;
 
 	if (*len > XATTR_SIZE_MAX)
@@ -476,12 +476,12 @@ xfs_attrmulti_attr_get(
 int
 xfs_attrmulti_attr_set(
 	struct inode		*inode,
-	char			*name,
-	const char		__user *ubuf,
+	unsigned char		*name,
+	const unsigned char	__user *ubuf,
 	__uint32_t		len,
 	__uint32_t		flags)
 {
-	char			*kbuf;
+	unsigned char		*kbuf;
 	int			error = EFAULT;
 
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -501,7 +501,7 @@ xfs_attrmulti_attr_set(
 int
 xfs_attrmulti_attr_remove(
 	struct inode		*inode,
-	char			*name,
+	unsigned char		*name,
 	__uint32_t		flags)
 {
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -519,7 +519,7 @@ xfs_attrmulti_by_handle(
 	xfs_fsop_attrmulti_handlereq_t am_hreq;
 	struct dentry		*dentry;
 	unsigned int		i, size;
-	char			*attr_name;
+	unsigned char		*attr_name;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -XFS_ERROR(EPERM);
@@ -547,7 +547,7 @@ xfs_attrmulti_by_handle(
 
 	error = 0;
 	for (i = 0; i < am_hreq.opcount; i++) {
-		ops[i].am_error = strncpy_from_user(attr_name,
+		ops[i].am_error = strncpy_from_user((char *)attr_name,
 				ops[i].am_attrname, MAXNAMELEN);
 		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
 			error = -ERANGE;
@@ -1431,6 +1431,9 @@ xfs_file_ioctl(
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
+		if (mp->m_flags & XFS_MOUNT_RDONLY)
+			return -XFS_ERROR(EROFS);
+
 		if (copy_from_user(&inout, arg, sizeof(inout)))
 			return -XFS_ERROR(EFAULT);
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
index 7bd7c6afc1eb..d56173b34a2a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -45,23 +45,23 @@ xfs_readlink_by_handle(
 extern int
 xfs_attrmulti_attr_get(
 	struct inode		*inode,
-	char			*name,
-	char			__user *ubuf,
+	unsigned char		*name,
+	unsigned char		__user *ubuf,
 	__uint32_t		*len,
 	__uint32_t		flags);
 
 extern int
-	xfs_attrmulti_attr_set(
+xfs_attrmulti_attr_set(
 	struct inode		*inode,
-	char			*name,
-	const char		__user *ubuf,
+	unsigned char		*name,
+	const unsigned char	__user *ubuf,
 	__uint32_t		len,
 	__uint32_t		flags);
 
 extern int
 xfs_attrmulti_attr_remove(
 	struct inode		*inode,
-	char			*name,
+	unsigned char		*name,
 	__uint32_t		flags);
 
 extern struct dentry *
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index be1527b1670c..0bf6d61f0528 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -411,7 +411,7 @@ xfs_compat_attrmulti_by_handle(
411 compat_xfs_fsop_attrmulti_handlereq_t am_hreq; 411 compat_xfs_fsop_attrmulti_handlereq_t am_hreq;
412 struct dentry *dentry; 412 struct dentry *dentry;
413 unsigned int i, size; 413 unsigned int i, size;
414 char *attr_name; 414 unsigned char *attr_name;
415 415
416 if (!capable(CAP_SYS_ADMIN)) 416 if (!capable(CAP_SYS_ADMIN))
417 return -XFS_ERROR(EPERM); 417 return -XFS_ERROR(EPERM);
@@ -440,7 +440,7 @@ xfs_compat_attrmulti_by_handle(
440 440
441 error = 0; 441 error = 0;
442 for (i = 0; i < am_hreq.opcount; i++) { 442 for (i = 0; i < am_hreq.opcount; i++) {
443 ops[i].am_error = strncpy_from_user(attr_name, 443 ops[i].am_error = strncpy_from_user((char *)attr_name,
444 compat_ptr(ops[i].am_attrname), 444 compat_ptr(ops[i].am_attrname),
445 MAXNAMELEN); 445 MAXNAMELEN);
446 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 446 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 225946012d0b..e8566bbf0f00 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -140,10 +140,10 @@ xfs_init_security(
140 struct xfs_inode *ip = XFS_I(inode); 140 struct xfs_inode *ip = XFS_I(inode);
141 size_t length; 141 size_t length;
142 void *value; 142 void *value;
143 char *name; 143 unsigned char *name;
144 int error; 144 int error;
145 145
146 error = security_inode_init_security(inode, dir, &name, 146 error = security_inode_init_security(inode, dir, (char **)&name,
147 &value, &length); 147 &value, &length);
148 if (error) { 148 if (error) {
149 if (error == -EOPNOTSUPP) 149 if (error == -EOPNOTSUPP)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 0d32457abef1..eac6f80d786d 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -630,18 +630,9 @@ start:
630 * by root. This keeps people from modifying setuid and 630 * by root. This keeps people from modifying setuid and
631 * setgid binaries. 631 * setgid binaries.
632 */ 632 */
633 633 error = -file_remove_suid(file);
634 if (((xip->i_d.di_mode & S_ISUID) || 634 if (unlikely(error))
635 ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == 635 goto out_unlock_internal;
636 (S_ISGID | S_IXGRP))) &&
637 !capable(CAP_FSETID)) {
638 error = xfs_write_clear_setuid(xip);
639 if (likely(!error))
640 error = -file_remove_suid(file);
641 if (unlikely(error)) {
642 goto out_unlock_internal;
643 }
644 }
645 636
646 /* We can write back this queue in page reclaim */ 637 /* We can write back this queue in page reclaim */
647 current->backing_dev_info = mapping->backing_dev_info; 638 current->backing_dev_info = mapping->backing_dev_info;
@@ -784,53 +775,6 @@ write_retry:
784} 775}
785 776
786/* 777/*
787 * All xfs metadata buffers except log state machine buffers
788 * get this attached as their b_bdstrat callback function.
789 * This is so that we can catch a buffer
790 * after prematurely unpinning it to forcibly shutdown the filesystem.
791 */
792int
793xfs_bdstrat_cb(struct xfs_buf *bp)
794{
795 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
796 trace_xfs_bdstrat_shut(bp, _RET_IP_);
797 /*
798 * Metadata write that didn't get logged but
799 * written delayed anyway. These aren't associated
800 * with a transaction, and can be ignored.
801 */
802 if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
803 (XFS_BUF_ISREAD(bp)) == 0)
804 return (xfs_bioerror_relse(bp));
805 else
806 return (xfs_bioerror(bp));
807 }
808
809 xfs_buf_iorequest(bp);
810 return 0;
811}
812
813/*
814 * Wrapper around bdstrat so that we can stop data from going to disk in case
815 * we are shutting down the filesystem. Typically user data goes thru this
816 * path; one of the exceptions is the superblock.
817 */
818void
819xfsbdstrat(
820 struct xfs_mount *mp,
821 struct xfs_buf *bp)
822{
823 ASSERT(mp);
824 if (!XFS_FORCED_SHUTDOWN(mp)) {
825 xfs_buf_iorequest(bp);
826 return;
827 }
828
829 trace_xfs_bdstrat_shut(bp, _RET_IP_);
830 xfs_bioerror_relse(bp);
831}
832
833/*
834 * If the underlying (data/log/rt) device is readonly, there are some 778 * If the underlying (data/log/rt) device is readonly, there are some
835 * operations that cannot proceed. 779 * operations that cannot proceed.
836 */ 780 */
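
Note on the xfs_lrw.c hunk above: the open-coded setuid/setgid test is replaced by the generic file_remove_suid() helper, and the buffer strategy wrappers move out of this file. Roughly, the predicate the removed block (and the VFS helper) evaluates is sketched below; a simplified model, not the kernel's exact should_remove_suid():

#include <linux/fs.h>
#include <linux/capability.h>

/* Sketch: must a write by this caller clear the suid/sgid bits? */
static int write_should_clear_suid(struct inode *inode)
{
	umode_t mode = inode->i_mode;

	/* setuid binaries always lose suid on write... */
	if (mode & S_ISUID)
		return !capable(CAP_FSETID);

	/* ...while sgid without group-exec is mandatory locking, not setgid */
	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
		return !capable(CAP_FSETID);

	return 0;
}
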
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index d1f7789c7ffb..342ae8c0d011 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -22,9 +22,6 @@ struct xfs_mount;
22struct xfs_inode; 22struct xfs_inode;
23struct xfs_buf; 23struct xfs_buf;
24 24
25/* errors from xfsbdstrat() must be extracted from the buffer */
26extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
27extern int xfs_bdstrat_cb(struct xfs_buf *);
28extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 25extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
29 26
30extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); 27extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 77414db10dc2..25ea2408118f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -877,12 +877,11 @@ xfsaild(
877{ 877{
878 struct xfs_ail *ailp = data; 878 struct xfs_ail *ailp = data;
879 xfs_lsn_t last_pushed_lsn = 0; 879 xfs_lsn_t last_pushed_lsn = 0;
880 long tout = 0; 880 long tout = 0; /* milliseconds */
881 881
882 while (!kthread_should_stop()) { 882 while (!kthread_should_stop()) {
883 if (tout) 883 schedule_timeout_interruptible(tout ?
884 schedule_timeout_interruptible(msecs_to_jiffies(tout)); 884 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
885 tout = 1000;
886 885
887 /* swsusp */ 886 /* swsusp */
888 try_to_freeze(); 887 try_to_freeze();
@@ -1022,12 +1021,45 @@ xfs_fs_dirty_inode(
1022 XFS_I(inode)->i_update_core = 1; 1021 XFS_I(inode)->i_update_core = 1;
1023} 1022}
1024 1023
1025/* 1024STATIC int
1026 * Attempt to flush the inode, this will actually fail 1025xfs_log_inode(
1027 * if the inode is pinned, but we dirty the inode again 1026 struct xfs_inode *ip)
1028 * at the point when it is unpinned after a log write, 1027{
1029 * since this is when the inode itself becomes flushable. 1028 struct xfs_mount *mp = ip->i_mount;
1030 */ 1029 struct xfs_trans *tp;
1030 int error;
1031
1032 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1033 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1034 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
1035
1036 if (error) {
1037 xfs_trans_cancel(tp, 0);
 1038 /* we need to return with the lock held shared */
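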
1039 xfs_ilock(ip, XFS_ILOCK_SHARED);
1040 return error;
1041 }
1042
1043 xfs_ilock(ip, XFS_ILOCK_EXCL);
1044
1045 /*
1046 * Note - it's possible that we might have pushed ourselves out of the
1047 * way during trans_reserve which would flush the inode. But there's
1048 * no guarantee that the inode buffer has actually gone out yet (it's
1049 * delwri). Plus the buffer could be pinned anyway if it's part of
1050 * an inode in another recent transaction. So we play it safe and
1051 * fire off the transaction anyway.
1052 */
1053 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1054 xfs_trans_ihold(tp, ip);
1055 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1056 xfs_trans_set_sync(tp);
1057 error = xfs_trans_commit(tp, 0);
1058 xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
1059
1060 return error;
1061}
1062
1031STATIC int 1063STATIC int
1032xfs_fs_write_inode( 1064xfs_fs_write_inode(
1033 struct inode *inode, 1065 struct inode *inode,
@@ -1035,7 +1067,7 @@ xfs_fs_write_inode(
1035{ 1067{
1036 struct xfs_inode *ip = XFS_I(inode); 1068 struct xfs_inode *ip = XFS_I(inode);
1037 struct xfs_mount *mp = ip->i_mount; 1069 struct xfs_mount *mp = ip->i_mount;
1038 int error = 0; 1070 int error = EAGAIN;
1039 1071
1040 xfs_itrace_entry(ip); 1072 xfs_itrace_entry(ip);
1041 1073
@@ -1046,35 +1078,55 @@ xfs_fs_write_inode(
1046 error = xfs_wait_on_pages(ip, 0, -1); 1078 error = xfs_wait_on_pages(ip, 0, -1);
1047 if (error) 1079 if (error)
1048 goto out; 1080 goto out;
1049 }
1050
1051 /*
1052 * Bypass inodes which have already been cleaned by
1053 * the inode flush clustering code inside xfs_iflush
1054 */
1055 if (xfs_inode_clean(ip))
1056 goto out;
1057 1081
1058 /* 1082 /*
1059 * We make this non-blocking if the inode is contended, return 1083 * Make sure the inode has hit stable storage. By using the
1060 * EAGAIN to indicate to the caller that they did not succeed. 1084 * log and the fsync transactions we reduce the IOs we have
1061 * This prevents the flush path from blocking on inodes inside 1085 * to do here from two (log and inode) to just the log.
1062 * another operation right now, they get caught later by xfs_sync. 1086 *
1063 */ 1087 * Note: We still need to do a delwri write of the inode after
1064 if (sync) { 1088 * this to flush it to the backing buffer so that bulkstat
1089 * works properly if this is the first time the inode has been
1090 * written. Because we hold the ilock atomically over the
1091 * transaction commit and the inode flush we are guaranteed
1092 * that the inode is not pinned when it returns. If the flush
1093 * lock is already held, then the inode has already been
1094 * flushed once and we don't need to flush it again. Hence
1095 * the code will only flush the inode if it isn't already
1096 * being flushed.
1097 */
1065 xfs_ilock(ip, XFS_ILOCK_SHARED); 1098 xfs_ilock(ip, XFS_ILOCK_SHARED);
1066 xfs_iflock(ip); 1099 if (ip->i_update_core) {
1067 1100 error = xfs_log_inode(ip);
1068 error = xfs_iflush(ip, XFS_IFLUSH_SYNC); 1101 if (error)
1102 goto out_unlock;
1103 }
1069 } else { 1104 } else {
1070 error = EAGAIN; 1105 /*
1106 * We make this non-blocking if the inode is contended, return
1107 * EAGAIN to indicate to the caller that they did not succeed.
1108 * This prevents the flush path from blocking on inodes inside
1109 * another operation right now, they get caught later by xfs_sync.
1110 */
1071 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) 1111 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1072 goto out; 1112 goto out;
1073 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 1113 }
1074 goto out_unlock;
1075 1114
1076 error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK); 1115 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1116 goto out_unlock;
1117
1118 /*
1119 * Now we have the flush lock and the inode is not pinned, we can check
1120 * if the inode is really clean as we know that there are no pending
1121 * transaction completions, it is not waiting on the delayed write
1122 * queue and there is no IO in progress.
1123 */
1124 if (xfs_inode_clean(ip)) {
1125 xfs_ifunlock(ip);
1126 error = 0;
1127 goto out_unlock;
1077 } 1128 }
1129 error = xfs_iflush(ip, 0);
1078 1130
1079 out_unlock: 1131 out_unlock:
1080 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1132 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -1257,6 +1309,29 @@ xfs_fs_statfs(
1257 return 0; 1309 return 0;
1258} 1310}
1259 1311
1312STATIC void
1313xfs_save_resvblks(struct xfs_mount *mp)
1314{
1315 __uint64_t resblks = 0;
1316
1317 mp->m_resblks_save = mp->m_resblks;
1318 xfs_reserve_blocks(mp, &resblks, NULL);
1319}
1320
1321STATIC void
1322xfs_restore_resvblks(struct xfs_mount *mp)
1323{
1324 __uint64_t resblks;
1325
1326 if (mp->m_resblks_save) {
1327 resblks = mp->m_resblks_save;
1328 mp->m_resblks_save = 0;
1329 } else
1330 resblks = xfs_default_resblks(mp);
1331
1332 xfs_reserve_blocks(mp, &resblks, NULL);
1333}
1334
1260STATIC int 1335STATIC int
1261xfs_fs_remount( 1336xfs_fs_remount(
1262 struct super_block *sb, 1337 struct super_block *sb,
@@ -1336,11 +1411,27 @@ xfs_fs_remount(
1336 } 1411 }
1337 mp->m_update_flags = 0; 1412 mp->m_update_flags = 0;
1338 } 1413 }
1414
1415 /*
1416 * Fill out the reserve pool if it is empty. Use the stashed
1417 * value if it is non-zero, otherwise go with the default.
1418 */
1419 xfs_restore_resvblks(mp);
1339 } 1420 }
1340 1421
1341 /* rw -> ro */ 1422 /* rw -> ro */
1342 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { 1423 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1424 /*
1425 * After we have synced the data but before we sync the
1426 * metadata, we need to free up the reserve block pool so that
1427 * the used block count in the superblock on disk is correct at
1428 * the end of the remount. Stash the current reserve pool size
1429 * so that if we get remounted rw, we can return it to the same
1430 * size.
1431 */
1432
1343 xfs_quiesce_data(mp); 1433 xfs_quiesce_data(mp);
1434 xfs_save_resvblks(mp);
1344 xfs_quiesce_attr(mp); 1435 xfs_quiesce_attr(mp);
1345 mp->m_flags |= XFS_MOUNT_RDONLY; 1436 mp->m_flags |= XFS_MOUNT_RDONLY;
1346 } 1437 }
@@ -1359,11 +1450,22 @@ xfs_fs_freeze(
1359{ 1450{
1360 struct xfs_mount *mp = XFS_M(sb); 1451 struct xfs_mount *mp = XFS_M(sb);
1361 1452
1453 xfs_save_resvblks(mp);
1362 xfs_quiesce_attr(mp); 1454 xfs_quiesce_attr(mp);
1363 return -xfs_fs_log_dummy(mp); 1455 return -xfs_fs_log_dummy(mp);
1364} 1456}
1365 1457
1366STATIC int 1458STATIC int
1459xfs_fs_unfreeze(
1460 struct super_block *sb)
1461{
1462 struct xfs_mount *mp = XFS_M(sb);
1463
1464 xfs_restore_resvblks(mp);
1465 return 0;
1466}
1467
1468STATIC int
1367xfs_fs_show_options( 1469xfs_fs_show_options(
1368 struct seq_file *m, 1470 struct seq_file *m,
1369 struct vfsmount *mnt) 1471 struct vfsmount *mnt)
@@ -1585,6 +1687,7 @@ static const struct super_operations xfs_super_operations = {
1585 .put_super = xfs_fs_put_super, 1687 .put_super = xfs_fs_put_super,
1586 .sync_fs = xfs_fs_sync_fs, 1688 .sync_fs = xfs_fs_sync_fs,
1587 .freeze_fs = xfs_fs_freeze, 1689 .freeze_fs = xfs_fs_freeze,
1690 .unfreeze_fs = xfs_fs_unfreeze,
1588 .statfs = xfs_fs_statfs, 1691 .statfs = xfs_fs_statfs,
1589 .remount_fs = xfs_fs_remount, 1692 .remount_fs = xfs_fs_remount,
1590 .show_options = xfs_fs_show_options, 1693 .show_options = xfs_fs_show_options,
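
Two xfs_super.c changes above are easy to miss. First, xfsaild now sleeps indefinitely when it has no work (tout == 0) instead of waking once a second; second, the reserve block pool is saved on freeze/remount-ro and restored on unfreeze/remount-rw so the on-disk free-block count stays accurate while frozen. The idle-until-woken loop generalises as below; next_work_delay_ms() is hypothetical and stands in for the AIL push:

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/freezer.h>
#include <linux/jiffies.h>

static long next_work_delay_ms(void *data);	/* hypothetical worker */

/* Sketch of the xfsaild-style loop: park until woken when idle. */
static int worker(void *data)
{
	long tout = 0;			/* milliseconds; 0 means idle */

	while (!kthread_should_stop()) {
		schedule_timeout_interruptible(tout ?
				msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
		try_to_freeze();	/* cooperate with suspend */
		tout = next_work_delay_ms(data);
	}
	return 0;
}
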
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 1f5e4bb5e970..a9f6d20aff41 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -90,14 +90,13 @@ xfs_inode_ag_lookup(
90STATIC int 90STATIC int
91xfs_inode_ag_walk( 91xfs_inode_ag_walk(
92 struct xfs_mount *mp, 92 struct xfs_mount *mp,
93 xfs_agnumber_t ag, 93 struct xfs_perag *pag,
94 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
95 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
96 int flags, 96 int flags,
97 int tag, 97 int tag,
98 int exclusive) 98 int exclusive)
99{ 99{
100 struct xfs_perag *pag = &mp->m_perag[ag];
101 uint32_t first_index; 100 uint32_t first_index;
102 int last_error = 0; 101 int last_error = 0;
103 int skipped; 102 int skipped;
@@ -141,8 +140,6 @@ restart:
141 delay(1); 140 delay(1);
142 goto restart; 141 goto restart;
143 } 142 }
144
145 xfs_put_perag(mp, pag);
146 return last_error; 143 return last_error;
147} 144}
148 145
@@ -160,10 +157,16 @@ xfs_inode_ag_iterator(
160 xfs_agnumber_t ag; 157 xfs_agnumber_t ag;
161 158
162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 159 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
163 if (!mp->m_perag[ag].pag_ici_init) 160 struct xfs_perag *pag;
161
162 pag = xfs_perag_get(mp, ag);
163 if (!pag->pag_ici_init) {
164 xfs_perag_put(pag);
164 continue; 165 continue;
165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag, 166 }
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
166 exclusive); 168 exclusive);
169 xfs_perag_put(pag);
167 if (error) { 170 if (error) {
168 last_error = error; 171 last_error = error;
169 if (error == EFSCORRUPTED) 172 if (error == EFSCORRUPTED)
@@ -231,7 +234,7 @@ xfs_sync_inode_data(
231 } 234 }
232 235
233 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? 236 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
234 0 : XFS_B_ASYNC, FI_NONE); 237 0 : XBF_ASYNC, FI_NONE);
235 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 238 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
236 239
237 out_wait: 240 out_wait:
@@ -267,8 +270,7 @@ xfs_sync_inode_attr(
267 goto out_unlock; 270 goto out_unlock;
268 } 271 }
269 272
270 error = xfs_iflush(ip, (flags & SYNC_WAIT) ? 273 error = xfs_iflush(ip, flags);
271 XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
272 274
273 out_unlock: 275 out_unlock:
274 xfs_iunlock(ip, XFS_ILOCK_SHARED); 276 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -293,10 +295,7 @@ xfs_sync_data(
293 if (error) 295 if (error)
294 return XFS_ERROR(error); 296 return XFS_ERROR(error);
295 297
296 xfs_log_force(mp, 0, 298 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
297 (flags & SYNC_WAIT) ?
298 XFS_LOG_FORCE | XFS_LOG_SYNC :
299 XFS_LOG_FORCE);
300 return 0; 299 return 0;
301} 300}
302 301
@@ -322,10 +321,6 @@ xfs_commit_dummy_trans(
322 struct xfs_inode *ip = mp->m_rootip; 321 struct xfs_inode *ip = mp->m_rootip;
323 struct xfs_trans *tp; 322 struct xfs_trans *tp;
324 int error; 323 int error;
325 int log_flags = XFS_LOG_FORCE;
326
327 if (flags & SYNC_WAIT)
328 log_flags |= XFS_LOG_SYNC;
329 324
330 /* 325 /*
331 * Put a dummy transaction in the log to tell recovery 326 * Put a dummy transaction in the log to tell recovery
@@ -347,11 +342,11 @@ xfs_commit_dummy_trans(
347 xfs_iunlock(ip, XFS_ILOCK_EXCL); 342 xfs_iunlock(ip, XFS_ILOCK_EXCL);
348 343
349 /* the log force ensures this transaction is pushed to disk */ 344 /* the log force ensures this transaction is pushed to disk */
350 xfs_log_force(mp, 0, log_flags); 345 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
351 return error; 346 return error;
352} 347}
353 348
354int 349STATIC int
355xfs_sync_fsdata( 350xfs_sync_fsdata(
356 struct xfs_mount *mp, 351 struct xfs_mount *mp,
357 int flags) 352 int flags)
@@ -367,7 +362,7 @@ xfs_sync_fsdata(
367 if (flags & SYNC_TRYLOCK) { 362 if (flags & SYNC_TRYLOCK) {
368 ASSERT(!(flags & SYNC_WAIT)); 363 ASSERT(!(flags & SYNC_WAIT));
369 364
370 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); 365 bp = xfs_getsb(mp, XBF_TRYLOCK);
371 if (!bp) 366 if (!bp)
372 goto out; 367 goto out;
373 368
@@ -387,7 +382,7 @@ xfs_sync_fsdata(
387 * become pinned in between there and here. 382 * become pinned in between there and here.
388 */ 383 */
389 if (XFS_BUF_ISPINNED(bp)) 384 if (XFS_BUF_ISPINNED(bp))
390 xfs_log_force(mp, 0, XFS_LOG_FORCE); 385 xfs_log_force(mp, 0);
391 } 386 }
392 387
393 388
@@ -448,9 +443,6 @@ xfs_quiesce_data(
448 xfs_sync_data(mp, SYNC_WAIT); 443 xfs_sync_data(mp, SYNC_WAIT);
449 xfs_qm_sync(mp, SYNC_WAIT); 444 xfs_qm_sync(mp, SYNC_WAIT);
450 445
451 /* drop inode references pinned by filestreams */
452 xfs_filestream_flush(mp);
453
454 /* write superblock and hoover up shutdown errors */ 446 /* write superblock and hoover up shutdown errors */
455 error = xfs_sync_fsdata(mp, SYNC_WAIT); 447 error = xfs_sync_fsdata(mp, SYNC_WAIT);
456 448
@@ -467,16 +459,18 @@ xfs_quiesce_fs(
467{ 459{
468 int count = 0, pincount; 460 int count = 0, pincount;
469 461
462 xfs_reclaim_inodes(mp, 0);
470 xfs_flush_buftarg(mp->m_ddev_targp, 0); 463 xfs_flush_buftarg(mp->m_ddev_targp, 0);
471 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
472 464
473 /* 465 /*
474 * This loop must run at least twice. The first instance of the loop 466 * This loop must run at least twice. The first instance of the loop
475 * will flush most meta data but that will generate more meta data 467 * will flush most meta data but that will generate more meta data
476 * (typically directory updates). Which then must be flushed and 468 * (typically directory updates). Which then must be flushed and
 477 * logged before we can write the unmount record. 469 * logged before we can write the unmount record. We also do sync
470 * reclaim of inodes to catch any that the above delwri flush skipped.
478 */ 471 */
479 do { 472 do {
473 xfs_reclaim_inodes(mp, SYNC_WAIT);
480 xfs_sync_attr(mp, SYNC_WAIT); 474 xfs_sync_attr(mp, SYNC_WAIT);
481 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); 475 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
482 if (!pincount) { 476 if (!pincount) {
@@ -575,7 +569,7 @@ xfs_flush_inodes(
575 igrab(inode); 569 igrab(inode);
576 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); 570 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
577 wait_for_completion(&completion); 571 wait_for_completion(&completion);
578 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); 572 xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
579} 573}
580 574
581/* 575/*
@@ -591,8 +585,8 @@ xfs_sync_worker(
591 int error; 585 int error;
592 586
593 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 587 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
594 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 588 xfs_log_force(mp, 0);
595 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); 589 xfs_reclaim_inodes(mp, 0);
596 /* dgc: errors ignored here */ 590 /* dgc: errors ignored here */
597 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 591 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
598 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); 592 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
@@ -690,16 +684,17 @@ void
690xfs_inode_set_reclaim_tag( 684xfs_inode_set_reclaim_tag(
691 xfs_inode_t *ip) 685 xfs_inode_t *ip)
692{ 686{
693 xfs_mount_t *mp = ip->i_mount; 687 struct xfs_mount *mp = ip->i_mount;
694 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 688 struct xfs_perag *pag;
695 689
690 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
696 read_lock(&pag->pag_ici_lock); 691 read_lock(&pag->pag_ici_lock);
697 spin_lock(&ip->i_flags_lock); 692 spin_lock(&ip->i_flags_lock);
698 __xfs_inode_set_reclaim_tag(pag, ip); 693 __xfs_inode_set_reclaim_tag(pag, ip);
699 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 694 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
700 spin_unlock(&ip->i_flags_lock); 695 spin_unlock(&ip->i_flags_lock);
701 read_unlock(&pag->pag_ici_lock); 696 read_unlock(&pag->pag_ici_lock);
702 xfs_put_perag(mp, pag); 697 xfs_perag_put(pag);
703} 698}
704 699
705void 700void
@@ -712,12 +707,64 @@ __xfs_inode_clear_reclaim_tag(
712 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 707 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
713} 708}
714 709
710/*
711 * Inodes in different states need to be treated differently, and the return
712 * value of xfs_iflush is not sufficient to get this right. The following table
713 * lists the inode states and the reclaim actions necessary for non-blocking
714 * reclaim:
715 *
716 *
717 * inode state iflush ret required action
718 * --------------- ---------- ---------------
719 * bad - reclaim
720 * shutdown EIO unpin and reclaim
721 * clean, unpinned 0 reclaim
722 * stale, unpinned 0 reclaim
723 * clean, pinned(*) 0 requeue
724 * stale, pinned EAGAIN requeue
725 * dirty, delwri ok 0 requeue
726 * dirty, delwri blocked EAGAIN requeue
727 * dirty, sync flush 0 reclaim
728 *
729 * (*) dgc: I don't think the clean, pinned state is possible but it gets
730 * handled anyway given the order of checks implemented.
731 *
732 * As can be seen from the table, the return value of xfs_iflush() is not
733 * sufficient to correctly decide the reclaim action here. The checks in
734 * xfs_iflush() might look like duplicates, but they are not.
735 *
736 * Also, because we get the flush lock first, we know that any inode that has
737 * been flushed delwri has had the flush completed by the time we check that
738 * the inode is clean. The clean inode check needs to be done before flushing
739 * the inode delwri otherwise we would loop forever requeuing clean inodes as
740 * we cannot tell apart a successful delwri flush and a clean inode from the
741 * return value of xfs_iflush().
742 *
743 * Note that because the inode is flushed delayed write by background
744 * writeback, the flush lock may already be held here and waiting on it can
745 * result in very long latencies. Hence for sync reclaims, where we wait on the
746 * flush lock, the caller should push out delayed write inodes first before
747 * trying to reclaim them to minimise the amount of time spent waiting. For
 748 * background reclaim, we just requeue the inode for the next pass.
749 *
750 * Hence the order of actions after gaining the locks should be:
751 * bad => reclaim
752 * shutdown => unpin and reclaim
753 * pinned, delwri => requeue
754 * pinned, sync => unpin
755 * stale => reclaim
756 * clean => reclaim
757 * dirty, delwri => flush and requeue
758 * dirty, sync => flush, wait and reclaim
759 */
715STATIC int 760STATIC int
716xfs_reclaim_inode( 761xfs_reclaim_inode(
717 struct xfs_inode *ip, 762 struct xfs_inode *ip,
718 struct xfs_perag *pag, 763 struct xfs_perag *pag,
719 int sync_mode) 764 int sync_mode)
720{ 765{
766 int error = 0;
767
721 /* 768 /*
722 * The radix tree lock here protects a thread in xfs_iget from racing 769 * The radix tree lock here protects a thread in xfs_iget from racing
723 * with us starting reclaim on the inode. Once we have the 770 * with us starting reclaim on the inode. Once we have the
@@ -735,33 +782,70 @@ xfs_reclaim_inode(
735 spin_unlock(&ip->i_flags_lock); 782 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock); 783 write_unlock(&pag->pag_ici_lock);
737 784
738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL); 785 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip); 786 if (!xfs_iflock_nowait(ip)) {
787 if (!(sync_mode & SYNC_WAIT))
788 goto out;
789 xfs_iflock(ip);
790 }
791
792 if (is_bad_inode(VFS_I(ip)))
793 goto reclaim;
794 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
795 xfs_iunpin_wait(ip);
796 goto reclaim;
797 }
798 if (xfs_ipincount(ip)) {
799 if (!(sync_mode & SYNC_WAIT)) {
800 xfs_ifunlock(ip);
801 goto out;
802 }
803 xfs_iunpin_wait(ip);
804 }
805 if (xfs_iflags_test(ip, XFS_ISTALE))
806 goto reclaim;
807 if (xfs_inode_clean(ip))
808 goto reclaim;
809
810 /* Now we have an inode that needs flushing */
811 error = xfs_iflush(ip, sync_mode);
812 if (sync_mode & SYNC_WAIT) {
813 xfs_iflock(ip);
814 goto reclaim;
815 }
751 816
752 /* 817 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to 818 * When we have to flush an inode but don't have SYNC_WAIT set, we
754 * wait for the inode to be unpinned before returning an error. 819 * flush the inode out using a delwri buffer and wait for the next
820 * call into reclaim to find it in a clean state instead of waiting for
821 * it now. We also don't return errors here - if the error is transient
822 * then the next reclaim pass will flush the inode, and if the error
 823 * is permanent then the next sync reclaim will reclaim the inode and
824 * pass on the error.
755 */ 825 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { 826 if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
757 /* synchronize with xfs_iflush_done */ 827 xfs_fs_cmn_err(CE_WARN, ip->i_mount,
758 xfs_iflock(ip); 828 "inode 0x%llx background reclaim flush failed with %d",
759 xfs_ifunlock(ip); 829 (long long)ip->i_ino, error);
760 } 830 }
831out:
832 xfs_iflags_clear(ip, XFS_IRECLAIM);
833 xfs_iunlock(ip, XFS_ILOCK_EXCL);
834 /*
835 * We could return EAGAIN here to make reclaim rescan the inode tree in
836 * a short while. However, this just burns CPU time scanning the tree
837 * waiting for IO to complete and xfssyncd never goes back to the idle
838 * state. Instead, return 0 to let the next scheduled background reclaim
839 * attempt to reclaim the inode again.
840 */
841 return 0;
761 842
843reclaim:
844 xfs_ifunlock(ip);
762 xfs_iunlock(ip, XFS_ILOCK_EXCL); 845 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip); 846 xfs_ireclaim(ip);
764 return 0; 847 return error;
848
765} 849}
766 850
767int 851int
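
A pattern repeated through the xfs_sync.c hunks: per-AG structures are no longer reached by indexing mp->m_perag directly but through counted references, xfs_perag_get() paired with xfs_perag_put(). The contract, sketched with a hypothetical per-AG callback and assuming the XFS-internal headers:

static int do_ag_work(struct xfs_perag *pag);	/* hypothetical callback */

/* Sketch: every xfs_perag_get() must be balanced by xfs_perag_put(). */
static int walk_one_ag(struct xfs_mount *mp, xfs_agnumber_t agno)
{
	struct xfs_perag *pag;
	int error = 0;

	pag = xfs_perag_get(mp, agno);		/* reference held from here */
	if (pag->pag_ici_init)
		error = do_ag_work(pag);
	xfs_perag_put(pag);			/* reference dropped */
	return error;
}
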
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index ea932b43335d..d480c346cabb 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -37,7 +37,6 @@ void xfs_syncd_stop(struct xfs_mount *mp);
37 37
38int xfs_sync_attr(struct xfs_mount *mp, int flags); 38int xfs_sync_attr(struct xfs_mount *mp, int flags);
39int xfs_sync_data(struct xfs_mount *mp, int flags); 39int xfs_sync_data(struct xfs_mount *mp, int flags);
40int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
41 40
42int xfs_quiesce_data(struct xfs_mount *mp); 41int xfs_quiesce_data(struct xfs_mount *mp);
43void xfs_quiesce_attr(struct xfs_mount *mp); 42void xfs_quiesce_attr(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c22a608321a3..a4574dcf5065 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -78,6 +78,33 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
78 ) 78 )
79) 79)
80 80
81#define DEFINE_PERAG_REF_EVENT(name) \
82TRACE_EVENT(name, \
83 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
84 unsigned long caller_ip), \
85 TP_ARGS(mp, agno, refcount, caller_ip), \
86 TP_STRUCT__entry( \
87 __field(dev_t, dev) \
88 __field(xfs_agnumber_t, agno) \
89 __field(int, refcount) \
90 __field(unsigned long, caller_ip) \
91 ), \
92 TP_fast_assign( \
93 __entry->dev = mp->m_super->s_dev; \
94 __entry->agno = agno; \
95 __entry->refcount = refcount; \
96 __entry->caller_ip = caller_ip; \
97 ), \
98 TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
99 MAJOR(__entry->dev), MINOR(__entry->dev), \
100 __entry->agno, \
101 __entry->refcount, \
102 (char *)__entry->caller_ip) \
103);
104
105DEFINE_PERAG_REF_EVENT(xfs_perag_get)
106DEFINE_PERAG_REF_EVENT(xfs_perag_put)
107
81#define DEFINE_ATTR_LIST_EVENT(name) \ 108#define DEFINE_ATTR_LIST_EVENT(name) \
82DEFINE_EVENT(xfs_attr_list_class, name, \ 109DEFINE_EVENT(xfs_attr_list_class, name, \
83 TP_PROTO(struct xfs_attr_list_context *ctx), \ 110 TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -456,6 +483,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
456DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); 483DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
457DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); 484DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
458DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); 485DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
486DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
459DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); 487DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
460DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); 488DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
461DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); 489DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
@@ -1414,6 +1442,59 @@ TRACE_EVENT(xfs_dir2_leafn_moveents,
1414 __entry->count) 1442 __entry->count)
1415); 1443);
1416 1444
1445#define XFS_SWAPEXT_INODES \
1446 { 0, "target" }, \
1447 { 1, "temp" }
1448
1449#define XFS_INODE_FORMAT_STR \
1450 { 0, "invalid" }, \
1451 { 1, "local" }, \
1452 { 2, "extent" }, \
1453 { 3, "btree" }
1454
1455DECLARE_EVENT_CLASS(xfs_swap_extent_class,
1456 TP_PROTO(struct xfs_inode *ip, int which),
1457 TP_ARGS(ip, which),
1458 TP_STRUCT__entry(
1459 __field(dev_t, dev)
1460 __field(int, which)
1461 __field(xfs_ino_t, ino)
1462 __field(int, format)
1463 __field(int, nex)
1464 __field(int, max_nex)
1465 __field(int, broot_size)
1466 __field(int, fork_off)
1467 ),
1468 TP_fast_assign(
1469 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1470 __entry->which = which;
1471 __entry->ino = ip->i_ino;
1472 __entry->format = ip->i_d.di_format;
1473 __entry->nex = ip->i_d.di_nextents;
1474 __entry->max_nex = ip->i_df.if_ext_max;
1475 __entry->broot_size = ip->i_df.if_broot_bytes;
1476 __entry->fork_off = XFS_IFORK_BOFF(ip);
1477 ),
1478 TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
1479 "Max in-fork extents %d, broot size %d, fork offset %d",
1480 MAJOR(__entry->dev), MINOR(__entry->dev),
1481 __entry->ino,
1482 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
1483 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
1484 __entry->nex,
1485 __entry->max_nex,
1486 __entry->broot_size,
1487 __entry->fork_off)
1488)
1489
1490#define DEFINE_SWAPEXT_EVENT(name) \
1491DEFINE_EVENT(xfs_swap_extent_class, name, \
1492 TP_PROTO(struct xfs_inode *ip, int which), \
1493 TP_ARGS(ip, which))
1494
1495DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
1496DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
1497
1417#endif /* _TRACE_XFS_H */ 1498#endif /* _TRACE_XFS_H */
1418 1499
1419#undef TRACE_INCLUDE_PATH 1500#undef TRACE_INCLUDE_PATH
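
DEFINE_PERAG_REF_EVENT above expands to a full TRACE_EVENT per name, so the reference helpers can log the AG number, the post-operation refcount, and the caller. A plausible emission site, sketched; the real callers live outside this hunk, so the exact form is an assumption:

/* Sketch: inside xfs_perag_get(), after taking the reference. */
int ref = atomic_inc_return(&pag->pag_ref);

trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
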
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 0b1878857fc3..fa01b9daba6b 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -45,7 +45,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name,
45 value = NULL; 45 value = NULL;
46 } 46 }
47 47
48 error = -xfs_attr_get(ip, name, value, &asize, xflags); 48 error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
49 if (error) 49 if (error)
50 return error; 50 return error;
51 return asize; 51 return asize;
@@ -67,8 +67,9 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
67 xflags |= ATTR_REPLACE; 67 xflags |= ATTR_REPLACE;
68 68
69 if (!value) 69 if (!value)
70 return -xfs_attr_remove(ip, name, xflags); 70 return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
71 return -xfs_attr_set(ip, name, (void *)value, size, xflags); 71 return -xfs_attr_set(ip, (unsigned char *)name,
72 (void *)value, size, xflags);
72} 73}
73 74
74static struct xattr_handler xfs_xattr_user_handler = { 75static struct xattr_handler xfs_xattr_user_handler = {
@@ -124,8 +125,13 @@ static const char *xfs_xattr_prefix(int flags)
124} 125}
125 126
126static int 127static int
127xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, 128xfs_xattr_put_listent(
128 char *name, int namelen, int valuelen, char *value) 129 struct xfs_attr_list_context *context,
130 int flags,
131 unsigned char *name,
132 int namelen,
133 int valuelen,
134 unsigned char *value)
129{ 135{
130 unsigned int prefix_len = xfs_xattr_prefix_len(flags); 136 unsigned int prefix_len = xfs_xattr_prefix_len(flags);
131 char *offset; 137 char *offset;
@@ -148,7 +154,7 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
148 offset = (char *)context->alist + context->count; 154 offset = (char *)context->alist + context->count;
149 strncpy(offset, xfs_xattr_prefix(flags), prefix_len); 155 strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
150 offset += prefix_len; 156 offset += prefix_len;
151 strncpy(offset, name, namelen); /* real name */ 157 strncpy(offset, (char *)name, namelen); /* real name */
152 offset += namelen; 158 offset += namelen;
153 *offset = '\0'; 159 *offset = '\0';
154 context->count += prefix_len + namelen + 1; 160 context->count += prefix_len + namelen + 1;
@@ -156,8 +162,13 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
156} 162}
157 163
158static int 164static int
159xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags, 165xfs_xattr_put_listent_sizes(
160 char *name, int namelen, int valuelen, char *value) 166 struct xfs_attr_list_context *context,
167 int flags,
168 unsigned char *name,
169 int namelen,
170 int valuelen,
171 unsigned char *value)
161{ 172{
162 context->count += xfs_xattr_prefix_len(flags) + namelen + 1; 173 context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
163 return 0; 174 return 0;
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index d7c7eea09fc2..5f79dd78626b 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1187,7 +1187,7 @@ xfs_qm_dqflush(
1187 * block, nada. 1187 * block, nada.
1188 */ 1188 */
1189 if (!XFS_DQ_IS_DIRTY(dqp) || 1189 if (!XFS_DQ_IS_DIRTY(dqp) ||
1190 (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) { 1190 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
1191 xfs_dqfunlock(dqp); 1191 xfs_dqfunlock(dqp);
1192 return 0; 1192 return 0;
1193 } 1193 }
@@ -1248,23 +1248,20 @@ xfs_qm_dqflush(
1248 */ 1248 */
1249 if (XFS_BUF_ISPINNED(bp)) { 1249 if (XFS_BUF_ISPINNED(bp)) {
1250 trace_xfs_dqflush_force(dqp); 1250 trace_xfs_dqflush_force(dqp);
1251 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 1251 xfs_log_force(mp, 0);
1252 } 1252 }
1253 1253
1254 if (flags & XFS_QMOPT_DELWRI) { 1254 if (flags & SYNC_WAIT)
1255 xfs_bdwrite(mp, bp);
1256 } else if (flags & XFS_QMOPT_ASYNC) {
1257 error = xfs_bawrite(mp, bp);
1258 } else {
1259 error = xfs_bwrite(mp, bp); 1255 error = xfs_bwrite(mp, bp);
1260 } 1256 else
1257 xfs_bdwrite(mp, bp);
1261 1258
1262 trace_xfs_dqflush_done(dqp); 1259 trace_xfs_dqflush_done(dqp);
1263 1260
1264 /* 1261 /*
1265 * dqp is still locked, but caller is free to unlock it now. 1262 * dqp is still locked, but caller is free to unlock it now.
1266 */ 1263 */
1267 return (error); 1264 return error;
1268 1265
1269} 1266}
1270 1267
@@ -1445,7 +1442,7 @@ xfs_qm_dqpurge(
1445 * We don't care about getting disk errors here. We need 1442 * We don't care about getting disk errors here. We need
1446 * to purge this dquot anyway, so we go ahead regardless. 1443 * to purge this dquot anyway, so we go ahead regardless.
1447 */ 1444 */
1448 error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); 1445 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1449 if (error) 1446 if (error)
1450 xfs_fs_cmn_err(CE_WARN, mp, 1447 xfs_fs_cmn_err(CE_WARN, mp,
1451 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1448 "xfs_qm_dqpurge: dquot %p flush failed", dqp);
@@ -1529,25 +1526,17 @@ xfs_qm_dqflock_pushbuf_wait(
1529 * the flush lock when the I/O completes. 1526 * the flush lock when the I/O completes.
1530 */ 1527 */
1531 bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, 1528 bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
1532 XFS_QI_DQCHUNKLEN(dqp->q_mount), 1529 XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK);
1533 XFS_INCORE_TRYLOCK); 1530 if (!bp)
1534 if (bp != NULL) { 1531 goto out_lock;
1535 if (XFS_BUF_ISDELAYWRITE(bp)) { 1532
1536 int error; 1533 if (XFS_BUF_ISDELAYWRITE(bp)) {
1537 if (XFS_BUF_ISPINNED(bp)) { 1534 if (XFS_BUF_ISPINNED(bp))
1538 xfs_log_force(dqp->q_mount, 1535 xfs_log_force(dqp->q_mount, 0);
1539 (xfs_lsn_t)0, 1536 xfs_buf_delwri_promote(bp);
1540 XFS_LOG_FORCE); 1537 wake_up_process(bp->b_target->bt_task);
1541 }
1542 error = xfs_bawrite(dqp->q_mount, bp);
1543 if (error)
1544 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
1545 "xfs_qm_dqflock_pushbuf_wait: "
1546 "pushbuf error %d on dqp %p, bp %p",
1547 error, dqp, bp);
1548 } else {
1549 xfs_buf_relse(bp);
1550 }
1551 } 1538 }
1539 xfs_buf_relse(bp);
1540out_lock:
1552 xfs_dqflock(dqp); 1541 xfs_dqflock(dqp);
1553} 1542}
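
With XFS_QMOPT_SYNC, XFS_QMOPT_DELWRI and XFS_QMOPT_ASYNC gone, xfs_qm_dqflush() now takes the same flags as the sync code: SYNC_WAIT writes the buffer synchronously, 0 leaves it on the delayed-write queue. A sketch of the two caller conventions seen in these hunks, wrapped in a hypothetical helper:

/* Sketch: dqp must be locked and dirty, as xfs_qm_dqflush() expects. */
static int flush_dquot(struct xfs_dquot *dqp, bool wait)
{
	return xfs_qm_dqflush(dqp, wait ? SYNC_WAIT : 0);
}
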
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index d0d4a9a0bbd7..4e4ee9a57194 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -74,11 +74,11 @@ xfs_qm_dquot_logitem_format(
74 74
75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; 75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format;
76 logvec->i_len = sizeof(xfs_dq_logformat_t); 76 logvec->i_len = sizeof(xfs_dq_logformat_t);
77 XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_QFORMAT); 77 logvec->i_type = XLOG_REG_TYPE_QFORMAT;
78 logvec++; 78 logvec++;
79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; 79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
80 logvec->i_len = sizeof(xfs_disk_dquot_t); 80 logvec->i_len = sizeof(xfs_disk_dquot_t);
81 XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_DQUOT); 81 logvec->i_type = XLOG_REG_TYPE_DQUOT;
82 82
83 ASSERT(2 == logitem->qli_item.li_desc->lid_size); 83 ASSERT(2 == logitem->qli_item.li_desc->lid_size);
84 logitem->qli_format.qlf_size = 2; 84 logitem->qli_format.qlf_size = 2;
@@ -153,7 +153,7 @@ xfs_qm_dquot_logitem_push(
153 * lock without sleeping, then there must not have been 153 * lock without sleeping, then there must not have been
154 * anyone in the process of flushing the dquot. 154 * anyone in the process of flushing the dquot.
155 */ 155 */
156 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 156 error = xfs_qm_dqflush(dqp, 0);
157 if (error) 157 if (error)
158 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 158 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
159 "xfs_qm_dquot_logitem_push: push error %d on dqp %p", 159 "xfs_qm_dquot_logitem_push: push error %d on dqp %p",
@@ -190,7 +190,7 @@ xfs_qm_dqunpin_wait(
190 /* 190 /*
191 * Give the log a push so we don't wait here too long. 191 * Give the log a push so we don't wait here too long.
192 */ 192 */
193 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); 193 xfs_log_force(dqp->q_mount, 0);
194 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); 194 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
195} 195}
196 196
@@ -212,68 +212,31 @@ xfs_qm_dquot_logitem_pushbuf(
212 xfs_dquot_t *dqp; 212 xfs_dquot_t *dqp;
213 xfs_mount_t *mp; 213 xfs_mount_t *mp;
214 xfs_buf_t *bp; 214 xfs_buf_t *bp;
215 uint dopush;
216 215
217 dqp = qip->qli_dquot; 216 dqp = qip->qli_dquot;
218 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 217 ASSERT(XFS_DQ_IS_LOCKED(dqp));
219 218
220 /* 219 /*
221 * The qli_pushbuf_flag keeps others from
222 * trying to duplicate our effort.
223 */
224 ASSERT(qip->qli_pushbuf_flag != 0);
225 ASSERT(qip->qli_push_owner == current_pid());
226
227 /*
228 * If flushlock isn't locked anymore, chances are that the 220 * If flushlock isn't locked anymore, chances are that the
229 * inode flush completed and the inode was taken off the AIL. 221 * inode flush completed and the inode was taken off the AIL.
230 * So, just get out. 222 * So, just get out.
231 */ 223 */
232 if (completion_done(&dqp->q_flush) || 224 if (completion_done(&dqp->q_flush) ||
233 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 225 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
234 qip->qli_pushbuf_flag = 0;
235 xfs_dqunlock(dqp); 226 xfs_dqunlock(dqp);
236 return; 227 return;
237 } 228 }
238 mp = dqp->q_mount; 229 mp = dqp->q_mount;
239 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, 230 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
240 XFS_QI_DQCHUNKLEN(mp), 231 XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK);
241 XFS_INCORE_TRYLOCK); 232 xfs_dqunlock(dqp);
242 if (bp != NULL) { 233 if (!bp)
243 if (XFS_BUF_ISDELAYWRITE(bp)) {
244 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
245 !completion_done(&dqp->q_flush));
246 qip->qli_pushbuf_flag = 0;
247 xfs_dqunlock(dqp);
248
249 if (XFS_BUF_ISPINNED(bp)) {
250 xfs_log_force(mp, (xfs_lsn_t)0,
251 XFS_LOG_FORCE);
252 }
253 if (dopush) {
254 int error;
255#ifdef XFSRACEDEBUG
256 delay_for_intr();
257 delay(300);
258#endif
259 error = xfs_bawrite(mp, bp);
260 if (error)
261 xfs_fs_cmn_err(CE_WARN, mp,
262 "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p",
263 error, qip, bp);
264 } else {
265 xfs_buf_relse(bp);
266 }
267 } else {
268 qip->qli_pushbuf_flag = 0;
269 xfs_dqunlock(dqp);
270 xfs_buf_relse(bp);
271 }
272 return; 234 return;
273 } 235 if (XFS_BUF_ISDELAYWRITE(bp))
236 xfs_buf_delwri_promote(bp);
237 xfs_buf_relse(bp);
238 return;
274 239
275 qip->qli_pushbuf_flag = 0;
276 xfs_dqunlock(dqp);
277} 240}
278 241
279/* 242/*
@@ -291,50 +254,24 @@ xfs_qm_dquot_logitem_trylock(
291 xfs_dq_logitem_t *qip) 254 xfs_dq_logitem_t *qip)
292{ 255{
293 xfs_dquot_t *dqp; 256 xfs_dquot_t *dqp;
294 uint retval;
295 257
296 dqp = qip->qli_dquot; 258 dqp = qip->qli_dquot;
297 if (atomic_read(&dqp->q_pincount) > 0) 259 if (atomic_read(&dqp->q_pincount) > 0)
298 return (XFS_ITEM_PINNED); 260 return XFS_ITEM_PINNED;
299 261
300 if (! xfs_qm_dqlock_nowait(dqp)) 262 if (! xfs_qm_dqlock_nowait(dqp))
301 return (XFS_ITEM_LOCKED); 263 return XFS_ITEM_LOCKED;
302 264
303 retval = XFS_ITEM_SUCCESS;
304 if (!xfs_dqflock_nowait(dqp)) { 265 if (!xfs_dqflock_nowait(dqp)) {
305 /* 266 /*
 306 * The dquot is already being flushed. It may have been 267 * The dquot has already been flushed to the backing buffer;
 307 * flushed delayed write, however, and we don't want to 268 * leave it locked, the pushbuf routine will unlock it.
308 * get stuck waiting for that to complete. So, we want to check
309 * to see if we can lock the dquot's buffer without sleeping.
310 * If we can and it is marked for delayed write, then we
311 * hold it and send it out from the push routine. We don't
312 * want to do that now since we might sleep in the device
313 * strategy routine. We also don't want to grab the buffer lock
314 * here because we'd like not to call into the buffer cache
315 * while holding the AIL lock.
316 * Make sure to only return PUSHBUF if we set pushbuf_flag
317 * ourselves. If someone else is doing it then we don't
318 * want to go to the push routine and duplicate their efforts.
319 */ 269 */
320 if (qip->qli_pushbuf_flag == 0) { 270 return XFS_ITEM_PUSHBUF;
321 qip->qli_pushbuf_flag = 1;
322 ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
323#ifdef DEBUG
324 qip->qli_push_owner = current_pid();
325#endif
326 /*
327 * The dquot is left locked.
328 */
329 retval = XFS_ITEM_PUSHBUF;
330 } else {
331 retval = XFS_ITEM_FLUSHING;
332 xfs_dqunlock_nonotify(dqp);
333 }
334 } 271 }
335 272
336 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); 273 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
337 return (retval); 274 return XFS_ITEM_SUCCESS;
338} 275}
339 276
340 277
@@ -467,7 +404,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf,
467 404
468 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); 405 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
469 log_vector->i_len = sizeof(xfs_qoff_logitem_t); 406 log_vector->i_len = sizeof(xfs_qoff_logitem_t);
470 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); 407 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
471 qf->qql_format.qf_size = 1; 408 qf->qql_format.qf_size = 1;
472} 409}
473 410
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
index 5a632531f843..5acae2ada70b 100644
--- a/fs/xfs/quota/xfs_dquot_item.h
+++ b/fs/xfs/quota/xfs_dquot_item.h
@@ -27,10 +27,6 @@ typedef struct xfs_dq_logitem {
27 xfs_log_item_t qli_item; /* common portion */ 27 xfs_log_item_t qli_item; /* common portion */
28 struct xfs_dquot *qli_dquot; /* dquot ptr */ 28 struct xfs_dquot *qli_dquot; /* dquot ptr */
29 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ 29 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
30 unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */
31#ifdef DEBUG
32 uint64_t qli_push_owner;
33#endif
34 xfs_dq_logformat_t qli_format; /* logged structure */ 30 xfs_dq_logformat_t qli_format; /* logged structure */
35} xfs_dq_logitem_t; 31} xfs_dq_logitem_t;
36 32
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 9e627a8b5b0e..417e61e3d9dd 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -118,9 +118,14 @@ xfs_Gqm_init(void)
118 */ 118 */
119 udqhash = kmem_zalloc_greedy(&hsize, 119 udqhash = kmem_zalloc_greedy(&hsize,
120 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), 120 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
121 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t), 121 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
122 KM_SLEEP | KM_MAYFAIL | KM_LARGE); 122 if (!udqhash)
123 gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); 123 goto out;
124
125 gdqhash = kmem_zalloc_large(hsize);
126 if (!gdqhash)
127 goto out_free_udqhash;
128
124 hsize /= sizeof(xfs_dqhash_t); 129 hsize /= sizeof(xfs_dqhash_t);
125 ndquot = hsize << 8; 130 ndquot = hsize << 8;
126 131
@@ -170,6 +175,11 @@ xfs_Gqm_init(void)
170 mutex_init(&qcheck_lock); 175 mutex_init(&qcheck_lock);
171#endif 176#endif
172 return xqm; 177 return xqm;
178
179 out_free_udqhash:
180 kmem_free_large(udqhash);
181 out:
182 return NULL;
173} 183}
174 184
175/* 185/*
@@ -189,8 +199,8 @@ xfs_qm_destroy(
189 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 199 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
190 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 200 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
191 } 201 }
192 kmem_free(xqm->qm_usr_dqhtable); 202 kmem_free_large(xqm->qm_usr_dqhtable);
193 kmem_free(xqm->qm_grp_dqhtable); 203 kmem_free_large(xqm->qm_grp_dqhtable);
194 xqm->qm_usr_dqhtable = NULL; 204 xqm->qm_usr_dqhtable = NULL;
195 xqm->qm_grp_dqhtable = NULL; 205 xqm->qm_grp_dqhtable = NULL;
196 xqm->qm_dqhashmask = 0; 206 xqm->qm_dqhashmask = 0;
@@ -219,8 +229,12 @@ xfs_qm_hold_quotafs_ref(
219 */ 229 */
220 mutex_lock(&xfs_Gqm_lock); 230 mutex_lock(&xfs_Gqm_lock);
221 231
222 if (xfs_Gqm == NULL) 232 if (!xfs_Gqm) {
223 xfs_Gqm = xfs_Gqm_init(); 233 xfs_Gqm = xfs_Gqm_init();
234 if (!xfs_Gqm)
235 return ENOMEM;
236 }
237
224 /* 238 /*
225 * We can keep a list of all filesystems with quotas mounted for 239 * We can keep a list of all filesystems with quotas mounted for
226 * debugging and statistical purposes, but ... 240 * debugging and statistical purposes, but ...
@@ -436,7 +450,7 @@ xfs_qm_unmount_quotas(
436STATIC int 450STATIC int
437xfs_qm_dqflush_all( 451xfs_qm_dqflush_all(
438 xfs_mount_t *mp, 452 xfs_mount_t *mp,
439 int flags) 453 int sync_mode)
440{ 454{
441 int recl; 455 int recl;
442 xfs_dquot_t *dqp; 456 xfs_dquot_t *dqp;
@@ -472,7 +486,7 @@ again:
472 * across a disk write. 486 * across a disk write.
473 */ 487 */
474 xfs_qm_mplist_unlock(mp); 488 xfs_qm_mplist_unlock(mp);
475 error = xfs_qm_dqflush(dqp, flags); 489 error = xfs_qm_dqflush(dqp, sync_mode);
476 xfs_dqunlock(dqp); 490 xfs_dqunlock(dqp);
477 if (error) 491 if (error)
478 return error; 492 return error;
@@ -912,13 +926,11 @@ xfs_qm_sync(
912{ 926{
913 int recl, restarts; 927 int recl, restarts;
914 xfs_dquot_t *dqp; 928 xfs_dquot_t *dqp;
915 uint flush_flags;
916 int error; 929 int error;
917 930
918 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 931 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
919 return 0; 932 return 0;
920 933
921 flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
922 restarts = 0; 934 restarts = 0;
923 935
924 again: 936 again:
@@ -978,7 +990,7 @@ xfs_qm_sync(
978 * across a disk write 990 * across a disk write
979 */ 991 */
980 xfs_qm_mplist_unlock(mp); 992 xfs_qm_mplist_unlock(mp);
981 error = xfs_qm_dqflush(dqp, flush_flags); 993 error = xfs_qm_dqflush(dqp, flags);
982 xfs_dqunlock(dqp); 994 xfs_dqunlock(dqp);
983 if (error && XFS_FORCED_SHUTDOWN(mp)) 995 if (error && XFS_FORCED_SHUTDOWN(mp))
984 return 0; /* Need to prevent umount failure */ 996 return 0; /* Need to prevent umount failure */
@@ -1782,7 +1794,7 @@ xfs_qm_quotacheck(
1782 * successfully. 1794 * successfully.
1783 */ 1795 */
1784 if (!error) 1796 if (!error)
1785 error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); 1797 error = xfs_qm_dqflush_all(mp, 0);
1786 1798
1787 /* 1799 /*
1788 * We can get this error if we couldn't do a dquot allocation inside 1800 * We can get this error if we couldn't do a dquot allocation inside
@@ -2004,7 +2016,7 @@ xfs_qm_shake_freelist(
2004 * We flush it delayed write, so don't bother 2016 * We flush it delayed write, so don't bother
2005 * releasing the mplock. 2017 * releasing the mplock.
2006 */ 2018 */
2007 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2019 error = xfs_qm_dqflush(dqp, 0);
2008 if (error) { 2020 if (error) {
2009 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2021 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2010 "xfs_qm_dqflush_all: dquot %p flush failed", dqp); 2022 "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
@@ -2187,7 +2199,7 @@ xfs_qm_dqreclaim_one(void)
2187 * We flush it delayed write, so don't bother 2199 * We flush it delayed write, so don't bother
2188 * releasing the freelist lock. 2200 * releasing the freelist lock.
2189 */ 2201 */
2190 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2202 error = xfs_qm_dqflush(dqp, 0);
2191 if (error) { 2203 if (error) {
2192 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2204 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2193 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 2205 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
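
kmem_zalloc_greedy() loses its flags argument in this series and, no longer being an implicit KM_SLEEP allocation, can fail; both hash tables therefore gain explicit unwind paths, and the large allocations are freed with kmem_free_large(). The allocation contract, sketched with the names from the hunk above:

/* Sketch: ask for up to the high size, accept the low size; the
 * achieved size is returned through *hsize. NULL is now possible. */
static void *alloc_udqhash(size_t *hsize)
{
	void *udqhash = kmem_zalloc_greedy(hsize,
			XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
			XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));

	if (!udqhash)
		return NULL;	/* caller unwinds, cf. out_free_udqhash */
	return udqhash;
}
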
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index a5346630dfae..97b410c12794 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -59,7 +59,7 @@ xfs_fill_statvfs_from_dquot(
59 be64_to_cpu(dp->d_blk_hardlimit); 59 be64_to_cpu(dp->d_blk_hardlimit);
60 if (limit && statp->f_blocks > limit) { 60 if (limit && statp->f_blocks > limit) {
61 statp->f_blocks = limit; 61 statp->f_blocks = limit;
62 statp->f_bfree = 62 statp->f_bfree = statp->f_bavail =
63 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? 63 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
64 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; 64 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
65 } 65 }
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 873e07e29074..5d0ee8d492db 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1192,9 +1192,9 @@ xfs_qm_internalqcheck(
1192 if (! XFS_IS_QUOTA_ON(mp)) 1192 if (! XFS_IS_QUOTA_ON(mp))
1193 return XFS_ERROR(ESRCH); 1193 return XFS_ERROR(ESRCH);
1194 1194
1195 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1195 xfs_log_force(mp, XFS_LOG_SYNC);
1196 XFS_bflush(mp->m_ddev_targp); 1196 XFS_bflush(mp->m_ddev_targp);
1197 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1197 xfs_log_force(mp, XFS_LOG_SYNC);
1198 XFS_bflush(mp->m_ddev_targp); 1198 XFS_bflush(mp->m_ddev_targp);
1199 1199
1200 mutex_lock(&qcheck_lock); 1200 mutex_lock(&qcheck_lock);
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 97ac9640be98..c3ab75cb1d9a 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -589,12 +589,18 @@ xfs_trans_unreserve_and_mod_dquots(
589 } 589 }
590} 590}
591 591
592STATIC int 592STATIC void
593xfs_quota_error(uint flags) 593xfs_quota_warn(
594 struct xfs_mount *mp,
595 struct xfs_dquot *dqp,
596 int type)
594{ 597{
595 if (flags & XFS_QMOPT_ENOSPC) 598 /* no warnings for project quotas - we just return ENOSPC later */
596 return ENOSPC; 599 if (dqp->dq_flags & XFS_DQ_PROJ)
597 return EDQUOT; 600 return;
601 quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
602 be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
603 type);
598} 604}
599 605
600/* 606/*
@@ -612,7 +618,6 @@ xfs_trans_dqresv(
612 long ninos, 618 long ninos,
613 uint flags) 619 uint flags)
614{ 620{
615 int error;
616 xfs_qcnt_t hardlimit; 621 xfs_qcnt_t hardlimit;
617 xfs_qcnt_t softlimit; 622 xfs_qcnt_t softlimit;
618 time_t timer; 623 time_t timer;
@@ -649,7 +654,6 @@ xfs_trans_dqresv(
649 warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); 654 warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount);
650 resbcountp = &dqp->q_res_rtbcount; 655 resbcountp = &dqp->q_res_rtbcount;
651 } 656 }
652 error = 0;
653 657
654 if ((flags & XFS_QMOPT_FORCE_RES) == 0 && 658 if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
655 dqp->q_core.d_id && 659 dqp->q_core.d_id &&
@@ -667,18 +671,20 @@ xfs_trans_dqresv(
667 * nblks. 671 * nblks.
668 */ 672 */
669 if (hardlimit > 0ULL && 673 if (hardlimit > 0ULL &&
670 (hardlimit <= nblks + *resbcountp)) { 674 hardlimit <= nblks + *resbcountp) {
671 error = xfs_quota_error(flags); 675 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
672 goto error_return; 676 goto error_return;
673 } 677 }
674
675 if (softlimit > 0ULL && 678 if (softlimit > 0ULL &&
676 (softlimit <= nblks + *resbcountp)) { 679 softlimit <= nblks + *resbcountp) {
677 if ((timer != 0 && get_seconds() > timer) || 680 if ((timer != 0 && get_seconds() > timer) ||
678 (warns != 0 && warns >= warnlimit)) { 681 (warns != 0 && warns >= warnlimit)) {
679 error = xfs_quota_error(flags); 682 xfs_quota_warn(mp, dqp,
683 QUOTA_NL_BSOFTLONGWARN);
680 goto error_return; 684 goto error_return;
681 } 685 }
686
687 xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
682 } 688 }
683 } 689 }
684 if (ninos > 0) { 690 if (ninos > 0) {
@@ -692,15 +698,19 @@ xfs_trans_dqresv(
692 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); 698 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
693 if (!softlimit) 699 if (!softlimit)
694 softlimit = q->qi_isoftlimit; 700 softlimit = q->qi_isoftlimit;
701
695 if (hardlimit > 0ULL && count >= hardlimit) { 702 if (hardlimit > 0ULL && count >= hardlimit) {
696 error = xfs_quota_error(flags); 703 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
697 goto error_return; 704 goto error_return;
698 } else if (softlimit > 0ULL && count >= softlimit) { 705 }
699 if ((timer != 0 && get_seconds() > timer) || 706 if (softlimit > 0ULL && count >= softlimit) {
707 if ((timer != 0 && get_seconds() > timer) ||
700 (warns != 0 && warns >= warnlimit)) { 708 (warns != 0 && warns >= warnlimit)) {
701 error = xfs_quota_error(flags); 709 xfs_quota_warn(mp, dqp,
710 QUOTA_NL_ISOFTLONGWARN);
702 goto error_return; 711 goto error_return;
703 } 712 }
713 xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
704 } 714 }
705 } 715 }
706 } 716 }
@@ -736,9 +746,14 @@ xfs_trans_dqresv(
736 ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); 746 ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
737 ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); 747 ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
738 748
749 xfs_dqunlock(dqp);
750 return 0;
751
739error_return: 752error_return:
740 xfs_dqunlock(dqp); 753 xfs_dqunlock(dqp);
741 return error; 754 if (flags & XFS_QMOPT_ENOSPC)
755 return ENOSPC;
756 return EDQUOT;
742} 757}
743 758
744 759
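
The net effect of the xfs_trans_dquot.c hunk: limit breaches now emit a netlink warning via xfs_quota_warn()/quota_send_warning() at the point of detection (project quotas stay silent, per the comment above), and the errno is chosen once at error_return instead of per check. A self-contained sketch of that final selection, with an illustrative flag value standing in for XFS_QMOPT_ENOSPC:

	#include <errno.h>

	#define QMOPT_ENOSPC	0x1	/* illustrative stand-in for XFS_QMOPT_ENOSPC */

	/* Mirrors the tail of xfs_trans_dqresv() above: project quota
	 * reservations report ENOSPC, user/group quotas report EDQUOT. */
	static int dqresv_error(unsigned int flags)
	{
		return (flags & QMOPT_ENOSPC) ? ENOSPC : EDQUOT;
	}
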
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 00fd357c3e46..d13eeba2c8f8 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -36,8 +36,8 @@ struct xfs_acl {
36}; 36};
37 37
38/* On-disk XFS extended attribute names */ 38/* On-disk XFS extended attribute names */
39#define SGI_ACL_FILE "SGI_ACL_FILE" 39#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
40#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" 40#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
41#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) 41#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
43 43
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 6702bd865811..b1a5a1ff88ea 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,17 +187,13 @@ typedef struct xfs_perag_busy {
187/* 187/*
188 * Per-ag incore structure, copies of information in agf and agi, 188 * Per-ag incore structure, copies of information in agf and agi,
189 * to improve the performance of allocation group selection. 189 * to improve the performance of allocation group selection.
190 *
191 * pick sizes which fit in allocation buckets well
192 */ 190 */
193#if (BITS_PER_LONG == 32)
194#define XFS_PAGB_NUM_SLOTS 84
195#elif (BITS_PER_LONG == 64)
196#define XFS_PAGB_NUM_SLOTS 128 191#define XFS_PAGB_NUM_SLOTS 128
197#endif
198 192
199typedef struct xfs_perag 193typedef struct xfs_perag {
200{ 194 struct xfs_mount *pag_mount; /* owner filesystem */
195 xfs_agnumber_t pag_agno; /* AG this structure belongs to */
196 atomic_t pag_ref; /* perag reference count */
201 char pagf_init; /* this agf's entry is initialized */ 197 char pagf_init; /* this agf's entry is initialized */
202 char pagi_init; /* this agi's entry is initialized */ 198 char pagi_init; /* this agi's entry is initialized */
203 char pagf_metadata; /* the agf is preferred to be metadata */ 199 char pagf_metadata; /* the agf is preferred to be metadata */
@@ -210,8 +206,6 @@ typedef struct xfs_perag
210 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ 206 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
211 xfs_agino_t pagi_freecount; /* number of free inodes */ 207 xfs_agino_t pagi_freecount; /* number of free inodes */
212 xfs_agino_t pagi_count; /* number of allocated inodes */ 208 xfs_agino_t pagi_count; /* number of allocated inodes */
213 int pagb_count; /* pagb slots in use */
214 xfs_perag_busy_t *pagb_list; /* unstable blocks */
215 209
216 /* 210 /*
217 * Inode allocation search lookup optimisation. 211 * Inode allocation search lookup optimisation.
@@ -230,6 +224,8 @@ typedef struct xfs_perag
230 rwlock_t pag_ici_lock; /* incore inode lock */ 224 rwlock_t pag_ici_lock; /* incore inode lock */
231 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 225 struct radix_tree_root pag_ici_root; /* incore inode cache root */
232#endif 226#endif
227 int pagb_count; /* pagb slots in use */
228 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
233} xfs_perag_t; 229} xfs_perag_t;
234 230
235/* 231/*
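
Three additions here (pag_mount, pag_agno, pag_ref) plus the inlined pagb_list array set up the reference-counted per-AG lookups used throughout the rest of this diff. A standalone sketch of the get/put contract implied by the atomic pag_ref field; the kernel's real lookup goes through the mount structure, so this is illustrative only:

	#include <stdatomic.h>

	struct perag_sketch {
		int		agno;	/* stands in for pag_agno */
		atomic_int	ref;	/* stands in for pag_ref */
	};

	/* Every get pins the structure until the matching put, so holders
	 * may use it without the global m_peraglock being removed below. */
	static struct perag_sketch *perag_get(struct perag_sketch *pag)
	{
		atomic_fetch_add_explicit(&pag->ref, 1, memory_order_relaxed);
		return pag;
	}

	static void perag_put(struct perag_sketch *pag)
	{
		atomic_fetch_sub_explicit(&pag->ref, 1, memory_order_relaxed);
	}
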
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 275b1f4f9430..94cddbfb2560 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1662,11 +1662,13 @@ xfs_free_ag_extent(
1662 xfs_agf_t *agf; 1662 xfs_agf_t *agf;
1663 xfs_perag_t *pag; /* per allocation group data */ 1663 xfs_perag_t *pag; /* per allocation group data */
1664 1664
1665 pag = xfs_perag_get(mp, agno);
1666 pag->pagf_freeblks += len;
1667 xfs_perag_put(pag);
1668
1665 agf = XFS_BUF_TO_AGF(agbp); 1669 agf = XFS_BUF_TO_AGF(agbp);
1666 pag = &mp->m_perag[agno];
1667 be32_add_cpu(&agf->agf_freeblks, len); 1670 be32_add_cpu(&agf->agf_freeblks, len);
1668 xfs_trans_agblocks_delta(tp, len); 1671 xfs_trans_agblocks_delta(tp, len);
1669 pag->pagf_freeblks += len;
1670 XFS_WANT_CORRUPTED_GOTO( 1672 XFS_WANT_CORRUPTED_GOTO(
1671 be32_to_cpu(agf->agf_freeblks) <= 1673 be32_to_cpu(agf->agf_freeblks) <=
1672 be32_to_cpu(agf->agf_length), 1674 be32_to_cpu(agf->agf_length),
@@ -1969,10 +1971,12 @@ xfs_alloc_get_freelist(
1969 xfs_trans_brelse(tp, agflbp); 1971 xfs_trans_brelse(tp, agflbp);
1970 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) 1972 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
1971 agf->agf_flfirst = 0; 1973 agf->agf_flfirst = 0;
1972 pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; 1974
1975 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
1973 be32_add_cpu(&agf->agf_flcount, -1); 1976 be32_add_cpu(&agf->agf_flcount, -1);
1974 xfs_trans_agflist_delta(tp, -1); 1977 xfs_trans_agflist_delta(tp, -1);
1975 pag->pagf_flcount--; 1978 pag->pagf_flcount--;
1979 xfs_perag_put(pag);
1976 1980
1977 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; 1981 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
1978 if (btreeblk) { 1982 if (btreeblk) {
@@ -2078,7 +2082,8 @@ xfs_alloc_put_freelist(
2078 be32_add_cpu(&agf->agf_fllast, 1); 2082 be32_add_cpu(&agf->agf_fllast, 1);
2079 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) 2083 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2080 agf->agf_fllast = 0; 2084 agf->agf_fllast = 0;
2081 pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; 2085
2086 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2082 be32_add_cpu(&agf->agf_flcount, 1); 2087 be32_add_cpu(&agf->agf_flcount, 1);
2083 xfs_trans_agflist_delta(tp, 1); 2088 xfs_trans_agflist_delta(tp, 1);
2084 pag->pagf_flcount++; 2089 pag->pagf_flcount++;
@@ -2089,6 +2094,7 @@ xfs_alloc_put_freelist(
2089 pag->pagf_btreeblks--; 2094 pag->pagf_btreeblks--;
2090 logflags |= XFS_AGF_BTREEBLKS; 2095 logflags |= XFS_AGF_BTREEBLKS;
2091 } 2096 }
2097 xfs_perag_put(pag);
2092 2098
2093 xfs_alloc_log_agf(tp, agbp, logflags); 2099 xfs_alloc_log_agf(tp, agbp, logflags);
2094 2100
@@ -2152,7 +2158,6 @@ xfs_read_agf(
2152 xfs_trans_brelse(tp, *bpp); 2158 xfs_trans_brelse(tp, *bpp);
2153 return XFS_ERROR(EFSCORRUPTED); 2159 return XFS_ERROR(EFSCORRUPTED);
2154 } 2160 }
2155
2156 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF); 2161 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
2157 return 0; 2162 return 0;
2158} 2163}
@@ -2175,7 +2180,7 @@ xfs_alloc_read_agf(
2175 ASSERT(agno != NULLAGNUMBER); 2180 ASSERT(agno != NULLAGNUMBER);
2176 2181
2177 error = xfs_read_agf(mp, tp, agno, 2182 error = xfs_read_agf(mp, tp, agno,
2178 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0, 2183 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2179 bpp); 2184 bpp);
2180 if (error) 2185 if (error)
2181 return error; 2186 return error;
@@ -2184,7 +2189,7 @@ xfs_alloc_read_agf(
2184 ASSERT(!XFS_BUF_GETERROR(*bpp)); 2189 ASSERT(!XFS_BUF_GETERROR(*bpp));
2185 2190
2186 agf = XFS_BUF_TO_AGF(*bpp); 2191 agf = XFS_BUF_TO_AGF(*bpp);
2187 pag = &mp->m_perag[agno]; 2192 pag = xfs_perag_get(mp, agno);
2188 if (!pag->pagf_init) { 2193 if (!pag->pagf_init) {
2189 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2194 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2190 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); 2195 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
@@ -2195,8 +2200,8 @@ xfs_alloc_read_agf(
2195 pag->pagf_levels[XFS_BTNUM_CNTi] = 2200 pag->pagf_levels[XFS_BTNUM_CNTi] =
2196 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 2201 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2197 spin_lock_init(&pag->pagb_lock); 2202 spin_lock_init(&pag->pagb_lock);
2198 pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS * 2203 pag->pagb_count = 0;
2199 sizeof(xfs_perag_busy_t), KM_SLEEP); 2204 memset(pag->pagb_list, 0, sizeof(pag->pagb_list));
2200 pag->pagf_init = 1; 2205 pag->pagf_init = 1;
2201 } 2206 }
2202#ifdef DEBUG 2207#ifdef DEBUG
@@ -2211,6 +2216,7 @@ xfs_alloc_read_agf(
2211 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); 2216 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2212 } 2217 }
2213#endif 2218#endif
2219 xfs_perag_put(pag);
2214 return 0; 2220 return 0;
2215} 2221}
2216 2222
@@ -2270,8 +2276,7 @@ xfs_alloc_vextent(
2270 * These three force us into a single a.g. 2276 * These three force us into a single a.g.
2271 */ 2277 */
2272 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2278 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2273 down_read(&mp->m_peraglock); 2279 args->pag = xfs_perag_get(mp, args->agno);
2274 args->pag = &mp->m_perag[args->agno];
2275 args->minleft = 0; 2280 args->minleft = 0;
2276 error = xfs_alloc_fix_freelist(args, 0); 2281 error = xfs_alloc_fix_freelist(args, 0);
2277 args->minleft = minleft; 2282 args->minleft = minleft;
@@ -2280,14 +2285,12 @@ xfs_alloc_vextent(
2280 goto error0; 2285 goto error0;
2281 } 2286 }
2282 if (!args->agbp) { 2287 if (!args->agbp) {
2283 up_read(&mp->m_peraglock);
2284 trace_xfs_alloc_vextent_noagbp(args); 2288 trace_xfs_alloc_vextent_noagbp(args);
2285 break; 2289 break;
2286 } 2290 }
2287 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2291 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2288 if ((error = xfs_alloc_ag_vextent(args))) 2292 if ((error = xfs_alloc_ag_vextent(args)))
2289 goto error0; 2293 goto error0;
2290 up_read(&mp->m_peraglock);
2291 break; 2294 break;
2292 case XFS_ALLOCTYPE_START_BNO: 2295 case XFS_ALLOCTYPE_START_BNO:
2293 /* 2296 /*
@@ -2339,9 +2342,8 @@ xfs_alloc_vextent(
2339 * Loop over allocation groups twice; first time with 2342 * Loop over allocation groups twice; first time with
2340 * trylock set, second time without. 2343 * trylock set, second time without.
2341 */ 2344 */
2342 down_read(&mp->m_peraglock);
2343 for (;;) { 2345 for (;;) {
2344 args->pag = &mp->m_perag[args->agno]; 2346 args->pag = xfs_perag_get(mp, args->agno);
2345 if (no_min) args->minleft = 0; 2347 if (no_min) args->minleft = 0;
2346 error = xfs_alloc_fix_freelist(args, flags); 2348 error = xfs_alloc_fix_freelist(args, flags);
2347 args->minleft = minleft; 2349 args->minleft = minleft;
@@ -2400,8 +2402,8 @@ xfs_alloc_vextent(
2400 } 2402 }
2401 } 2403 }
2402 } 2404 }
2405 xfs_perag_put(args->pag);
2403 } 2406 }
2404 up_read(&mp->m_peraglock);
2405 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { 2407 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2406 if (args->agno == sagno) 2408 if (args->agno == sagno)
2407 mp->m_agfrotor = (mp->m_agfrotor + 1) % 2409 mp->m_agfrotor = (mp->m_agfrotor + 1) %
@@ -2427,9 +2429,10 @@ xfs_alloc_vextent(
2427 args->len); 2429 args->len);
2428#endif 2430#endif
2429 } 2431 }
2432 xfs_perag_put(args->pag);
2430 return 0; 2433 return 0;
2431error0: 2434error0:
2432 up_read(&mp->m_peraglock); 2435 xfs_perag_put(args->pag);
2433 return error; 2436 return error;
2434} 2437}
2435 2438
@@ -2454,8 +2457,7 @@ xfs_free_extent(
2454 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2457 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2455 ASSERT(args.agno < args.mp->m_sb.sb_agcount); 2458 ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2456 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2459 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2457 down_read(&args.mp->m_peraglock); 2460 args.pag = xfs_perag_get(args.mp, args.agno);
2458 args.pag = &args.mp->m_perag[args.agno];
2459 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) 2461 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2460 goto error0; 2462 goto error0;
2461#ifdef DEBUG 2463#ifdef DEBUG
@@ -2465,7 +2467,7 @@ xfs_free_extent(
2465#endif 2467#endif
2466 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2468 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2467error0: 2469error0:
2468 up_read(&args.mp->m_peraglock); 2470 xfs_perag_put(args.pag);
2469 return error; 2471 return error;
2470} 2472}
2471 2473
@@ -2486,15 +2488,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
2486 xfs_agblock_t bno, 2488 xfs_agblock_t bno,
2487 xfs_extlen_t len) 2489 xfs_extlen_t len)
2488{ 2490{
2489 xfs_mount_t *mp;
2490 xfs_perag_busy_t *bsy; 2491 xfs_perag_busy_t *bsy;
2492 struct xfs_perag *pag;
2491 int n; 2493 int n;
2492 2494
2493 mp = tp->t_mountp; 2495 pag = xfs_perag_get(tp->t_mountp, agno);
2494 spin_lock(&mp->m_perag[agno].pagb_lock); 2496 spin_lock(&pag->pagb_lock);
2495 2497
2496 /* search pagb_list for an open slot */ 2498 /* search pagb_list for an open slot */
2497 for (bsy = mp->m_perag[agno].pagb_list, n = 0; 2499 for (bsy = pag->pagb_list, n = 0;
2498 n < XFS_PAGB_NUM_SLOTS; 2500 n < XFS_PAGB_NUM_SLOTS;
2499 bsy++, n++) { 2501 bsy++, n++) {
2500 if (bsy->busy_tp == NULL) { 2502 if (bsy->busy_tp == NULL) {
@@ -2502,11 +2504,11 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
2502 } 2504 }
2503 } 2505 }
2504 2506
2505 trace_xfs_alloc_busy(mp, agno, bno, len, n); 2507 trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n);
2506 2508
2507 if (n < XFS_PAGB_NUM_SLOTS) { 2509 if (n < XFS_PAGB_NUM_SLOTS) {
2508 bsy = &mp->m_perag[agno].pagb_list[n]; 2510 bsy = &pag->pagb_list[n];
2509 mp->m_perag[agno].pagb_count++; 2511 pag->pagb_count++;
2510 bsy->busy_start = bno; 2512 bsy->busy_start = bno;
2511 bsy->busy_length = len; 2513 bsy->busy_length = len;
2512 bsy->busy_tp = tp; 2514 bsy->busy_tp = tp;
@@ -2521,7 +2523,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
2521 xfs_trans_set_sync(tp); 2523 xfs_trans_set_sync(tp);
2522 } 2524 }
2523 2525
2524 spin_unlock(&mp->m_perag[agno].pagb_lock); 2526 spin_unlock(&pag->pagb_lock);
2527 xfs_perag_put(pag);
2525} 2528}
2526 2529
2527void 2530void
@@ -2529,24 +2532,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
2529 xfs_agnumber_t agno, 2532 xfs_agnumber_t agno,
2530 int idx) 2533 int idx)
2531{ 2534{
2532 xfs_mount_t *mp; 2535 struct xfs_perag *pag;
2533 xfs_perag_busy_t *list; 2536 xfs_perag_busy_t *list;
2534 2537
2535 mp = tp->t_mountp;
2536
2537 spin_lock(&mp->m_perag[agno].pagb_lock);
2538 list = mp->m_perag[agno].pagb_list;
2539
2540 ASSERT(idx < XFS_PAGB_NUM_SLOTS); 2538 ASSERT(idx < XFS_PAGB_NUM_SLOTS);
2539 pag = xfs_perag_get(tp->t_mountp, agno);
2540 spin_lock(&pag->pagb_lock);
2541 list = pag->pagb_list;
2541 2542
2542 trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == tp); 2543 trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp);
2543 2544
2544 if (list[idx].busy_tp == tp) { 2545 if (list[idx].busy_tp == tp) {
2545 list[idx].busy_tp = NULL; 2546 list[idx].busy_tp = NULL;
2546 mp->m_perag[agno].pagb_count--; 2547 pag->pagb_count--;
2547 } 2548 }
2548 2549
2549 spin_unlock(&mp->m_perag[agno].pagb_lock); 2550 spin_unlock(&pag->pagb_lock);
2551 xfs_perag_put(pag);
2550} 2552}
2551 2553
2552 2554
@@ -2560,17 +2562,15 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2560 xfs_agblock_t bno, 2562 xfs_agblock_t bno,
2561 xfs_extlen_t len) 2563 xfs_extlen_t len)
2562{ 2564{
2563 xfs_mount_t *mp; 2565 struct xfs_perag *pag;
2564 xfs_perag_busy_t *bsy; 2566 xfs_perag_busy_t *bsy;
2565 xfs_agblock_t uend, bend; 2567 xfs_agblock_t uend, bend;
2566 xfs_lsn_t lsn = 0; 2568 xfs_lsn_t lsn = 0;
2567 int cnt; 2569 int cnt;
2568 2570
2569 mp = tp->t_mountp; 2571 pag = xfs_perag_get(tp->t_mountp, agno);
2570 2572 spin_lock(&pag->pagb_lock);
2571 spin_lock(&mp->m_perag[agno].pagb_lock); 2573 cnt = pag->pagb_count;
2572
2573 uend = bno + len - 1;
2574 2574
2575 /* 2575 /*
2576 * search pagb_list for this slot, skipping open slots. We have to 2576 * search pagb_list for this slot, skipping open slots. We have to
@@ -2578,8 +2578,9 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2578 * we have to get the most recent LSN for the log force to push out 2578 * we have to get the most recent LSN for the log force to push out
2579 * all the transactions that span the range. 2579 * all the transactions that span the range.
2580 */ 2580 */
2581 for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) { 2581 uend = bno + len - 1;
2582 bsy = &mp->m_perag[agno].pagb_list[cnt]; 2582 for (cnt = 0; cnt < pag->pagb_count; cnt++) {
2583 bsy = &pag->pagb_list[cnt];
2583 if (!bsy->busy_tp) 2584 if (!bsy->busy_tp)
2584 continue; 2585 continue;
2585 2586
@@ -2591,7 +2592,8 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2591 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) 2592 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
2592 lsn = bsy->busy_tp->t_commit_lsn; 2593 lsn = bsy->busy_tp->t_commit_lsn;
2593 } 2594 }
2594 spin_unlock(&mp->m_perag[agno].pagb_lock); 2595 spin_unlock(&pag->pagb_lock);
2596 xfs_perag_put(pag);
2595 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); 2597 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
2596 2598
2597 /* 2599 /*
@@ -2599,5 +2601,5 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2599 * transaction that freed the block 2601 * transaction that freed the block
2600 */ 2602 */
2601 if (lsn) 2603 if (lsn)
2602 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2604 xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC);
2603} 2605}
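
Every conversion in xfs_alloc.c follows the same mechanical shape, worth stating once since the remaining files repeat it. Before and after, quoting the forms that appear in the hunks above:

	/* before: global rwsem plus direct array indexing */
	down_read(&mp->m_peraglock);
	pag = &mp->m_perag[agno];
	/* ... use pag ... */
	up_read(&mp->m_peraglock);

	/* after: per-AG reference, released on every exit path
	 * (note the error0: labels above now do xfs_perag_put) */
	pag = xfs_perag_get(mp, agno);
	/* ... use pag ... */
	xfs_perag_put(pag);
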
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index adbd9141aea1..b726e10d2c1c 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -61,12 +61,14 @@ xfs_allocbt_set_root(
61 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 61 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
62 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); 62 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
63 int btnum = cur->bc_btnum; 63 int btnum = cur->bc_btnum;
64 struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
64 65
65 ASSERT(ptr->s != 0); 66 ASSERT(ptr->s != 0);
66 67
67 agf->agf_roots[btnum] = ptr->s; 68 agf->agf_roots[btnum] = ptr->s;
68 be32_add_cpu(&agf->agf_levels[btnum], inc); 69 be32_add_cpu(&agf->agf_levels[btnum], inc);
69 cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc; 70 pag->pagf_levels[btnum] += inc;
71 xfs_perag_put(pag);
70 72
71 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); 73 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
72} 74}
@@ -150,6 +152,7 @@ xfs_allocbt_update_lastrec(
150{ 152{
151 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); 153 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
152 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); 154 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
155 struct xfs_perag *pag;
153 __be32 len; 156 __be32 len;
154 int numrecs; 157 int numrecs;
155 158
@@ -193,7 +196,9 @@ xfs_allocbt_update_lastrec(
193 } 196 }
194 197
195 agf->agf_longest = len; 198 agf->agf_longest = len;
196 cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len); 199 pag = xfs_perag_get(cur->bc_mp, seqno);
200 pag->pagf_longest = be32_to_cpu(len);
201 xfs_perag_put(pag);
197 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); 202 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
198} 203}
199 204
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index e953b6cfb2a8..b9c196a53c42 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -93,12 +93,12 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
93STATIC int 93STATIC int
94xfs_attr_name_to_xname( 94xfs_attr_name_to_xname(
95 struct xfs_name *xname, 95 struct xfs_name *xname,
96 const char *aname) 96 const unsigned char *aname)
97{ 97{
98 if (!aname) 98 if (!aname)
99 return EINVAL; 99 return EINVAL;
100 xname->name = aname; 100 xname->name = aname;
101 xname->len = strlen(aname); 101 xname->len = strlen((char *)aname);
102 if (xname->len >= MAXNAMELEN) 102 if (xname->len >= MAXNAMELEN)
103 return EFAULT; /* match IRIX behaviour */ 103 return EFAULT; /* match IRIX behaviour */
104 104
@@ -124,7 +124,7 @@ STATIC int
124xfs_attr_get_int( 124xfs_attr_get_int(
125 struct xfs_inode *ip, 125 struct xfs_inode *ip,
126 struct xfs_name *name, 126 struct xfs_name *name,
127 char *value, 127 unsigned char *value,
128 int *valuelenp, 128 int *valuelenp,
129 int flags) 129 int flags)
130{ 130{
@@ -171,8 +171,8 @@ xfs_attr_get_int(
171int 171int
172xfs_attr_get( 172xfs_attr_get(
173 xfs_inode_t *ip, 173 xfs_inode_t *ip,
174 const char *name, 174 const unsigned char *name,
175 char *value, 175 unsigned char *value,
176 int *valuelenp, 176 int *valuelenp,
177 int flags) 177 int flags)
178{ 178{
@@ -197,7 +197,7 @@ xfs_attr_get(
197/* 197/*
198 * Calculate how many blocks we need for the new attribute, 198 * Calculate how many blocks we need for the new attribute,
199 */ 199 */
200int 200STATIC int
201xfs_attr_calc_size( 201xfs_attr_calc_size(
202 struct xfs_inode *ip, 202 struct xfs_inode *ip,
203 int namelen, 203 int namelen,
@@ -235,8 +235,12 @@ xfs_attr_calc_size(
235} 235}
236 236
237STATIC int 237STATIC int
238xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, 238xfs_attr_set_int(
239 char *value, int valuelen, int flags) 239 struct xfs_inode *dp,
240 struct xfs_name *name,
241 unsigned char *value,
242 int valuelen,
243 int flags)
240{ 244{
241 xfs_da_args_t args; 245 xfs_da_args_t args;
242 xfs_fsblock_t firstblock; 246 xfs_fsblock_t firstblock;
@@ -452,8 +456,8 @@ out:
452int 456int
453xfs_attr_set( 457xfs_attr_set(
454 xfs_inode_t *dp, 458 xfs_inode_t *dp,
455 const char *name, 459 const unsigned char *name,
456 char *value, 460 unsigned char *value,
457 int valuelen, 461 int valuelen,
458 int flags) 462 int flags)
459{ 463{
@@ -600,7 +604,7 @@ out:
600int 604int
601xfs_attr_remove( 605xfs_attr_remove(
602 xfs_inode_t *dp, 606 xfs_inode_t *dp,
603 const char *name, 607 const unsigned char *name,
604 int flags) 608 int flags)
605{ 609{
606 int error; 610 int error;
@@ -669,9 +673,13 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
669 */ 673 */
670/*ARGSUSED*/ 674/*ARGSUSED*/
671STATIC int 675STATIC int
672xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags, 676xfs_attr_put_listent(
673 char *name, int namelen, 677 xfs_attr_list_context_t *context,
674 int valuelen, char *value) 678 int flags,
679 unsigned char *name,
680 int namelen,
681 int valuelen,
682 unsigned char *value)
675{ 683{
676 struct attrlist *alist = (struct attrlist *)context->alist; 684 struct attrlist *alist = (struct attrlist *)context->alist;
677 attrlist_ent_t *aep; 685 attrlist_ent_t *aep;
@@ -1980,7 +1988,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
1980 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE]; 1988 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
1981 xfs_mount_t *mp; 1989 xfs_mount_t *mp;
1982 xfs_daddr_t dblkno; 1990 xfs_daddr_t dblkno;
1983 xfs_caddr_t dst; 1991 void *dst;
1984 xfs_buf_t *bp; 1992 xfs_buf_t *bp;
1985 int nmap, error, tmp, valuelen, blkcnt, i; 1993 int nmap, error, tmp, valuelen, blkcnt, i;
1986 xfs_dablk_t lblkno; 1994 xfs_dablk_t lblkno;
@@ -2007,15 +2015,14 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2007 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 2015 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2008 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 2016 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2009 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, 2017 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2010 blkcnt, 2018 blkcnt, XBF_LOCK | XBF_DONT_BLOCK,
2011 XFS_BUF_LOCK | XBF_DONT_BLOCK,
2012 &bp); 2019 &bp);
2013 if (error) 2020 if (error)
2014 return(error); 2021 return(error);
2015 2022
2016 tmp = (valuelen < XFS_BUF_SIZE(bp)) 2023 tmp = (valuelen < XFS_BUF_SIZE(bp))
2017 ? valuelen : XFS_BUF_SIZE(bp); 2024 ? valuelen : XFS_BUF_SIZE(bp);
2018 xfs_biomove(bp, 0, tmp, dst, XFS_B_READ); 2025 xfs_biomove(bp, 0, tmp, dst, XBF_READ);
2019 xfs_buf_relse(bp); 2026 xfs_buf_relse(bp);
2020 dst += tmp; 2027 dst += tmp;
2021 valuelen -= tmp; 2028 valuelen -= tmp;
@@ -2039,7 +2046,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2039 xfs_inode_t *dp; 2046 xfs_inode_t *dp;
2040 xfs_bmbt_irec_t map; 2047 xfs_bmbt_irec_t map;
2041 xfs_daddr_t dblkno; 2048 xfs_daddr_t dblkno;
2042 xfs_caddr_t src; 2049 void *src;
2043 xfs_buf_t *bp; 2050 xfs_buf_t *bp;
2044 xfs_dablk_t lblkno; 2051 xfs_dablk_t lblkno;
2045 int blkcnt, valuelen, nmap, error, tmp, committed; 2052 int blkcnt, valuelen, nmap, error, tmp, committed;
@@ -2141,13 +2148,13 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2141 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2148 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2142 2149
2143 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 2150 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
2144 XFS_BUF_LOCK | XBF_DONT_BLOCK); 2151 XBF_LOCK | XBF_DONT_BLOCK);
2145 ASSERT(bp); 2152 ASSERT(bp);
2146 ASSERT(!XFS_BUF_GETERROR(bp)); 2153 ASSERT(!XFS_BUF_GETERROR(bp));
2147 2154
2148 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : 2155 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2149 XFS_BUF_SIZE(bp); 2156 XFS_BUF_SIZE(bp);
2150 xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE); 2157 xfs_biomove(bp, 0, tmp, src, XBF_WRITE);
2151 if (tmp < XFS_BUF_SIZE(bp)) 2158 if (tmp < XFS_BUF_SIZE(bp))
2152 xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); 2159 xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2153 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ 2160 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
@@ -2208,8 +2215,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2208 /* 2215 /*
2209 * If the "remote" value is in the cache, remove it. 2216 * If the "remote" value is in the cache, remove it.
2210 */ 2217 */
2211 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, 2218 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
2212 XFS_INCORE_TRYLOCK);
2213 if (bp) { 2219 if (bp) {
2214 XFS_BUF_STALE(bp); 2220 XFS_BUF_STALE(bp);
2215 XFS_BUF_UNDELAYWRITE(bp); 2221 XFS_BUF_UNDELAYWRITE(bp);
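
The char to unsigned char conversion above is wide but shallow: attribute names and values are treated as byte strings end to end, with casts only where libc-style helpers want char *. A self-contained sketch mirroring xfs_attr_name_to_xname() from the first hunk; MAXNAMELEN here is an illustrative stand-in for the kernel's definition:

	#include <errno.h>
	#include <string.h>

	#define MAXNAMELEN 256	/* illustrative stand-in */

	struct xname_sketch {
		const unsigned char	*name;
		int			len;
	};

	static int name_to_xname(struct xname_sketch *xname,
				 const unsigned char *aname)
	{
		if (!aname)
			return EINVAL;
		xname->name = aname;
		xname->len = (int)strlen((const char *)aname);	/* cast for libc */
		if (xname->len >= MAXNAMELEN)
			return EFAULT;	/* match IRIX behaviour */
		return 0;
	}
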
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 59b410ce69a1..e920d68ef509 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -113,7 +113,7 @@ typedef struct attrlist_cursor_kern {
113 113
114 114
115typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, 115typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
116 char *, int, int, char *); 116 unsigned char *, int, int, unsigned char *);
117 117
118typedef struct xfs_attr_list_context { 118typedef struct xfs_attr_list_context {
119 struct xfs_inode *dp; /* inode */ 119 struct xfs_inode *dp; /* inode */
@@ -139,7 +139,6 @@ typedef struct xfs_attr_list_context {
139/* 139/*
140 * Overall external interface routines. 140 * Overall external interface routines.
141 */ 141 */
142int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
143int xfs_attr_inactive(struct xfs_inode *dp); 142int xfs_attr_inactive(struct xfs_inode *dp);
144int xfs_attr_rmtval_get(struct xfs_da_args *args); 143int xfs_attr_rmtval_get(struct xfs_da_args *args);
145int xfs_attr_list_int(struct xfs_attr_list_context *); 144int xfs_attr_list_int(struct xfs_attr_list_context *);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index baf41b5af756..a90ce74fc256 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -521,11 +521,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
521 521
522 sfe = &sf->list[0]; 522 sfe = &sf->list[0];
523 for (i = 0; i < sf->hdr.count; i++) { 523 for (i = 0; i < sf->hdr.count; i++) {
524 nargs.name = (char *)sfe->nameval; 524 nargs.name = sfe->nameval;
525 nargs.namelen = sfe->namelen; 525 nargs.namelen = sfe->namelen;
526 nargs.value = (char *)&sfe->nameval[nargs.namelen]; 526 nargs.value = &sfe->nameval[nargs.namelen];
527 nargs.valuelen = sfe->valuelen; 527 nargs.valuelen = sfe->valuelen;
528 nargs.hashval = xfs_da_hashname((char *)sfe->nameval, 528 nargs.hashval = xfs_da_hashname(sfe->nameval,
529 sfe->namelen); 529 sfe->namelen);
530 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); 530 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
531 error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ 531 error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */
@@ -612,10 +612,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
612 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { 612 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
613 error = context->put_listent(context, 613 error = context->put_listent(context,
614 sfe->flags, 614 sfe->flags,
615 (char *)sfe->nameval, 615 sfe->nameval,
616 (int)sfe->namelen, 616 (int)sfe->namelen,
617 (int)sfe->valuelen, 617 (int)sfe->valuelen,
618 (char*)&sfe->nameval[sfe->namelen]); 618 &sfe->nameval[sfe->namelen]);
619 619
620 /* 620 /*
621 * Either search callback finished early or 621 * Either search callback finished early or
@@ -659,8 +659,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
659 } 659 }
660 660
661 sbp->entno = i; 661 sbp->entno = i;
662 sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); 662 sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen);
663 sbp->name = (char *)sfe->nameval; 663 sbp->name = sfe->nameval;
664 sbp->namelen = sfe->namelen; 664 sbp->namelen = sfe->namelen;
665 /* These are bytes, and both on-disk, don't endian-flip */ 665 /* These are bytes, and both on-disk, don't endian-flip */
666 sbp->valuelen = sfe->valuelen; 666 sbp->valuelen = sfe->valuelen;
@@ -818,9 +818,9 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
818 continue; 818 continue;
819 ASSERT(entry->flags & XFS_ATTR_LOCAL); 819 ASSERT(entry->flags & XFS_ATTR_LOCAL);
820 name_loc = xfs_attr_leaf_name_local(leaf, i); 820 name_loc = xfs_attr_leaf_name_local(leaf, i);
821 nargs.name = (char *)name_loc->nameval; 821 nargs.name = name_loc->nameval;
822 nargs.namelen = name_loc->namelen; 822 nargs.namelen = name_loc->namelen;
823 nargs.value = (char *)&name_loc->nameval[nargs.namelen]; 823 nargs.value = &name_loc->nameval[nargs.namelen];
824 nargs.valuelen = be16_to_cpu(name_loc->valuelen); 824 nargs.valuelen = be16_to_cpu(name_loc->valuelen);
825 nargs.hashval = be32_to_cpu(entry->hashval); 825 nargs.hashval = be32_to_cpu(entry->hashval);
826 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); 826 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags);
@@ -2370,10 +2370,10 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2370 2370
2371 retval = context->put_listent(context, 2371 retval = context->put_listent(context,
2372 entry->flags, 2372 entry->flags,
2373 (char *)name_loc->nameval, 2373 name_loc->nameval,
2374 (int)name_loc->namelen, 2374 (int)name_loc->namelen,
2375 be16_to_cpu(name_loc->valuelen), 2375 be16_to_cpu(name_loc->valuelen),
2376 (char *)&name_loc->nameval[name_loc->namelen]); 2376 &name_loc->nameval[name_loc->namelen]);
2377 if (retval) 2377 if (retval)
2378 return retval; 2378 return retval;
2379 } else { 2379 } else {
@@ -2397,15 +2397,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2397 return retval; 2397 return retval;
2398 retval = context->put_listent(context, 2398 retval = context->put_listent(context,
2399 entry->flags, 2399 entry->flags,
2400 (char *)name_rmt->name, 2400 name_rmt->name,
2401 (int)name_rmt->namelen, 2401 (int)name_rmt->namelen,
2402 valuelen, 2402 valuelen,
2403 (char*)args.value); 2403 args.value);
2404 kmem_free(args.value); 2404 kmem_free(args.value);
2405 } else { 2405 } else {
2406 retval = context->put_listent(context, 2406 retval = context->put_listent(context,
2407 entry->flags, 2407 entry->flags,
2408 (char *)name_rmt->name, 2408 name_rmt->name,
2409 (int)name_rmt->namelen, 2409 (int)name_rmt->namelen,
2410 valuelen, 2410 valuelen,
2411 NULL); 2411 NULL);
@@ -2950,7 +2950,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
2950 map.br_blockcount); 2950 map.br_blockcount);
2951 bp = xfs_trans_get_buf(*trans, 2951 bp = xfs_trans_get_buf(*trans,
2952 dp->i_mount->m_ddev_targp, 2952 dp->i_mount->m_ddev_targp,
2953 dblkno, dblkcnt, XFS_BUF_LOCK); 2953 dblkno, dblkcnt, XBF_LOCK);
2954 xfs_trans_binval(*trans, bp); 2954 xfs_trans_binval(*trans, bp);
2955 /* 2955 /*
2956 * Roll to next transaction. 2956 * Roll to next transaction.
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index 76ab7b0cbb3a..919756e3ba53 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -52,7 +52,7 @@ typedef struct xfs_attr_sf_sort {
52 __uint8_t valuelen; /* length of value */ 52 __uint8_t valuelen; /* length of value */
53 __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ 53 __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
54 xfs_dahash_t hash; /* this entry's hash value */ 54 xfs_dahash_t hash; /* this entry's hash value */
55 char *name; /* name value, pointer into buffer */ 55 unsigned char *name; /* name value, pointer into buffer */
56} xfs_attr_sf_sort_t; 56} xfs_attr_sf_sort_t;
57 57
58#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \ 58#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 98251cdc52aa..1869fb973819 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2629,13 +2629,12 @@ xfs_bmap_btalloc(
2629 if (startag == NULLAGNUMBER) 2629 if (startag == NULLAGNUMBER)
2630 startag = ag = 0; 2630 startag = ag = 0;
2631 notinit = 0; 2631 notinit = 0;
2632 down_read(&mp->m_peraglock); 2632 pag = xfs_perag_get(mp, ag);
2633 while (blen < ap->alen) { 2633 while (blen < ap->alen) {
2634 pag = &mp->m_perag[ag];
2635 if (!pag->pagf_init && 2634 if (!pag->pagf_init &&
2636 (error = xfs_alloc_pagf_init(mp, args.tp, 2635 (error = xfs_alloc_pagf_init(mp, args.tp,
2637 ag, XFS_ALLOC_FLAG_TRYLOCK))) { 2636 ag, XFS_ALLOC_FLAG_TRYLOCK))) {
2638 up_read(&mp->m_peraglock); 2637 xfs_perag_put(pag);
2639 return error; 2638 return error;
2640 } 2639 }
2641 /* 2640 /*
@@ -2667,13 +2666,13 @@ xfs_bmap_btalloc(
2667 break; 2666 break;
2668 2667
2669 error = xfs_filestream_new_ag(ap, &ag); 2668 error = xfs_filestream_new_ag(ap, &ag);
2670 if (error) { 2669 xfs_perag_put(pag);
2671 up_read(&mp->m_peraglock); 2670 if (error)
2672 return error; 2671 return error;
2673 }
2674 2672
2675 /* loop again to set 'blen'*/ 2673 /* loop again to set 'blen'*/
2676 startag = NULLAGNUMBER; 2674 startag = NULLAGNUMBER;
2675 pag = xfs_perag_get(mp, ag);
2677 continue; 2676 continue;
2678 } 2677 }
2679 } 2678 }
@@ -2681,8 +2680,10 @@ xfs_bmap_btalloc(
2681 ag = 0; 2680 ag = 0;
2682 if (ag == startag) 2681 if (ag == startag)
2683 break; 2682 break;
2683 xfs_perag_put(pag);
2684 pag = xfs_perag_get(mp, ag);
2684 } 2685 }
2685 up_read(&mp->m_peraglock); 2686 xfs_perag_put(pag);
2686 /* 2687 /*
2687 * Since the above loop did a BUF_TRYLOCK, it is 2688 * Since the above loop did a BUF_TRYLOCK, it is
2688 * possible that there is space for this request. 2689 * possible that there is space for this request.
@@ -4470,7 +4471,7 @@ xfs_bmapi(
4470 xfs_fsblock_t abno; /* allocated block number */ 4471 xfs_fsblock_t abno; /* allocated block number */
4471 xfs_extlen_t alen; /* allocated extent length */ 4472 xfs_extlen_t alen; /* allocated extent length */
4472 xfs_fileoff_t aoff; /* allocated file offset */ 4473 xfs_fileoff_t aoff; /* allocated file offset */
4473 xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ 4474 xfs_bmalloca_t bma = { 0 }; /* args for xfs_bmap_alloc */
4474 xfs_btree_cur_t *cur; /* bmap btree cursor */ 4475 xfs_btree_cur_t *cur; /* bmap btree cursor */
4475 xfs_fileoff_t end; /* end of mapped file region */ 4476 xfs_fileoff_t end; /* end of mapped file region */
4476 int eof; /* we've hit the end of extents */ 4477 int eof; /* we've hit the end of extents */
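
The btalloc conversion is the trickiest of the perag changes because the reference is loop-carried: each AG hop must put the old reference before getting the next, and the single put after the loop releases whichever AG the search ended on. (The same diff also zero-initialises the on-stack xfs_bmalloca_t.) The shape, condensed from the hunk above:

	pag = xfs_perag_get(mp, ag);
	while (blen < ap->alen) {
		/* ... examine pag; advance ag; break when back at startag ... */
		xfs_perag_put(pag);		/* drop the old AG ... */
		pag = xfs_perag_get(mp, ag);	/* ... before taking the next */
	}
	xfs_perag_put(pag);			/* release the final reference */
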
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 38751d5fac6f..416e47e54b83 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -334,7 +334,7 @@ xfs_bmbt_disk_set_allf(
334/* 334/*
335 * Set all the fields in a bmap extent record from the uncompressed form. 335 * Set all the fields in a bmap extent record from the uncompressed form.
336 */ 336 */
337void 337STATIC void
338xfs_bmbt_disk_set_all( 338xfs_bmbt_disk_set_all(
339 xfs_bmbt_rec_t *r, 339 xfs_bmbt_rec_t *r,
340 xfs_bmbt_irec_t *s) 340 xfs_bmbt_irec_t *s)
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index cf07ca7c22e7..0e66c4ea0f85 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -223,7 +223,6 @@ extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v);
223extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v); 223extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v);
224extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v); 224extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v);
225 225
226extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
227extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, 226extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
228 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 227 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
229 228
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 36a0992dd669..96be4b0f2496 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -977,7 +977,7 @@ xfs_btree_get_buf_block(
977 xfs_daddr_t d; 977 xfs_daddr_t d;
978 978
979 /* need to sort out how callers deal with failures first */ 979 /* need to sort out how callers deal with failures first */
980 ASSERT(!(flags & XFS_BUF_TRYLOCK)); 980 ASSERT(!(flags & XBF_TRYLOCK));
981 981
982 d = xfs_btree_ptr_to_daddr(cur, ptr); 982 d = xfs_btree_ptr_to_daddr(cur, ptr);
983 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 983 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
@@ -1008,7 +1008,7 @@ xfs_btree_read_buf_block(
1008 int error; 1008 int error;
1009 1009
1010 /* need to sort out how callers deal with failures first */ 1010 /* need to sort out how callers deal with failures first */
1011 ASSERT(!(flags & XFS_BUF_TRYLOCK)); 1011 ASSERT(!(flags & XBF_TRYLOCK));
1012 1012
1013 d = xfs_btree_ptr_to_daddr(cur, ptr); 1013 d = xfs_btree_ptr_to_daddr(cur, ptr);
1014 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1014 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a30f7e9eb2b9..f3c49e69eab9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -250,7 +250,7 @@ xfs_buf_item_format(
250 ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); 250 ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
251 vecp->i_addr = (xfs_caddr_t)&bip->bli_format; 251 vecp->i_addr = (xfs_caddr_t)&bip->bli_format;
252 vecp->i_len = base_size; 252 vecp->i_len = base_size;
253 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); 253 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
254 vecp++; 254 vecp++;
255 nvecs = 1; 255 nvecs = 1;
256 256
@@ -297,14 +297,14 @@ xfs_buf_item_format(
297 buffer_offset = first_bit * XFS_BLI_CHUNK; 297 buffer_offset = first_bit * XFS_BLI_CHUNK;
298 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 298 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
299 vecp->i_len = nbits * XFS_BLI_CHUNK; 299 vecp->i_len = nbits * XFS_BLI_CHUNK;
300 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 300 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
301 nvecs++; 301 nvecs++;
302 break; 302 break;
303 } else if (next_bit != last_bit + 1) { 303 } else if (next_bit != last_bit + 1) {
304 buffer_offset = first_bit * XFS_BLI_CHUNK; 304 buffer_offset = first_bit * XFS_BLI_CHUNK;
305 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 305 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
306 vecp->i_len = nbits * XFS_BLI_CHUNK; 306 vecp->i_len = nbits * XFS_BLI_CHUNK;
307 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 307 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
308 nvecs++; 308 nvecs++;
309 vecp++; 309 vecp++;
310 first_bit = next_bit; 310 first_bit = next_bit;
@@ -316,7 +316,7 @@ xfs_buf_item_format(
316 buffer_offset = first_bit * XFS_BLI_CHUNK; 316 buffer_offset = first_bit * XFS_BLI_CHUNK;
317 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 317 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
318 vecp->i_len = nbits * XFS_BLI_CHUNK; 318 vecp->i_len = nbits * XFS_BLI_CHUNK;
319 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 319 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
320/* You would think we need to bump the nvecs here too, but we do not; 320/* You would think we need to bump the nvecs here too, but we do not;
321 * this number is used by recovery, and it gets confused by the boundary 321 * this number is used by recovery, and it gets confused by the boundary
322 * split here. 322 * split here.
@@ -467,8 +467,10 @@ xfs_buf_item_unpin_remove(
467/* 467/*
468 * This is called to attempt to lock the buffer associated with this 468 * This is called to attempt to lock the buffer associated with this
469 * buf log item. Don't sleep on the buffer lock. If we can't get 469 * buf log item. Don't sleep on the buffer lock. If we can't get
470 * the lock right away, return 0. If we can get the lock, pull the 470 * the lock right away, return 0. If we can get the lock, take a
471 * buffer from the free list, mark it busy, and return 1. 471 * reference to the buffer. If this is a delayed write buffer that
472 * needs AIL help to be written back, invoke the pushbuf routine
473 * rather than the normal success path.
472 */ 474 */
473STATIC uint 475STATIC uint
474xfs_buf_item_trylock( 476xfs_buf_item_trylock(
@@ -477,24 +479,18 @@ xfs_buf_item_trylock(
477 xfs_buf_t *bp; 479 xfs_buf_t *bp;
478 480
479 bp = bip->bli_buf; 481 bp = bip->bli_buf;
480 482 if (XFS_BUF_ISPINNED(bp))
481 if (XFS_BUF_ISPINNED(bp)) {
482 return XFS_ITEM_PINNED; 483 return XFS_ITEM_PINNED;
483 } 484 if (!XFS_BUF_CPSEMA(bp))
484
485 if (!XFS_BUF_CPSEMA(bp)) {
486 return XFS_ITEM_LOCKED; 485 return XFS_ITEM_LOCKED;
487 }
488 486
489 /* 487 /* take a reference to the buffer. */
490 * Remove the buffer from the free list. Only do this
491 * if it's on the free list. Private buffers like the
492 * superblock buffer are not.
493 */
494 XFS_BUF_HOLD(bp); 488 XFS_BUF_HOLD(bp);
495 489
496 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 490 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
497 trace_xfs_buf_item_trylock(bip); 491 trace_xfs_buf_item_trylock(bip);
492 if (XFS_BUF_ISDELAYWRITE(bp))
493 return XFS_ITEM_PUSHBUF;
498 return XFS_ITEM_SUCCESS; 494 return XFS_ITEM_SUCCESS;
499} 495}
500 496
@@ -626,11 +622,9 @@ xfs_buf_item_committed(
626} 622}
627 623
628/* 624/*
629 * This is called to asynchronously write the buffer associated with this 625 * The buffer is locked, but is not a delayed write buffer. This happens
630 * buf log item out to disk. The buffer will already have been locked by 626 * if we race with IO completion and hence we don't want to try to write it
631 * a successful call to xfs_buf_item_trylock(). If the buffer still has 627 * again. Just release the buffer.
632 * B_DELWRI set, then get it going out to disk with a call to bawrite().
633 * If not, then just release the buffer.
634 */ 628 */
635STATIC void 629STATIC void
636xfs_buf_item_push( 630xfs_buf_item_push(
@@ -642,17 +636,29 @@ xfs_buf_item_push(
642 trace_xfs_buf_item_push(bip); 636 trace_xfs_buf_item_push(bip);
643 637
644 bp = bip->bli_buf; 638 bp = bip->bli_buf;
639 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
640 xfs_buf_relse(bp);
641}
645 642
646 if (XFS_BUF_ISDELAYWRITE(bp)) { 643/*
647 int error; 644 * The buffer is locked and is a delayed write buffer. Promote the buffer
648 error = xfs_bawrite(bip->bli_item.li_mountp, bp); 645 * in the delayed write queue as the caller knows that they must invoke
649 if (error) 646 * the xfsbufd to get this buffer written. We have to unlock the buffer
650 xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, 647 * to allow the xfsbufd to write it, too.
651 "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", 648 */
652 error, bip, bp); 649STATIC void
653 } else { 650xfs_buf_item_pushbuf(
654 xfs_buf_relse(bp); 651 xfs_buf_log_item_t *bip)
655 } 652{
653 xfs_buf_t *bp;
654
655 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
656 trace_xfs_buf_item_pushbuf(bip);
657
658 bp = bip->bli_buf;
659 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
660 xfs_buf_delwri_promote(bp);
661 xfs_buf_relse(bp);
656} 662}
657 663
658/* ARGSUSED */ 664/* ARGSUSED */
@@ -677,7 +683,7 @@ static struct xfs_item_ops xfs_buf_item_ops = {
677 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 683 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
678 xfs_buf_item_committed, 684 xfs_buf_item_committed,
679 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, 685 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
680 .iop_pushbuf = NULL, 686 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf,
681 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 687 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
682 xfs_buf_item_committing 688 xfs_buf_item_committing
683}; 689};
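
The rewritten trylock/push pair changes the AIL contract: trylock now classifies a locked delayed-write buffer as needing pushbuf help instead of writing it inline, and the new iop_pushbuf promotes it on the delwri queue for the xfsbufd. A self-contained sketch of the classification, using an illustrative enum in place of the XFS_ITEM_* constants:

	enum item_push_sketch {
		ITEM_SUCCESS,	/* locked, clean: ->iop_push releases it */
		ITEM_PINNED,	/* pinned in the log: force the log first */
		ITEM_LOCKED,	/* lock contended: try again later */
		ITEM_PUSHBUF,	/* locked delwri: ->iop_pushbuf promotes it */
	};

	/* Mirrors the decision order in xfs_buf_item_trylock() above. */
	static enum item_push_sketch classify_buf(int pinned, int got_lock,
						  int delwri)
	{
		if (pinned)
			return ITEM_PINNED;
		if (!got_lock)
			return ITEM_LOCKED;
		return delwri ? ITEM_PUSHBUF : ITEM_SUCCESS;
	}
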
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index c0c8869115b1..0ca556b4bf31 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1534,8 +1534,8 @@ xfs_da_hashname(const __uint8_t *name, int namelen)
1534enum xfs_dacmp 1534enum xfs_dacmp
1535xfs_da_compname( 1535xfs_da_compname(
1536 struct xfs_da_args *args, 1536 struct xfs_da_args *args,
1537 const char *name, 1537 const unsigned char *name,
1538 int len) 1538 int len)
1539{ 1539{
1540 return (args->namelen == len && memcmp(args->name, name, len) == 0) ? 1540 return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1541 XFS_CMP_EXACT : XFS_CMP_DIFFERENT; 1541 XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
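
xfs_da_compname() is the default comparator in the xfs_nameops vtable shown in the xfs_da_btree.h hunk below; the ASCII-CI variant in xfs_dir2.c layers case folding on top of it. A self-contained mirror of the exact-match default over unsigned char names:

	#include <string.h>

	enum dacmp_sketch { CMP_DIFFERENT, CMP_EXACT };	/* subset of xfs_dacmp */

	static enum dacmp_sketch compname(const unsigned char *want, int wantlen,
					  const unsigned char *have, int havelen)
	{
		/* same test as xfs_da_compname() above */
		return (wantlen == havelen && memcmp(want, have, wantlen) == 0)
			? CMP_EXACT : CMP_DIFFERENT;
	}
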
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 30cd08f56a3a..fe9f5a8c1d2a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -209,7 +209,8 @@ typedef struct xfs_da_state {
209 */ 209 */
210struct xfs_nameops { 210struct xfs_nameops {
211 xfs_dahash_t (*hashname)(struct xfs_name *); 211 xfs_dahash_t (*hashname)(struct xfs_name *);
212 enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int); 212 enum xfs_dacmp (*compname)(struct xfs_da_args *,
213 const unsigned char *, int);
213}; 214};
214 215
215 216
@@ -260,7 +261,7 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
260 261
261uint xfs_da_hashname(const __uint8_t *name_string, int name_length); 262uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
262enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, 263enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
263 const char *name, int len); 264 const unsigned char *name, int len);
264 265
265 266
266xfs_da_state_t *xfs_da_state_alloc(void); 267xfs_da_state_t *xfs_da_state_alloc(void);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 84ca1cf16a1e..cd27c9d6c71f 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -45,15 +45,21 @@
45#include "xfs_vnodeops.h" 45#include "xfs_vnodeops.h"
46#include "xfs_trace.h" 46#include "xfs_trace.h"
47 47
48
49static int xfs_swap_extents(
50 xfs_inode_t *ip, /* target inode */
51 xfs_inode_t *tip, /* tmp inode */
52 xfs_swapext_t *sxp);
53
48/* 54/*
49 * Syssgi interface for swapext 55 * ioctl interface for swapext
50 */ 56 */
51int 57int
52xfs_swapext( 58xfs_swapext(
53 xfs_swapext_t *sxp) 59 xfs_swapext_t *sxp)
54{ 60{
55 xfs_inode_t *ip, *tip; 61 xfs_inode_t *ip, *tip;
56 struct file *file, *target_file; 62 struct file *file, *tmp_file;
57 int error = 0; 63 int error = 0;
58 64
59 /* Pull information for the target fd */ 65 /* Pull information for the target fd */
@@ -68,46 +74,46 @@ xfs_swapext(
68 goto out_put_file; 74 goto out_put_file;
69 } 75 }
70 76
71 target_file = fget((int)sxp->sx_fdtmp); 77 tmp_file = fget((int)sxp->sx_fdtmp);
72 if (!target_file) { 78 if (!tmp_file) {
73 error = XFS_ERROR(EINVAL); 79 error = XFS_ERROR(EINVAL);
74 goto out_put_file; 80 goto out_put_file;
75 } 81 }
76 82
77 if (!(target_file->f_mode & FMODE_WRITE) || 83 if (!(tmp_file->f_mode & FMODE_WRITE) ||
78 (target_file->f_flags & O_APPEND)) { 84 (tmp_file->f_flags & O_APPEND)) {
79 error = XFS_ERROR(EBADF); 85 error = XFS_ERROR(EBADF);
80 goto out_put_target_file; 86 goto out_put_tmp_file;
81 } 87 }
82 88
83 if (IS_SWAPFILE(file->f_path.dentry->d_inode) || 89 if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
84 IS_SWAPFILE(target_file->f_path.dentry->d_inode)) { 90 IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) {
85 error = XFS_ERROR(EINVAL); 91 error = XFS_ERROR(EINVAL);
86 goto out_put_target_file; 92 goto out_put_tmp_file;
87 } 93 }
88 94
89 ip = XFS_I(file->f_path.dentry->d_inode); 95 ip = XFS_I(file->f_path.dentry->d_inode);
90 tip = XFS_I(target_file->f_path.dentry->d_inode); 96 tip = XFS_I(tmp_file->f_path.dentry->d_inode);
91 97
92 if (ip->i_mount != tip->i_mount) { 98 if (ip->i_mount != tip->i_mount) {
93 error = XFS_ERROR(EINVAL); 99 error = XFS_ERROR(EINVAL);
94 goto out_put_target_file; 100 goto out_put_tmp_file;
95 } 101 }
96 102
97 if (ip->i_ino == tip->i_ino) { 103 if (ip->i_ino == tip->i_ino) {
98 error = XFS_ERROR(EINVAL); 104 error = XFS_ERROR(EINVAL);
99 goto out_put_target_file; 105 goto out_put_tmp_file;
100 } 106 }
101 107
102 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 108 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
103 error = XFS_ERROR(EIO); 109 error = XFS_ERROR(EIO);
104 goto out_put_target_file; 110 goto out_put_tmp_file;
105 } 111 }
106 112
107 error = xfs_swap_extents(ip, tip, sxp); 113 error = xfs_swap_extents(ip, tip, sxp);
108 114
109 out_put_target_file: 115 out_put_tmp_file:
110 fput(target_file); 116 fput(tmp_file);
111 out_put_file: 117 out_put_file:
112 fput(file); 118 fput(file);
113 out: 119 out:
@@ -186,7 +192,7 @@ xfs_swap_extents_check_format(
186 return 0; 192 return 0;
187} 193}
188 194
189int 195static int
190xfs_swap_extents( 196xfs_swap_extents(
191 xfs_inode_t *ip, /* target inode */ 197 xfs_inode_t *ip, /* target inode */
192 xfs_inode_t *tip, /* tmp inode */ 198 xfs_inode_t *tip, /* tmp inode */
@@ -254,6 +260,9 @@ xfs_swap_extents(
254 goto out_unlock; 260 goto out_unlock;
255 } 261 }
256 262
263 trace_xfs_swap_extent_before(ip, 0);
264 trace_xfs_swap_extent_before(tip, 1);
265
257 /* check inode formats now that data is flushed */ 266 /* check inode formats now that data is flushed */
258 error = xfs_swap_extents_check_format(ip, tip); 267 error = xfs_swap_extents_check_format(ip, tip);
259 if (error) { 268 if (error) {
@@ -421,6 +430,8 @@ xfs_swap_extents(
421 430
422 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); 431 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
423 432
433 trace_xfs_swap_extent_after(ip, 0);
434 trace_xfs_swap_extent_after(tip, 1);
424out: 435out:
425 kmem_free(tempifp); 436 kmem_free(tempifp);
426 return error; 437 return error;
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
index 4f55a6306558..20bdd935c121 100644
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -48,9 +48,6 @@ typedef struct xfs_swapext
48 */ 48 */
49int xfs_swapext(struct xfs_swapext *sx); 49int xfs_swapext(struct xfs_swapext *sx);
50 50
51int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
52 struct xfs_swapext *sxp);
53
54#endif /* __KERNEL__ */ 51#endif /* __KERNEL__ */
55 52
56#endif /* __XFS_DFRAG_H__ */ 53#endif /* __XFS_DFRAG_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 93634a7e90e9..42520f041265 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -44,7 +44,7 @@
44#include "xfs_vnodeops.h" 44#include "xfs_vnodeops.h"
45#include "xfs_trace.h" 45#include "xfs_trace.h"
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2};
48 48
49/* 49/*
50 * ASCII case-insensitive (ie. A-Z) support for directories that was 50 * ASCII case-insensitive (ie. A-Z) support for directories that was
@@ -66,8 +66,8 @@ xfs_ascii_ci_hashname(
66STATIC enum xfs_dacmp 66STATIC enum xfs_dacmp
67xfs_ascii_ci_compname( 67xfs_ascii_ci_compname(
68 struct xfs_da_args *args, 68 struct xfs_da_args *args,
69 const char *name, 69 const unsigned char *name,
70 int len) 70 int len)
71{ 71{
72 enum xfs_dacmp result; 72 enum xfs_dacmp result;
73 int i; 73 int i;
@@ -247,7 +247,7 @@ xfs_dir_createname(
247int 247int
248xfs_dir_cilookup_result( 248xfs_dir_cilookup_result(
249 struct xfs_da_args *args, 249 struct xfs_da_args *args,
250 const char *name, 250 const unsigned char *name,
251 int len) 251 int len)
252{ 252{
253 if (args->cmpresult == XFS_CMP_DIFFERENT) 253 if (args->cmpresult == XFS_CMP_DIFFERENT)
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 1d9ef96f33aa..74a3b1057685 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -100,7 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, 100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
101 struct xfs_dabuf *bp); 101 struct xfs_dabuf *bp);
102 102
103extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name, 103extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
104 int len); 104 const unsigned char *name, int len);
105 105
106#endif /* __XFS_DIR2_H__ */ 106#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index ddc4ecc7807f..779a267b0a84 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -57,8 +57,8 @@ static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
57void 57void
58xfs_dir_startup(void) 58xfs_dir_startup(void)
59{ 59{
60 xfs_dir_hash_dot = xfs_da_hashname(".", 1); 60 xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1);
61 xfs_dir_hash_dotdot = xfs_da_hashname("..", 2); 61 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
62} 62}
63 63
64/* 64/*
@@ -513,8 +513,9 @@ xfs_dir2_block_getdents(
513 /* 513 /*
514 * If it didn't fit, set the final offset to here & return. 514 * If it didn't fit, set the final offset to here & return.
515 */ 515 */
516 if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, 516 if (filldir(dirent, (char *)dep->name, dep->namelen,
517 be64_to_cpu(dep->inumber), DT_UNKNOWN)) { 517 cook & 0x7fffffff, be64_to_cpu(dep->inumber),
518 DT_UNKNOWN)) {
518 *offset = cook & 0x7fffffff; 519 *offset = cook & 0x7fffffff;
519 xfs_da_brelse(NULL, bp); 520 xfs_da_brelse(NULL, bp);
520 return 0; 521 return 0;
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 29f484c11b3a..e2d89854ec9e 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1081,7 +1081,7 @@ xfs_dir2_leaf_getdents(
1081 dep = (xfs_dir2_data_entry_t *)ptr; 1081 dep = (xfs_dir2_data_entry_t *)ptr;
1082 length = xfs_dir2_data_entsize(dep->namelen); 1082 length = xfs_dir2_data_entsize(dep->namelen);
1083 1083
1084 if (filldir(dirent, dep->name, dep->namelen, 1084 if (filldir(dirent, (char *)dep->name, dep->namelen,
1085 xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, 1085 xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1086 be64_to_cpu(dep->inumber), DT_UNKNOWN)) 1086 be64_to_cpu(dep->inumber), DT_UNKNOWN))
1087 break; 1087 break;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index ce6e355199b5..78fc4d9ae756 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -65,7 +65,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
65/* 65/*
66 * Log entries from a freespace block. 66 * Log entries from a freespace block.
67 */ 67 */
68void 68STATIC void
69xfs_dir2_free_log_bests( 69xfs_dir2_free_log_bests(
70 xfs_trans_t *tp, /* transaction pointer */ 70 xfs_trans_t *tp, /* transaction pointer */
71 xfs_dabuf_t *bp, /* freespace buffer */ 71 xfs_dabuf_t *bp, /* freespace buffer */
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index dde72db3d695..82dfe7147195 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -75,8 +75,6 @@ xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
75 return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); 75 return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
76} 76}
77 77
78extern void xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
79 int first, int last);
80extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, 78extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
81 struct xfs_dabuf *lbp); 79 struct xfs_dabuf *lbp);
82extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); 80extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 9d4f17a69676..c1a5945d463a 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -782,7 +782,7 @@ xfs_dir2_sf_getdents(
782 } 782 }
783 783
784 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); 784 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
785 if (filldir(dirent, sfep->name, sfep->namelen, 785 if (filldir(dirent, (char *)sfep->name, sfep->namelen,
786 off & 0x7fffffff, ino, DT_UNKNOWN)) { 786 off & 0x7fffffff, ino, DT_UNKNOWN)) {
787 *offset = off & 0x7fffffff; 787 *offset = off & 0x7fffffff;
788 return 0; 788 return 0;
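
All three getdents variants (block, leaf and shortform) get the same adjustment: directory entry names are now stored as unsigned char, while the VFS filldir callback still takes a plain char pointer, so every call site gains an explicit cast. A minimal sketch of the resulting call shape (the cookie variable is a stand-in for whichever offset encoding each variant uses):

        /* dep->name is unsigned char[]; filldir() expects const char * */
        if (filldir(dirent, (char *)dep->name, dep->namelen,
                    cookie & 0x7fffffff, be64_to_cpu(dep->inumber),
                    DT_UNKNOWN)) {
                *offset = cookie & 0x7fffffff;  /* entry didn't fit; resume here */
                return 0;
        }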
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 05a4bdd4be39..6f35ed1b39b9 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -82,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
82 82
83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); 83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
84 log_vector->i_len = size; 84 log_vector->i_len = size;
85 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); 85 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
86 ASSERT(size >= sizeof(xfs_efi_log_format_t)); 86 ASSERT(size >= sizeof(xfs_efi_log_format_t));
87} 87}
88 88
@@ -406,7 +406,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
406 406
407 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); 407 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format);
408 log_vector->i_len = size; 408 log_vector->i_len = size;
409 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); 409 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
410 ASSERT(size >= sizeof(xfs_efd_log_format_t)); 410 ASSERT(size >= sizeof(xfs_efd_log_format_t));
411} 411}
412 412
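
Both hunks above replace the XLOG_VEC_SET_TYPE() macro with a direct store to the log vector's i_type field; the xfs_inode_item.c changes further down repeat the same substitution. The pattern, using only the field names shown in these hunks:

        log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
        log_vector->i_len  = size;
        log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;  /* plain assignment, no macro */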
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a631e1451abb..390850ee6603 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -140,6 +140,7 @@ _xfs_filestream_pick_ag(
140 int flags, 140 int flags,
141 xfs_extlen_t minlen) 141 xfs_extlen_t minlen)
142{ 142{
143 int streams, max_streams;
143 int err, trylock, nscan; 144 int err, trylock, nscan;
144 xfs_extlen_t longest, free, minfree, maxfree = 0; 145 xfs_extlen_t longest, free, minfree, maxfree = 0;
145 xfs_agnumber_t ag, max_ag = NULLAGNUMBER; 146 xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
@@ -155,15 +156,15 @@ _xfs_filestream_pick_ag(
155 trylock = XFS_ALLOC_FLAG_TRYLOCK; 156 trylock = XFS_ALLOC_FLAG_TRYLOCK;
156 157
157 for (nscan = 0; 1; nscan++) { 158 for (nscan = 0; 1; nscan++) {
158 159 pag = xfs_perag_get(mp, ag);
159 TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag)); 160 TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
160
161 pag = mp->m_perag + ag;
162 161
163 if (!pag->pagf_init) { 162 if (!pag->pagf_init) {
164 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); 163 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
165 if (err && !trylock) 164 if (err && !trylock) {
165 xfs_perag_put(pag);
166 return err; 166 return err;
167 }
167 } 168 }
168 169
169 /* Might fail sometimes during the 1st pass with trylock set. */ 170 /* Might fail sometimes during the 1st pass with trylock set. */
@@ -173,6 +174,7 @@ _xfs_filestream_pick_ag(
173 /* Keep track of the AG with the most free blocks. */ 174 /* Keep track of the AG with the most free blocks. */
174 if (pag->pagf_freeblks > maxfree) { 175 if (pag->pagf_freeblks > maxfree) {
175 maxfree = pag->pagf_freeblks; 176 maxfree = pag->pagf_freeblks;
177 max_streams = atomic_read(&pag->pagf_fstrms);
176 max_ag = ag; 178 max_ag = ag;
177 } 179 }
178 180
@@ -195,6 +197,8 @@ _xfs_filestream_pick_ag(
195 197
196 /* Break out, retaining the reference on the AG. */ 198 /* Break out, retaining the reference on the AG. */
197 free = pag->pagf_freeblks; 199 free = pag->pagf_freeblks;
200 streams = atomic_read(&pag->pagf_fstrms);
201 xfs_perag_put(pag);
198 *agp = ag; 202 *agp = ag;
199 break; 203 break;
200 } 204 }
@@ -202,6 +206,7 @@ _xfs_filestream_pick_ag(
202 /* Drop the reference on this AG, it's not usable. */ 206 /* Drop the reference on this AG, it's not usable. */
203 xfs_filestream_put_ag(mp, ag); 207 xfs_filestream_put_ag(mp, ag);
204next_ag: 208next_ag:
209 xfs_perag_put(pag);
205 /* Move to the next AG, wrapping to AG 0 if necessary. */ 210 /* Move to the next AG, wrapping to AG 0 if necessary. */
206 if (++ag >= mp->m_sb.sb_agcount) 211 if (++ag >= mp->m_sb.sb_agcount)
207 ag = 0; 212 ag = 0;
@@ -229,6 +234,7 @@ next_ag:
229 if (max_ag != NULLAGNUMBER) { 234 if (max_ag != NULLAGNUMBER) {
230 xfs_filestream_get_ag(mp, max_ag); 235 xfs_filestream_get_ag(mp, max_ag);
231 TRACE_AG_PICK1(mp, max_ag, maxfree); 236 TRACE_AG_PICK1(mp, max_ag, maxfree);
237 streams = max_streams;
232 free = maxfree; 238 free = maxfree;
233 *agp = max_ag; 239 *agp = max_ag;
234 break; 240 break;
@@ -240,16 +246,14 @@ next_ag:
240 return 0; 246 return 0;
241 } 247 }
242 248
243 TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp), 249 TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
244 free, nscan, flags);
245 250
246 return 0; 251 return 0;
247} 252}
248 253
249/* 254/*
250 * Set the allocation group number for a file or a directory, updating inode 255 * Set the allocation group number for a file or a directory, updating inode
251 * references and per-AG references as appropriate. Must be called with the 256 * references and per-AG references as appropriate.
252 * m_peraglock held in read mode.
253 */ 257 */
254static int 258static int
255_xfs_filestream_update_ag( 259_xfs_filestream_update_ag(
@@ -451,20 +455,6 @@ xfs_filestream_unmount(
451} 455}
452 456
453/* 457/*
454 * If the mount point's m_perag array is going to be reallocated, all
455 * outstanding cache entries must be flushed to avoid accessing reference count
456 * addresses that have been freed. The call to xfs_filestream_flush() must be
457 * made inside the block that holds the m_peraglock in write mode to do the
458 * reallocation.
459 */
460void
461xfs_filestream_flush(
462 xfs_mount_t *mp)
463{
464 xfs_mru_cache_flush(mp->m_filestream);
465}
466
467/*
468 * Return the AG of the filestream the file or directory belongs to, or 458 * Return the AG of the filestream the file or directory belongs to, or
469 * NULLAGNUMBER otherwise. 459 * NULLAGNUMBER otherwise.
470 */ 460 */
@@ -526,7 +516,6 @@ xfs_filestream_associate(
526 516
527 mp = pip->i_mount; 517 mp = pip->i_mount;
528 cache = mp->m_filestream; 518 cache = mp->m_filestream;
529 down_read(&mp->m_peraglock);
530 519
531 /* 520 /*
532 * We have a problem, Houston. 521 * We have a problem, Houston.
@@ -543,10 +532,8 @@ xfs_filestream_associate(
543 * 532 *
544 * So, if we can't get the iolock without sleeping then just give up 533 * So, if we can't get the iolock without sleeping then just give up
545 */ 534 */
546 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) { 535 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
547 up_read(&mp->m_peraglock);
548 return 1; 536 return 1;
549 }
550 537
551 /* If the parent directory is already in the cache, use its AG. */ 538 /* If the parent directory is already in the cache, use its AG. */
552 item = xfs_mru_cache_lookup(cache, pip->i_ino); 539 item = xfs_mru_cache_lookup(cache, pip->i_ino);
@@ -601,7 +588,6 @@ exit_did_pick:
601 588
602exit: 589exit:
603 xfs_iunlock(pip, XFS_IOLOCK_EXCL); 590 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
604 up_read(&mp->m_peraglock);
605 return -err; 591 return -err;
606} 592}
607 593
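
With m_peraglock gone, _xfs_filestream_pick_ag() instead holds a counted reference on the xfs_perag it is examining, so every way out of the scan loop must pair the xfs_perag_get() with an xfs_perag_put(). A condensed sketch of the loop's reference discipline, built only from the calls shown above:

        for (nscan = 0; 1; nscan++) {
                pag = xfs_perag_get(mp, ag);            /* take a reference */
                if (!pag->pagf_init) {
                        err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
                        if (err && !trylock) {
                                xfs_perag_put(pag);     /* drop ref on the error path */
                                return err;
                        }
                }
                /* ... evaluate this AG; break with the ref already dropped ... */
next_ag:
                xfs_perag_put(pag);                     /* drop ref before moving on */
                if (++ag >= mp->m_sb.sb_agcount)
                        ag = 0;
        }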
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 4aba67c5f64f..260f757bbc5d 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -79,12 +79,21 @@ extern ktrace_t *xfs_filestreams_trace_buf;
79 * the cache that reference per-ag array elements that have since been 79 * the cache that reference per-ag array elements that have since been
80 * reallocated. 80 * reallocated.
81 */ 81 */
82/*
83 * xfs_filestream_peek_ag is only used in tracing code
84 */
82static inline int 85static inline int
83xfs_filestream_peek_ag( 86xfs_filestream_peek_ag(
84 xfs_mount_t *mp, 87 xfs_mount_t *mp,
85 xfs_agnumber_t agno) 88 xfs_agnumber_t agno)
86{ 89{
87 return atomic_read(&mp->m_perag[agno].pagf_fstrms); 90 struct xfs_perag *pag;
91 int ret;
92
93 pag = xfs_perag_get(mp, agno);
94 ret = atomic_read(&pag->pagf_fstrms);
95 xfs_perag_put(pag);
96 return ret;
88} 97}
89 98
90static inline int 99static inline int
@@ -92,7 +101,13 @@ xfs_filestream_get_ag(
92 xfs_mount_t *mp, 101 xfs_mount_t *mp,
93 xfs_agnumber_t agno) 102 xfs_agnumber_t agno)
94{ 103{
95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms); 104 struct xfs_perag *pag;
105 int ret;
106
107 pag = xfs_perag_get(mp, agno);
108 ret = atomic_inc_return(&pag->pagf_fstrms);
109 xfs_perag_put(pag);
110 return ret;
96} 111}
97 112
98static inline int 113static inline int
@@ -100,7 +115,13 @@ xfs_filestream_put_ag(
100 xfs_mount_t *mp, 115 xfs_mount_t *mp,
101 xfs_agnumber_t agno) 116 xfs_agnumber_t agno)
102{ 117{
103 return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms); 118 struct xfs_perag *pag;
119 int ret;
120
121 pag = xfs_perag_get(mp, agno);
122 ret = atomic_dec_return(&pag->pagf_fstrms);
123 xfs_perag_put(pag);
124 return ret;
104} 125}
105 126
106/* allocation selection flags */ 127/* allocation selection flags */
@@ -114,7 +135,6 @@ int xfs_filestream_init(void);
114void xfs_filestream_uninit(void); 135void xfs_filestream_uninit(void);
115int xfs_filestream_mount(struct xfs_mount *mp); 136int xfs_filestream_mount(struct xfs_mount *mp);
116void xfs_filestream_unmount(struct xfs_mount *mp); 137void xfs_filestream_unmount(struct xfs_mount *mp);
117void xfs_filestream_flush(struct xfs_mount *mp);
118xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); 138xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
119int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); 139int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
120void xfs_filestream_deassociate(struct xfs_inode *ip); 140void xfs_filestream_deassociate(struct xfs_inode *ip);
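
The three inline wrappers above no longer index a flat m_perag array under m_peraglock; each brackets its atomic pagf_fstrms access with xfs_perag_get()/xfs_perag_put(), which look up the per-AG structure and pin it with a reference for the duration of the access. The caller-side pattern, repeated throughout this series:

        struct xfs_perag        *pag;

        pag = xfs_perag_get(mp, agno);  /* reference held from here ... */
        /* ... read or modify pag->pagf_fstrms, pag->pagi_freecount, etc. ... */
        xfs_perag_put(pag);             /* ... until here */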
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a13919a6a364..37a6f62c57b6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,27 +167,14 @@ xfs_growfs_data_private(
167 } 167 }
168 new = nb - mp->m_sb.sb_dblocks; 168 new = nb - mp->m_sb.sb_dblocks;
169 oagcount = mp->m_sb.sb_agcount; 169 oagcount = mp->m_sb.sb_agcount;
170 if (nagcount > oagcount) {
171 void *new_perag, *old_perag;
172
173 xfs_filestream_flush(mp);
174
175 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
176 KM_MAYFAIL);
177 if (!new_perag)
178 return XFS_ERROR(ENOMEM);
179
180 down_write(&mp->m_peraglock);
181 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
182 old_perag = mp->m_perag;
183 mp->m_perag = new_perag;
184
185 mp->m_flags |= XFS_MOUNT_32BITINODES;
186 nagimax = xfs_initialize_perag(mp, nagcount);
187 up_write(&mp->m_peraglock);
188 170
189 kmem_free(old_perag); 171 /* allocate the new per-ag structures */
172 if (nagcount > oagcount) {
173 error = xfs_initialize_perag(mp, nagcount, &nagimax);
174 if (error)
175 return error;
190 } 176 }
177
191 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 178 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
192 tp->t_flags |= XFS_TRANS_RESERVE; 179 tp->t_flags |= XFS_TRANS_RESERVE;
193 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), 180 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
@@ -196,6 +183,11 @@ xfs_growfs_data_private(
196 return error; 183 return error;
197 } 184 }
198 185
186 /*
187 * Write new AG headers to disk. Non-transactional, but written
188 * synchronously so they are completed prior to the growfs transaction
189 * being logged.
190 */
199 nfree = 0; 191 nfree = 0;
200 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { 192 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
201 /* 193 /*
@@ -359,6 +351,12 @@ xfs_growfs_data_private(
359 goto error0; 351 goto error0;
360 } 352 }
361 } 353 }
354
355 /*
356 * Update changed superblock fields transactionally. These are not
357 * seen by the rest of the world until the transaction commit applies
358 * them atomically to the superblock.
359 */
362 if (nagcount > oagcount) 360 if (nagcount > oagcount)
363 xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); 361 xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
364 if (nb > mp->m_sb.sb_dblocks) 362 if (nb > mp->m_sb.sb_dblocks)
@@ -369,9 +367,9 @@ xfs_growfs_data_private(
369 if (dpct) 367 if (dpct)
370 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 368 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
371 error = xfs_trans_commit(tp, 0); 369 error = xfs_trans_commit(tp, 0);
372 if (error) { 370 if (error)
373 return error; 371 return error;
374 } 372
375 /* New allocation groups fully initialized, so update mount struct */ 373 /* New allocation groups fully initialized, so update mount struct */
376 if (nagimax) 374 if (nagimax)
377 mp->m_maxagi = nagimax; 375 mp->m_maxagi = nagimax;
@@ -381,6 +379,8 @@ xfs_growfs_data_private(
381 mp->m_maxicount = icount << mp->m_sb.sb_inopblog; 379 mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
382 } else 380 } else
383 mp->m_maxicount = 0; 381 mp->m_maxicount = 0;
382
383 /* update secondary superblocks. */
384 for (agno = 1; agno < nagcount; agno++) { 384 for (agno = 1; agno < nagcount; agno++) {
385 error = xfs_read_buf(mp, mp->m_ddev_targp, 385 error = xfs_read_buf(mp, mp->m_ddev_targp,
386 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 386 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
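
Growing the AG count no longer reallocates the per-AG array with kmem_zalloc()/memcpy() under m_peraglock held in write mode. xfs_initialize_perag() now takes the new AG count and returns the inode-capable AG limit through an out parameter, evidently leaving existing per-AG references valid - which is why the xfs_filestream_flush() call could be deleted here, since the old flush existed precisely because reallocation freed the addresses behind outstanding cache entries. The new call shape, per the hunk above:

        /* allocate the new per-ag structures, if any */
        if (nagcount > oagcount) {
                error = xfs_initialize_perag(mp, nagcount, &nagimax);
                if (error)
                        return error;   /* fail the grow before the transaction starts */
        }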
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index cb907ba69c4c..9d884c127bb9 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -205,7 +205,7 @@ xfs_ialloc_inode_init(
205 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); 205 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
206 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 206 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
207 mp->m_bsize * blks_per_cluster, 207 mp->m_bsize * blks_per_cluster,
208 XFS_BUF_LOCK); 208 XBF_LOCK);
209 ASSERT(fbuf); 209 ASSERT(fbuf);
210 ASSERT(!XFS_BUF_GETERROR(fbuf)); 210 ASSERT(!XFS_BUF_GETERROR(fbuf));
211 211
@@ -253,6 +253,7 @@ xfs_ialloc_ag_alloc(
253 xfs_agino_t thisino; /* current inode number, for loop */ 253 xfs_agino_t thisino; /* current inode number, for loop */
254 int isaligned = 0; /* inode allocation at stripe unit */ 254 int isaligned = 0; /* inode allocation at stripe unit */
255 /* boundary */ 255 /* boundary */
256 struct xfs_perag *pag;
256 257
257 args.tp = tp; 258 args.tp = tp;
258 args.mp = tp->t_mountp; 259 args.mp = tp->t_mountp;
@@ -382,9 +383,9 @@ xfs_ialloc_ag_alloc(
382 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 383 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
383 be32_add_cpu(&agi->agi_count, newlen); 384 be32_add_cpu(&agi->agi_count, newlen);
384 be32_add_cpu(&agi->agi_freecount, newlen); 385 be32_add_cpu(&agi->agi_freecount, newlen);
385 down_read(&args.mp->m_peraglock); 386 pag = xfs_perag_get(args.mp, agno);
386 args.mp->m_perag[agno].pagi_freecount += newlen; 387 pag->pagi_freecount += newlen;
387 up_read(&args.mp->m_peraglock); 388 xfs_perag_put(pag);
388 agi->agi_newino = cpu_to_be32(newino); 389 agi->agi_newino = cpu_to_be32(newino);
389 390
390 /* 391 /*
@@ -486,9 +487,8 @@ xfs_ialloc_ag_select(
486 */ 487 */
487 agno = pagno; 488 agno = pagno;
488 flags = XFS_ALLOC_FLAG_TRYLOCK; 489 flags = XFS_ALLOC_FLAG_TRYLOCK;
489 down_read(&mp->m_peraglock);
490 for (;;) { 490 for (;;) {
491 pag = &mp->m_perag[agno]; 491 pag = xfs_perag_get(mp, agno);
492 if (!pag->pagi_init) { 492 if (!pag->pagi_init) {
493 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 493 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
494 agbp = NULL; 494 agbp = NULL;
@@ -527,7 +527,7 @@ xfs_ialloc_ag_select(
527 agbp = NULL; 527 agbp = NULL;
528 goto nextag; 528 goto nextag;
529 } 529 }
530 up_read(&mp->m_peraglock); 530 xfs_perag_put(pag);
531 return agbp; 531 return agbp;
532 } 532 }
533 } 533 }
@@ -535,22 +535,19 @@ unlock_nextag:
535 if (agbp) 535 if (agbp)
536 xfs_trans_brelse(tp, agbp); 536 xfs_trans_brelse(tp, agbp);
537nextag: 537nextag:
538 xfs_perag_put(pag);
538 /* 539 /*
539 * No point in iterating over the rest, if we're shutting 540 * No point in iterating over the rest, if we're shutting
540 * down. 541 * down.
541 */ 542 */
542 if (XFS_FORCED_SHUTDOWN(mp)) { 543 if (XFS_FORCED_SHUTDOWN(mp))
543 up_read(&mp->m_peraglock);
544 return NULL; 544 return NULL;
545 }
546 agno++; 545 agno++;
547 if (agno >= agcount) 546 if (agno >= agcount)
548 agno = 0; 547 agno = 0;
549 if (agno == pagno) { 548 if (agno == pagno) {
550 if (flags == 0) { 549 if (flags == 0)
551 up_read(&mp->m_peraglock);
552 return NULL; 550 return NULL;
553 }
554 flags = 0; 551 flags = 0;
555 } 552 }
556 } 553 }
@@ -672,6 +669,7 @@ xfs_dialloc(
672 xfs_agnumber_t tagno; /* testing allocation group number */ 669 xfs_agnumber_t tagno; /* testing allocation group number */
673 xfs_btree_cur_t *tcur; /* temp cursor */ 670 xfs_btree_cur_t *tcur; /* temp cursor */
674 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ 671 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
672 struct xfs_perag *pag;
675 673
676 674
677 if (*IO_agbp == NULL) { 675 if (*IO_agbp == NULL) {
@@ -771,13 +769,13 @@ nextag:
771 *inop = NULLFSINO; 769 *inop = NULLFSINO;
772 return noroom ? ENOSPC : 0; 770 return noroom ? ENOSPC : 0;
773 } 771 }
774 down_read(&mp->m_peraglock); 772 pag = xfs_perag_get(mp, tagno);
775 if (mp->m_perag[tagno].pagi_inodeok == 0) { 773 if (pag->pagi_inodeok == 0) {
776 up_read(&mp->m_peraglock); 774 xfs_perag_put(pag);
777 goto nextag; 775 goto nextag;
778 } 776 }
779 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); 777 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
780 up_read(&mp->m_peraglock); 778 xfs_perag_put(pag);
781 if (error) 779 if (error)
782 goto nextag; 780 goto nextag;
783 agi = XFS_BUF_TO_AGI(agbp); 781 agi = XFS_BUF_TO_AGI(agbp);
@@ -790,6 +788,7 @@ nextag:
790 */ 788 */
791 agno = tagno; 789 agno = tagno;
792 *IO_agbp = NULL; 790 *IO_agbp = NULL;
791 pag = xfs_perag_get(mp, agno);
793 792
794 restart_pagno: 793 restart_pagno:
795 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); 794 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
@@ -808,7 +807,6 @@ nextag:
808 * If in the same AG as the parent, try to get near the parent. 807 * If in the same AG as the parent, try to get near the parent.
809 */ 808 */
810 if (pagno == agno) { 809 if (pagno == agno) {
811 xfs_perag_t *pag = &mp->m_perag[agno];
812 int doneleft; /* done, to the left */ 810 int doneleft; /* done, to the left */
813 int doneright; /* done, to the right */ 811 int doneright; /* done, to the right */
814 int searchdistance = 10; 812 int searchdistance = 10;
@@ -1006,9 +1004,7 @@ alloc_inode:
1006 goto error0; 1004 goto error0;
1007 be32_add_cpu(&agi->agi_freecount, -1); 1005 be32_add_cpu(&agi->agi_freecount, -1);
1008 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1006 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1009 down_read(&mp->m_peraglock); 1007 pag->pagi_freecount--;
1010 mp->m_perag[tagno].pagi_freecount--;
1011 up_read(&mp->m_peraglock);
1012 1008
1013 error = xfs_check_agi_freecount(cur, agi); 1009 error = xfs_check_agi_freecount(cur, agi);
1014 if (error) 1010 if (error)
@@ -1016,12 +1012,14 @@ alloc_inode:
1016 1012
1017 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1013 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1018 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); 1014 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
1015 xfs_perag_put(pag);
1019 *inop = ino; 1016 *inop = ino;
1020 return 0; 1017 return 0;
1021error1: 1018error1:
1022 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 1019 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1023error0: 1020error0:
1024 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1021 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1022 xfs_perag_put(pag);
1025 return error; 1023 return error;
1026} 1024}
1027 1025
@@ -1052,6 +1050,7 @@ xfs_difree(
1052 xfs_mount_t *mp; /* mount structure for filesystem */ 1050 xfs_mount_t *mp; /* mount structure for filesystem */
1053 int off; /* offset of inode in inode chunk */ 1051 int off; /* offset of inode in inode chunk */
1054 xfs_inobt_rec_incore_t rec; /* btree record */ 1052 xfs_inobt_rec_incore_t rec; /* btree record */
1053 struct xfs_perag *pag;
1055 1054
1056 mp = tp->t_mountp; 1055 mp = tp->t_mountp;
1057 1056
@@ -1088,9 +1087,7 @@ xfs_difree(
1088 /* 1087 /*
1089 * Get the allocation group header. 1088 * Get the allocation group header.
1090 */ 1089 */
1091 down_read(&mp->m_peraglock);
1092 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1090 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1093 up_read(&mp->m_peraglock);
1094 if (error) { 1091 if (error) {
1095 cmn_err(CE_WARN, 1092 cmn_err(CE_WARN,
1096 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", 1093 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
@@ -1157,9 +1154,9 @@ xfs_difree(
1157 be32_add_cpu(&agi->agi_count, -ilen); 1154 be32_add_cpu(&agi->agi_count, -ilen);
1158 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1155 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1159 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1156 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1160 down_read(&mp->m_peraglock); 1157 pag = xfs_perag_get(mp, agno);
1161 mp->m_perag[agno].pagi_freecount -= ilen - 1; 1158 pag->pagi_freecount -= ilen - 1;
1162 up_read(&mp->m_peraglock); 1159 xfs_perag_put(pag);
1163 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1160 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1164 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1161 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1165 1162
@@ -1188,9 +1185,9 @@ xfs_difree(
1188 */ 1185 */
1189 be32_add_cpu(&agi->agi_freecount, 1); 1186 be32_add_cpu(&agi->agi_freecount, 1);
1190 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1187 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1191 down_read(&mp->m_peraglock); 1188 pag = xfs_perag_get(mp, agno);
1192 mp->m_perag[agno].pagi_freecount++; 1189 pag->pagi_freecount++;
1193 up_read(&mp->m_peraglock); 1190 xfs_perag_put(pag);
1194 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1191 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1195 } 1192 }
1196 1193
@@ -1312,9 +1309,7 @@ xfs_imap(
1312 xfs_buf_t *agbp; /* agi buffer */ 1309 xfs_buf_t *agbp; /* agi buffer */
1313 int i; /* temp state */ 1310 int i; /* temp state */
1314 1311
1315 down_read(&mp->m_peraglock);
1316 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1312 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1317 up_read(&mp->m_peraglock);
1318 if (error) { 1313 if (error) {
1319 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1314 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1320 "xfs_ialloc_read_agi() returned " 1315 "xfs_ialloc_read_agi() returned "
@@ -1379,7 +1374,6 @@ xfs_imap(
1379 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1374 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1380 return XFS_ERROR(EINVAL); 1375 return XFS_ERROR(EINVAL);
1381 } 1376 }
1382
1383 return 0; 1377 return 0;
1384} 1378}
1385 1379
@@ -1523,8 +1517,7 @@ xfs_ialloc_read_agi(
1523 return error; 1517 return error;
1524 1518
1525 agi = XFS_BUF_TO_AGI(*bpp); 1519 agi = XFS_BUF_TO_AGI(*bpp);
1526 pag = &mp->m_perag[agno]; 1520 pag = xfs_perag_get(mp, agno);
1527
1528 if (!pag->pagi_init) { 1521 if (!pag->pagi_init) {
1529 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1522 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1530 pag->pagi_count = be32_to_cpu(agi->agi_count); 1523 pag->pagi_count = be32_to_cpu(agi->agi_count);
@@ -1537,6 +1530,7 @@ xfs_ialloc_read_agi(
1537 */ 1530 */
1538 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || 1531 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1539 XFS_FORCED_SHUTDOWN(mp)); 1532 XFS_FORCED_SHUTDOWN(mp));
1533 xfs_perag_put(pag);
1540 return 0; 1534 return 0;
1541} 1535}
1542 1536
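
Every pagi_freecount update in the inode allocator gets the same mechanical conversion: the read-locked access to the flat per-AG array becomes a get/put pair around the field update. Condensed from the hunks above, before and after:

        /* old */
        down_read(&mp->m_peraglock);
        mp->m_perag[agno].pagi_freecount += newlen;
        up_read(&mp->m_peraglock);

        /* new */
        pag = xfs_perag_get(mp, agno);
        pag->pagi_freecount += newlen;
        xfs_perag_put(pag);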
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 155e798f30a1..e281eb4a1c49 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -374,7 +374,7 @@ xfs_iget(
374 return EINVAL; 374 return EINVAL;
375 375
376 /* get the perag structure and ensure that it's inode capable */ 376 /* get the perag structure and ensure that it's inode capable */
377 pag = xfs_get_perag(mp, ino); 377 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
378 if (!pag->pagi_inodeok) 378 if (!pag->pagi_inodeok)
379 return EINVAL; 379 return EINVAL;
380 ASSERT(pag->pag_ici_init); 380 ASSERT(pag->pag_ici_init);
@@ -398,7 +398,7 @@ again:
398 if (error) 398 if (error)
399 goto out_error_or_again; 399 goto out_error_or_again;
400 } 400 }
401 xfs_put_perag(mp, pag); 401 xfs_perag_put(pag);
402 402
403 *ipp = ip; 403 *ipp = ip;
404 404
@@ -417,7 +417,7 @@ out_error_or_again:
417 delay(1); 417 delay(1);
418 goto again; 418 goto again;
419 } 419 }
420 xfs_put_perag(mp, pag); 420 xfs_perag_put(pag);
421 return error; 421 return error;
422} 422}
423 423
@@ -488,12 +488,12 @@ xfs_ireclaim(
488 * added to the tree assert that it's been there before to catch 488 * added to the tree assert that it's been there before to catch
489 * problems with the inode life time early on. 489 * problems with the inode life time early on.
490 */ 490 */
491 pag = xfs_get_perag(mp, ip->i_ino); 491 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
492 write_lock(&pag->pag_ici_lock); 492 write_lock(&pag->pag_ici_lock);
493 if (!radix_tree_delete(&pag->pag_ici_root, agino)) 493 if (!radix_tree_delete(&pag->pag_ici_root, agino))
494 ASSERT(0); 494 ASSERT(0);
495 write_unlock(&pag->pag_ici_lock); 495 write_unlock(&pag->pag_ici_lock);
496 xfs_put_perag(mp, pag); 496 xfs_perag_put(pag);
497 497
498 /* 498 /*
499 * Here we do an (almost) spurious inode lock in order to coordinate 499 * Here we do an (almost) spurious inode lock in order to coordinate
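
xfs_get_perag()/xfs_put_perag() took the mount and an inode number; the replacements take an explicit AG number, so callers starting from an inode convert with XFS_INO_TO_AGNO(). The converted lookup, as it appears in xfs_iget() above:

        /* old: pag = xfs_get_perag(mp, ino); */
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
        /* ... radix tree lookup/insert under pag->pag_ici_lock ... */
        xfs_perag_put(pag);     /* old: xfs_put_perag(mp, pag); */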
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ef77fd88c8e3..fa31360046d4 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -151,7 +151,7 @@ xfs_imap_to_bp(
151 "an error %d on %s. Returning error.", 151 "an error %d on %s. Returning error.",
152 error, mp->m_fsname); 152 error, mp->m_fsname);
153 } else { 153 } else {
154 ASSERT(buf_flags & XFS_BUF_TRYLOCK); 154 ASSERT(buf_flags & XBF_TRYLOCK);
155 } 155 }
156 return error; 156 return error;
157 } 157 }
@@ -239,7 +239,7 @@ xfs_inotobp(
239 if (error) 239 if (error)
240 return error; 240 return error;
241 241
242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); 242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
243 if (error) 243 if (error)
244 return error; 244 return error;
245 245
@@ -285,7 +285,7 @@ xfs_itobp(
285 return error; 285 return error;
286 286
287 if (!bp) { 287 if (!bp) {
288 ASSERT(buf_flags & XFS_BUF_TRYLOCK); 288 ASSERT(buf_flags & XBF_TRYLOCK);
289 ASSERT(tp == NULL); 289 ASSERT(tp == NULL);
290 *bpp = NULL; 290 *bpp = NULL;
291 return EAGAIN; 291 return EAGAIN;
@@ -807,7 +807,7 @@ xfs_iread(
807 * Get pointers to the on-disk inode and the buffer containing it. 807 * Get pointers to the on-disk inode and the buffer containing it.
808 */ 808 */
809 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 809 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
810 XFS_BUF_LOCK, iget_flags); 810 XBF_LOCK, iget_flags);
811 if (error) 811 if (error)
812 return error; 812 return error;
813 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 813 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -1751,7 +1751,7 @@ xfs_iunlink(
1751 * Here we put the head pointer into our next pointer, 1751 * Here we put the head pointer into our next pointer,
1752 * and then we fall through to point the head at us. 1752 * and then we fall through to point the head at us.
1753 */ 1753 */
1754 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1754 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1755 if (error) 1755 if (error)
1756 return error; 1756 return error;
1757 1757
@@ -1833,7 +1833,7 @@ xfs_iunlink_remove(
1833 * of dealing with the buffer when there is no need to 1833 * of dealing with the buffer when there is no need to
1834 * change it. 1834 * change it.
1835 */ 1835 */
1836 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1836 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1837 if (error) { 1837 if (error) {
1838 cmn_err(CE_WARN, 1838 cmn_err(CE_WARN,
1839 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1839 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -1895,7 +1895,7 @@ xfs_iunlink_remove(
1895 * Now last_ibp points to the buffer previous to us on 1895 * Now last_ibp points to the buffer previous to us on
1896 * the unlinked list. Pull us from the list. 1896 * the unlinked list. Pull us from the list.
1897 */ 1897 */
1898 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1898 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1899 if (error) { 1899 if (error) {
1900 cmn_err(CE_WARN, 1900 cmn_err(CE_WARN,
1901 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1901 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -1946,8 +1946,9 @@ xfs_ifree_cluster(
1946 xfs_inode_t *ip, **ip_found; 1946 xfs_inode_t *ip, **ip_found;
1947 xfs_inode_log_item_t *iip; 1947 xfs_inode_log_item_t *iip;
1948 xfs_log_item_t *lip; 1948 xfs_log_item_t *lip;
1949 xfs_perag_t *pag = xfs_get_perag(mp, inum); 1949 struct xfs_perag *pag;
1950 1950
1951 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
1951 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 1952 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
1952 blks_per_cluster = 1; 1953 blks_per_cluster = 1;
1953 ninodes = mp->m_sb.sb_inopblock; 1954 ninodes = mp->m_sb.sb_inopblock;
@@ -2039,7 +2040,7 @@ xfs_ifree_cluster(
2039 2040
2040 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2041 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
2041 mp->m_bsize * blks_per_cluster, 2042 mp->m_bsize * blks_per_cluster,
2042 XFS_BUF_LOCK); 2043 XBF_LOCK);
2043 2044
2044 pre_flushed = 0; 2045 pre_flushed = 0;
2045 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 2046 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
@@ -2088,7 +2089,7 @@ xfs_ifree_cluster(
2088 } 2089 }
2089 2090
2090 kmem_free(ip_found); 2091 kmem_free(ip_found);
2091 xfs_put_perag(mp, pag); 2092 xfs_perag_put(pag);
2092} 2093}
2093 2094
2094/* 2095/*
@@ -2150,7 +2151,7 @@ xfs_ifree(
2150 2151
2151 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2152 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2152 2153
2153 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 2154 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK);
2154 if (error) 2155 if (error)
2155 return error; 2156 return error;
2156 2157
@@ -2483,13 +2484,16 @@ __xfs_iunpin_wait(
2483 return; 2484 return;
2484 2485
2485 /* Give the log a push to start the unpinning I/O */ 2486 /* Give the log a push to start the unpinning I/O */
2486 xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? 2487 if (iip && iip->ili_last_lsn)
2487 iip->ili_last_lsn : 0, XFS_LOG_FORCE); 2488 xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0);
2489 else
2490 xfs_log_force(ip->i_mount, 0);
2491
2488 if (wait) 2492 if (wait)
2489 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2493 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
2490} 2494}
2491 2495
2492static inline void 2496void
2493xfs_iunpin_wait( 2497xfs_iunpin_wait(
2494 xfs_inode_t *ip) 2498 xfs_inode_t *ip)
2495{ 2499{
@@ -2675,7 +2679,7 @@ xfs_iflush_cluster(
2675 xfs_buf_t *bp) 2679 xfs_buf_t *bp)
2676{ 2680{
2677 xfs_mount_t *mp = ip->i_mount; 2681 xfs_mount_t *mp = ip->i_mount;
2678 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 2682 struct xfs_perag *pag;
2679 unsigned long first_index, mask; 2683 unsigned long first_index, mask;
2680 unsigned long inodes_per_cluster; 2684 unsigned long inodes_per_cluster;
2681 int ilist_size; 2685 int ilist_size;
@@ -2686,6 +2690,7 @@ xfs_iflush_cluster(
2686 int bufwasdelwri; 2690 int bufwasdelwri;
2687 int i; 2691 int i;
2688 2692
2693 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2689 ASSERT(pag->pagi_inodeok); 2694 ASSERT(pag->pagi_inodeok);
2690 ASSERT(pag->pag_ici_init); 2695 ASSERT(pag->pag_ici_init);
2691 2696
@@ -2693,7 +2698,7 @@ xfs_iflush_cluster(
2693 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 2698 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
2694 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2699 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2695 if (!ilist) 2700 if (!ilist)
2696 return 0; 2701 goto out_put;
2697 2702
2698 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2703 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2699 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 2704 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
@@ -2762,6 +2767,8 @@ xfs_iflush_cluster(
2762out_free: 2767out_free:
2763 read_unlock(&pag->pag_ici_lock); 2768 read_unlock(&pag->pag_ici_lock);
2764 kmem_free(ilist); 2769 kmem_free(ilist);
2770out_put:
2771 xfs_perag_put(pag);
2765 return 0; 2772 return 0;
2766 2773
2767 2774
@@ -2805,6 +2812,7 @@ cluster_corrupt_out:
2805 */ 2812 */
2806 xfs_iflush_abort(iq); 2813 xfs_iflush_abort(iq);
2807 kmem_free(ilist); 2814 kmem_free(ilist);
2815 xfs_perag_put(pag);
2808 return XFS_ERROR(EFSCORRUPTED); 2816 return XFS_ERROR(EFSCORRUPTED);
2809} 2817}
2810 2818
@@ -2827,8 +2835,6 @@ xfs_iflush(
2827 xfs_dinode_t *dip; 2835 xfs_dinode_t *dip;
2828 xfs_mount_t *mp; 2836 xfs_mount_t *mp;
2829 int error; 2837 int error;
2830 int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
2831 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
2832 2838
2833 XFS_STATS_INC(xs_iflush_count); 2839 XFS_STATS_INC(xs_iflush_count);
2834 2840
@@ -2841,15 +2847,6 @@ xfs_iflush(
2841 mp = ip->i_mount; 2847 mp = ip->i_mount;
2842 2848
2843 /* 2849 /*
2844 * If the inode isn't dirty, then just release the inode flush lock and
2845 * do nothing.
2846 */
2847 if (xfs_inode_clean(ip)) {
2848 xfs_ifunlock(ip);
2849 return 0;
2850 }
2851
2852 /*
2853 * We can't flush the inode until it is unpinned, so wait for it if we 2850 * We can't flush the inode until it is unpinned, so wait for it if we
 2854 * are allowed to block. We know no one new can pin it, because we are 2851 * holding the inode lock shared and you need to hold it exclusively to
2855 * holding the inode lock shared and you need to hold it exclusively to 2852 * holding the inode lock shared and you need to hold it exclusively to
@@ -2860,7 +2857,7 @@ xfs_iflush(
2860 * in the same cluster are dirty, they will probably write the inode 2857 * in the same cluster are dirty, they will probably write the inode
2861 * out for us if they occur after the log force completes. 2858 * out for us if they occur after the log force completes.
2862 */ 2859 */
2863 if (noblock && xfs_ipincount(ip)) { 2860 if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
2864 xfs_iunpin_nowait(ip); 2861 xfs_iunpin_nowait(ip);
2865 xfs_ifunlock(ip); 2862 xfs_ifunlock(ip);
2866 return EAGAIN; 2863 return EAGAIN;
@@ -2894,60 +2891,10 @@ xfs_iflush(
2894 } 2891 }
2895 2892
2896 /* 2893 /*
2897 * Decide how buffer will be flushed out. This is done before
2898 * the call to xfs_iflush_int because this field is zeroed by it.
2899 */
2900 if (iip != NULL && iip->ili_format.ilf_fields != 0) {
2901 /*
2902 * Flush out the inode buffer according to the directions
2903 * of the caller. In the cases where the caller has given
2904 * us a choice choose the non-delwri case. This is because
2905 * the inode is in the AIL and we need to get it out soon.
2906 */
2907 switch (flags) {
2908 case XFS_IFLUSH_SYNC:
2909 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
2910 flags = 0;
2911 break;
2912 case XFS_IFLUSH_ASYNC_NOBLOCK:
2913 case XFS_IFLUSH_ASYNC:
2914 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
2915 flags = INT_ASYNC;
2916 break;
2917 case XFS_IFLUSH_DELWRI:
2918 flags = INT_DELWRI;
2919 break;
2920 default:
2921 ASSERT(0);
2922 flags = 0;
2923 break;
2924 }
2925 } else {
2926 switch (flags) {
2927 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
2928 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
2929 case XFS_IFLUSH_DELWRI:
2930 flags = INT_DELWRI;
2931 break;
2932 case XFS_IFLUSH_ASYNC_NOBLOCK:
2933 case XFS_IFLUSH_ASYNC:
2934 flags = INT_ASYNC;
2935 break;
2936 case XFS_IFLUSH_SYNC:
2937 flags = 0;
2938 break;
2939 default:
2940 ASSERT(0);
2941 flags = 0;
2942 break;
2943 }
2944 }
2945
2946 /*
2947 * Get the buffer containing the on-disk inode. 2894 * Get the buffer containing the on-disk inode.
2948 */ 2895 */
2949 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 2896 error = xfs_itobp(mp, NULL, ip, &dip, &bp,
2950 noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); 2897 (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
2951 if (error || !bp) { 2898 if (error || !bp) {
2952 xfs_ifunlock(ip); 2899 xfs_ifunlock(ip);
2953 return error; 2900 return error;
@@ -2965,7 +2912,7 @@ xfs_iflush(
2965 * get stuck waiting in the write for too long. 2912 * get stuck waiting in the write for too long.
2966 */ 2913 */
2967 if (XFS_BUF_ISPINNED(bp)) 2914 if (XFS_BUF_ISPINNED(bp))
2968 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 2915 xfs_log_force(mp, 0);
2969 2916
2970 /* 2917 /*
2971 * inode clustering: 2918 * inode clustering:
@@ -2975,13 +2922,10 @@ xfs_iflush(
2975 if (error) 2922 if (error)
2976 goto cluster_corrupt_out; 2923 goto cluster_corrupt_out;
2977 2924
2978 if (flags & INT_DELWRI) { 2925 if (flags & SYNC_WAIT)
2979 xfs_bdwrite(mp, bp);
2980 } else if (flags & INT_ASYNC) {
2981 error = xfs_bawrite(mp, bp);
2982 } else {
2983 error = xfs_bwrite(mp, bp); 2926 error = xfs_bwrite(mp, bp);
2984 } 2927 else
2928 xfs_bdwrite(mp, bp);
2985 return error; 2929 return error;
2986 2930
2987corrupt_out: 2931corrupt_out:
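
The six XFS_IFLUSH_* modes and the two-level switch that translated them collapse into a single SYNC_WAIT test: a waiting flush blocks on the buffer lock and writes synchronously, anything else trylocks and leaves a delayed write for xfsbufd. The caller-visible behaviour, condensed from the hunks above:

        error = xfs_itobp(mp, NULL, ip, &dip, &bp,
                          (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
        /* ... */
        if (flags & SYNC_WAIT)
                error = xfs_bwrite(mp, bp);     /* synchronous write */
        else
                xfs_bdwrite(mp, bp);            /* queue as delayed write */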
@@ -3016,16 +2960,6 @@ xfs_iflush_int(
3016 iip = ip->i_itemp; 2960 iip = ip->i_itemp;
3017 mp = ip->i_mount; 2961 mp = ip->i_mount;
3018 2962
3019
3020 /*
3021 * If the inode isn't dirty, then just release the inode
3022 * flush lock and do nothing.
3023 */
3024 if (xfs_inode_clean(ip)) {
3025 xfs_ifunlock(ip);
3026 return 0;
3027 }
3028
3029 /* set *dip = inode's place in the buffer */ 2963 /* set *dip = inode's place in the buffer */
3030 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2964 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
3031 2965
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ec1f28c4fc4f..6c912b027596 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -420,16 +420,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
420#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 420#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
421 421
422/* 422/*
423 * Flags for xfs_iflush()
424 */
425#define XFS_IFLUSH_DELWRI_ELSE_SYNC 1
426#define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2
427#define XFS_IFLUSH_SYNC 3
428#define XFS_IFLUSH_ASYNC 4
429#define XFS_IFLUSH_DELWRI 5
430#define XFS_IFLUSH_ASYNC_NOBLOCK 6
431
432/*
433 * Flags for xfs_itruncate_start(). 423 * Flags for xfs_itruncate_start().
434 */ 424 */
435#define XFS_ITRUNC_DEFINITE 0x1 425#define XFS_ITRUNC_DEFINITE 0x1
@@ -483,6 +473,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
483void xfs_iext_realloc(xfs_inode_t *, int, int); 473void xfs_iext_realloc(xfs_inode_t *, int, int);
484void xfs_ipin(xfs_inode_t *); 474void xfs_ipin(xfs_inode_t *);
485void xfs_iunpin(xfs_inode_t *); 475void xfs_iunpin(xfs_inode_t *);
476void xfs_iunpin_wait(xfs_inode_t *);
486int xfs_iflush(xfs_inode_t *, uint); 477int xfs_iflush(xfs_inode_t *, uint);
487void xfs_ichgtime(xfs_inode_t *, int); 478void xfs_ichgtime(xfs_inode_t *, int);
488void xfs_lock_inodes(xfs_inode_t **, int, uint); 479void xfs_lock_inodes(xfs_inode_t **, int, uint);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f38855d21ea5..d4dc063111f8 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -228,7 +228,7 @@ xfs_inode_item_format(
228 228
229 vecp->i_addr = (xfs_caddr_t)&iip->ili_format; 229 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
230 vecp->i_len = sizeof(xfs_inode_log_format_t); 230 vecp->i_len = sizeof(xfs_inode_log_format_t);
231 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); 231 vecp->i_type = XLOG_REG_TYPE_IFORMAT;
232 vecp++; 232 vecp++;
233 nvecs = 1; 233 nvecs = 1;
234 234
@@ -279,7 +279,7 @@ xfs_inode_item_format(
279 279
280 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 280 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
281 vecp->i_len = sizeof(struct xfs_icdinode); 281 vecp->i_len = sizeof(struct xfs_icdinode);
282 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 282 vecp->i_type = XLOG_REG_TYPE_ICORE;
283 vecp++; 283 vecp++;
284 nvecs++; 284 nvecs++;
285 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 285 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
@@ -336,7 +336,7 @@ xfs_inode_item_format(
336 vecp->i_addr = 336 vecp->i_addr =
337 (char *)(ip->i_df.if_u1.if_extents); 337 (char *)(ip->i_df.if_u1.if_extents);
338 vecp->i_len = ip->i_df.if_bytes; 338 vecp->i_len = ip->i_df.if_bytes;
339 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 339 vecp->i_type = XLOG_REG_TYPE_IEXT;
340 } else 340 } else
341#endif 341#endif
342 { 342 {
@@ -355,7 +355,7 @@ xfs_inode_item_format(
355 vecp->i_addr = (xfs_caddr_t)ext_buffer; 355 vecp->i_addr = (xfs_caddr_t)ext_buffer;
356 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 356 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
357 XFS_DATA_FORK); 357 XFS_DATA_FORK);
358 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 358 vecp->i_type = XLOG_REG_TYPE_IEXT;
359 } 359 }
360 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 360 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
361 iip->ili_format.ilf_dsize = vecp->i_len; 361 iip->ili_format.ilf_dsize = vecp->i_len;
@@ -373,7 +373,7 @@ xfs_inode_item_format(
373 ASSERT(ip->i_df.if_broot != NULL); 373 ASSERT(ip->i_df.if_broot != NULL);
374 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 374 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
375 vecp->i_len = ip->i_df.if_broot_bytes; 375 vecp->i_len = ip->i_df.if_broot_bytes;
376 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); 376 vecp->i_type = XLOG_REG_TYPE_IBROOT;
377 vecp++; 377 vecp++;
378 nvecs++; 378 nvecs++;
379 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 379 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
@@ -399,7 +399,7 @@ xfs_inode_item_format(
399 ASSERT((ip->i_df.if_real_bytes == 0) || 399 ASSERT((ip->i_df.if_real_bytes == 0) ||
400 (ip->i_df.if_real_bytes == data_bytes)); 400 (ip->i_df.if_real_bytes == data_bytes));
401 vecp->i_len = (int)data_bytes; 401 vecp->i_len = (int)data_bytes;
402 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); 402 vecp->i_type = XLOG_REG_TYPE_ILOCAL;
403 vecp++; 403 vecp++;
404 nvecs++; 404 nvecs++;
405 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 405 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
@@ -477,7 +477,7 @@ xfs_inode_item_format(
477 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 477 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
478 XFS_ATTR_FORK); 478 XFS_ATTR_FORK);
479#endif 479#endif
480 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); 480 vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
481 iip->ili_format.ilf_asize = vecp->i_len; 481 iip->ili_format.ilf_asize = vecp->i_len;
482 vecp++; 482 vecp++;
483 nvecs++; 483 nvecs++;
@@ -492,7 +492,7 @@ xfs_inode_item_format(
492 ASSERT(ip->i_afp->if_broot != NULL); 492 ASSERT(ip->i_afp->if_broot != NULL);
493 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 493 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
494 vecp->i_len = ip->i_afp->if_broot_bytes; 494 vecp->i_len = ip->i_afp->if_broot_bytes;
495 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); 495 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
496 vecp++; 496 vecp++;
497 nvecs++; 497 nvecs++;
498 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 498 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
@@ -516,7 +516,7 @@ xfs_inode_item_format(
516 ASSERT((ip->i_afp->if_real_bytes == 0) || 516 ASSERT((ip->i_afp->if_real_bytes == 0) ||
517 (ip->i_afp->if_real_bytes == data_bytes)); 517 (ip->i_afp->if_real_bytes == data_bytes));
518 vecp->i_len = (int)data_bytes; 518 vecp->i_len = (int)data_bytes;
519 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); 519 vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL;
520 vecp++; 520 vecp++;
521 nvecs++; 521 nvecs++;
522 iip->ili_format.ilf_asize = (unsigned)data_bytes; 522 iip->ili_format.ilf_asize = (unsigned)data_bytes;
@@ -602,33 +602,20 @@ xfs_inode_item_trylock(
602 602
603 if (!xfs_iflock_nowait(ip)) { 603 if (!xfs_iflock_nowait(ip)) {
604 /* 604 /*
 605 * If someone else isn't already trying to push the inode 605 * The inode has already been flushed to the backing buffer;
 606 * buffer, we get to do it. 606 * leave it locked in shared mode, the pushbuf routine will
 607 * unlock it.
607 */ 608 */
608 if (iip->ili_pushbuf_flag == 0) { 609 return XFS_ITEM_PUSHBUF;
609 iip->ili_pushbuf_flag = 1;
610#ifdef DEBUG
611 iip->ili_push_owner = current_pid();
612#endif
613 /*
614 * Inode is left locked in shared mode.
615 * Pushbuf routine gets to unlock it.
616 */
617 return XFS_ITEM_PUSHBUF;
618 } else {
619 /*
620 * We hold the AIL lock, so we must specify the
621 * NONOTIFY flag so that we won't double trip.
622 */
623 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
624 return XFS_ITEM_FLUSHING;
625 }
626 /* NOTREACHED */
627 } 610 }
628 611
629 /* Stale items should force out the iclog */ 612 /* Stale items should force out the iclog */
630 if (ip->i_flags & XFS_ISTALE) { 613 if (ip->i_flags & XFS_ISTALE) {
631 xfs_ifunlock(ip); 614 xfs_ifunlock(ip);
615 /*
616 * we hold the AIL lock - notify the unlock routine of this
617 * so it doesn't try to get the lock again.
618 */
632 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); 619 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
633 return XFS_ITEM_PINNED; 620 return XFS_ITEM_PINNED;
634 } 621 }
@@ -746,11 +733,8 @@ xfs_inode_item_committed(
746 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK 733 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
747 * failed to get the inode flush lock but did get the inode locked SHARED. 734 * failed to get the inode flush lock but did get the inode locked SHARED.
748 * Here we're trying to see if the inode buffer is incore, and if so whether it's 735 * Here we're trying to see if the inode buffer is incore, and if so whether it's
749 * marked delayed write. If that's the case, we'll initiate a bawrite on that 736 * marked delayed write. If that's the case, we'll promote it and that will
750 * buffer to expedite the process. 737 * allow the caller to write the buffer by triggering the xfsbufd to run.
751 *
752 * We aren't holding the AIL lock (or the flush lock) when this gets called,
753 * so it is inherently race-y.
754 */ 738 */
755STATIC void 739STATIC void
756xfs_inode_item_pushbuf( 740xfs_inode_item_pushbuf(
@@ -759,82 +743,30 @@ xfs_inode_item_pushbuf(
759 xfs_inode_t *ip; 743 xfs_inode_t *ip;
760 xfs_mount_t *mp; 744 xfs_mount_t *mp;
761 xfs_buf_t *bp; 745 xfs_buf_t *bp;
762 uint dopush;
763 746
764 ip = iip->ili_inode; 747 ip = iip->ili_inode;
765
766 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 748 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
767 749
768 /* 750 /*
769 * The ili_pushbuf_flag keeps others from
770 * trying to duplicate our effort.
771 */
772 ASSERT(iip->ili_pushbuf_flag != 0);
773 ASSERT(iip->ili_push_owner == current_pid());
774
775 /*
776 * If a flush is not in progress anymore, chances are that the 751 * If a flush is not in progress anymore, chances are that the
777 * inode was taken off the AIL. So, just get out. 752 * inode was taken off the AIL. So, just get out.
778 */ 753 */
779 if (completion_done(&ip->i_flush) || 754 if (completion_done(&ip->i_flush) ||
780 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 755 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
781 iip->ili_pushbuf_flag = 0;
782 xfs_iunlock(ip, XFS_ILOCK_SHARED); 756 xfs_iunlock(ip, XFS_ILOCK_SHARED);
783 return; 757 return;
784 } 758 }
785 759
786 mp = ip->i_mount; 760 mp = ip->i_mount;
787 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, 761 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
788 iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); 762 iip->ili_format.ilf_len, XBF_TRYLOCK);
789 763
790 if (bp != NULL) {
791 if (XFS_BUF_ISDELAYWRITE(bp)) {
792 /*
793 * We were racing with iflush because we don't hold
794 * the AIL lock or the flush lock. However, at this point,
795 * we have the buffer, and we know that it's dirty.
796 * So, it's possible that iflush raced with us, and
797 * this item is already taken off the AIL.
798 * If not, we can flush it async.
799 */
800 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
801 !completion_done(&ip->i_flush));
802 iip->ili_pushbuf_flag = 0;
803 xfs_iunlock(ip, XFS_ILOCK_SHARED);
804
805 trace_xfs_inode_item_push(bp, _RET_IP_);
806
807 if (XFS_BUF_ISPINNED(bp)) {
808 xfs_log_force(mp, (xfs_lsn_t)0,
809 XFS_LOG_FORCE);
810 }
811 if (dopush) {
812 int error;
813 error = xfs_bawrite(mp, bp);
814 if (error)
815 xfs_fs_cmn_err(CE_WARN, mp,
816 "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
817 error, iip, bp);
818 } else {
819 xfs_buf_relse(bp);
820 }
821 } else {
822 iip->ili_pushbuf_flag = 0;
823 xfs_iunlock(ip, XFS_ILOCK_SHARED);
824 xfs_buf_relse(bp);
825 }
826 return;
827 }
828 /*
829 * We have to be careful about resetting pushbuf flag too early (above).
830 * Even though in theory we can do it as soon as we have the buflock,
831 * we don't want others to be doing work needlessly. They'll come to
832 * this function thinking that pushing the buffer is their
833 * responsibility only to find that the buffer is still locked by
834 * another doing the same thing
835 */
836 iip->ili_pushbuf_flag = 0;
837 xfs_iunlock(ip, XFS_ILOCK_SHARED); 764 xfs_iunlock(ip, XFS_ILOCK_SHARED);
765 if (!bp)
766 return;
767 if (XFS_BUF_ISDELAYWRITE(bp))
768 xfs_buf_delwri_promote(bp);
769 xfs_buf_relse(bp);
838 return; 770 return;
839} 771}
840 772
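
The racy ili_pushbuf_flag handshake is gone entirely: pushbuf no longer issues its own async write, it just promotes a delayed-write buffer so that xfsbufd writes it on its next pass. The whole routine now reduces to the few lines above; in outline:

        bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
                        iip->ili_format.ilf_len, XBF_TRYLOCK);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        if (!bp)
                return;                         /* buffer not in memory */
        if (XFS_BUF_ISDELAYWRITE(bp))
                xfs_buf_delwri_promote(bp);     /* move to head of delwri queue */
        xfs_buf_relse(bp);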
@@ -867,10 +799,14 @@ xfs_inode_item_push(
867 iip->ili_format.ilf_fields != 0); 799 iip->ili_format.ilf_fields != 0);
868 800
869 /* 801 /*
 870 * Write out the inode. The completion routine ('iflush_done') will 802 * Push the inode to its backing buffer. This will not remove the
 871 * pull it from the AIL, mark it clean, unlock the flush lock. 803 * inode from the AIL - a further push will be required to trigger a
 804 * buffer push. However, this allows all the dirty inodes to be pushed
 805 * to the buffer before it is pushed to disk. The buffer IO completion
 806 * will pull the inode from the AIL, mark it clean and unlock the flush
 807 * lock.
872 */ 808 */
873 (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); 809 (void) xfs_iflush(ip, 0);
874 xfs_iunlock(ip, XFS_ILOCK_SHARED); 810 xfs_iunlock(ip, XFS_ILOCK_SHARED);
875 811
876 return; 812 return;
@@ -934,7 +870,6 @@ xfs_inode_item_init(
934 /* 870 /*
935 We have zeroed memory. No need ... 871 We have zeroed memory. No need ...
936 iip->ili_extents_buf = NULL; 872 iip->ili_extents_buf = NULL;
937 iip->ili_pushbuf_flag = 0;
938 */ 873 */
939 874
940 iip->ili_format.ilf_type = XFS_LI_INODE; 875 iip->ili_format.ilf_type = XFS_LI_INODE;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index cc8df1ac7783..9a467958ecdd 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -144,12 +144,6 @@ typedef struct xfs_inode_log_item {
144 data exts */ 144 data exts */
145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
146 attr exts */ 146 attr exts */
147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */
148
149#ifdef DEBUG
150 uint64_t ili_push_owner; /* one who sets pushbuf_flag
151 above gets to push the buf */
152#endif
153#ifdef XFS_TRANS_DEBUG 147#ifdef XFS_TRANS_DEBUG
154 int ili_root_size; 148 int ili_root_size;
155 char *ili_orig_root; 149 char *ili_orig_root;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 62efab2f3839..3af02314c605 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -408,8 +408,10 @@ xfs_bulkstat(
408 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); 408 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
409 nimask = ~(nicluster - 1); 409 nimask = ~(nicluster - 1);
410 nbcluster = nicluster >> mp->m_sb.sb_inopblog; 410 nbcluster = nicluster >> mp->m_sb.sb_inopblog;
411 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4, 411 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
412 KM_SLEEP | KM_MAYFAIL | KM_LARGE); 412 if (!irbuf)
413 return ENOMEM;
414
413 nirbuf = irbsize / sizeof(*irbuf); 415 nirbuf = irbsize / sizeof(*irbuf);
414 416
415 /* 417 /*
@@ -420,9 +422,7 @@ xfs_bulkstat(
420 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { 422 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
421 cond_resched(); 423 cond_resched();
422 bp = NULL; 424 bp = NULL;
423 down_read(&mp->m_peraglock);
424 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 425 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
425 up_read(&mp->m_peraglock);
426 if (error) { 426 if (error) {
427 /* 427 /*
428 * Skip this allocation group and go to the next one. 428 * Skip this allocation group and go to the next one.
@@ -729,7 +729,7 @@ xfs_bulkstat(
729 /* 729 /*
730 * Done, we're either out of filesystem or space to put the data. 730 * Done, we're either out of filesystem or space to put the data.
731 */ 731 */
732 kmem_free(irbuf); 732 kmem_free_large(irbuf);
733 *ubcountp = ubelem; 733 *ubcountp = ubelem;
734 /* 734 /*
735 * Found some inodes, return them now and return the error next time. 735 * Found some inodes, return them now and return the error next time.
@@ -849,9 +849,7 @@ xfs_inumbers(
849 agbp = NULL; 849 agbp = NULL;
850 while (left > 0 && agno < mp->m_sb.sb_agcount) { 850 while (left > 0 && agno < mp->m_sb.sb_agcount) {
851 if (agbp == NULL) { 851 if (agbp == NULL) {
852 down_read(&mp->m_peraglock);
853 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 852 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
854 up_read(&mp->m_peraglock);
855 if (error) { 853 if (error) {
856 /* 854 /*
857 * If we can't read the AGI of this ag, 855 * If we can't read the AGI of this ag,
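
kmem_zalloc_greedy() loses its flags argument and may now fail outright, so xfs_bulkstat() checks for NULL and returns ENOMEM rather than relying on KM_SLEEP semantics; the buffer is released with the matching kmem_free_large(). The new allocation contract (irbsize receives the size actually obtained, between the requested minimum and maximum):

        irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
        if (!irbuf)
                return ENOMEM;
        nirbuf = irbsize / sizeof(*irbuf);
        /* ... */
        kmem_free_large(irbuf);         /* pairs with the greedy allocation */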
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 600b5b06aaeb..4f16be4b6ee5 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -50,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone;
50 (off) += (bytes);} 50 (off) += (bytes);}
51 51
52/* Local miscellaneous function prototypes */ 52/* Local miscellaneous function prototypes */
53STATIC int xlog_bdstrat_cb(struct xfs_buf *);
54STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, 53STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
55 xlog_in_core_t **, xfs_lsn_t *); 54 xlog_in_core_t **, xfs_lsn_t *);
56STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, 55STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
@@ -80,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
80STATIC void xlog_state_switch_iclogs(xlog_t *log, 79STATIC void xlog_state_switch_iclogs(xlog_t *log,
81 xlog_in_core_t *iclog, 80 xlog_in_core_t *iclog,
82 int eventual_size); 81 int eventual_size);
83STATIC int xlog_state_sync(xlog_t *log,
84 xfs_lsn_t lsn,
85 uint flags,
86 int *log_flushed);
87STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
88STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 82STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
89 83
90/* local functions to manipulate grant head */ 84/* local functions to manipulate grant head */
@@ -297,65 +291,6 @@ xfs_log_done(xfs_mount_t *mp,
297 return lsn; 291 return lsn;
298} /* xfs_log_done */ 292} /* xfs_log_done */
299 293
300
301/*
302 * Force the in-core log to disk. If flags == XFS_LOG_SYNC,
303 * the force is done synchronously.
304 *
305 * Asynchronous forces are implemented by setting the WANT_SYNC
306 * bit in the appropriate in-core log and then returning.
307 *
308 * Synchronous forces are implemented with a signal variable. All callers
309 * to force a given lsn to disk will wait on a the sv attached to the
310 * specific in-core log. When given in-core log finally completes its
311 * write to disk, that thread will wake up all threads waiting on the
312 * sv.
313 */
314int
315_xfs_log_force(
316 xfs_mount_t *mp,
317 xfs_lsn_t lsn,
318 uint flags,
319 int *log_flushed)
320{
321 xlog_t *log = mp->m_log;
322 int dummy;
323
324 if (!log_flushed)
325 log_flushed = &dummy;
326
327 ASSERT(flags & XFS_LOG_FORCE);
328
329 XFS_STATS_INC(xs_log_force);
330
331 if (log->l_flags & XLOG_IO_ERROR)
332 return XFS_ERROR(EIO);
333 if (lsn == 0)
334 return xlog_state_sync_all(log, flags, log_flushed);
335 else
336 return xlog_state_sync(log, lsn, flags, log_flushed);
337} /* _xfs_log_force */
338
339/*
340 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
341 * about errors or whether the log was flushed or not. This is the normal
342 * interface to use when trying to unpin items or move the log forward.
343 */
344void
345xfs_log_force(
346 xfs_mount_t *mp,
347 xfs_lsn_t lsn,
348 uint flags)
349{
350 int error;
351 error = _xfs_log_force(mp, lsn, flags, NULL);
352 if (error) {
353 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
354 "error %d returned.", error);
355 }
356}
357
358
359/* 294/*
360 * Attaches a new iclog I/O completion callback routine during 295 * Attaches a new iclog I/O completion callback routine during
361 * transaction commit. If the log is in error state, a non-zero 296 * transaction commit. If the log is in error state, a non-zero
@@ -602,7 +537,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
602 if (mp->m_flags & XFS_MOUNT_RDONLY) 537 if (mp->m_flags & XFS_MOUNT_RDONLY)
603 return 0; 538 return 0;
604 539
605 error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); 540 error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
606 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); 541 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
607 542
608#ifdef DEBUG 543#ifdef DEBUG
@@ -618,7 +553,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
618 if (! (XLOG_FORCED_SHUTDOWN(log))) { 553 if (! (XLOG_FORCED_SHUTDOWN(log))) {
619 reg[0].i_addr = (void*)&magic; 554 reg[0].i_addr = (void*)&magic;
620 reg[0].i_len = sizeof(magic); 555 reg[0].i_len = sizeof(magic);
621 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); 556 reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
622 557
623 error = xfs_log_reserve(mp, 600, 1, &tic, 558 error = xfs_log_reserve(mp, 600, 1, &tic,
624 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); 559 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
@@ -988,35 +923,6 @@ xlog_iodone(xfs_buf_t *bp)
988} /* xlog_iodone */ 923} /* xlog_iodone */
989 924
990/* 925/*
991 * The bdstrat callback function for log bufs. This gives us a central
992 * place to trap bufs in case we get hit by a log I/O error and need to
993 * shutdown. Actually, in practice, even when we didn't get a log error,
994 * we transition the iclogs to IOERROR state *after* flushing all existing
995 * iclogs to disk. This is because we don't want any more new transactions to be
996 * started or completed afterwards.
997 */
998STATIC int
999xlog_bdstrat_cb(struct xfs_buf *bp)
1000{
1001 xlog_in_core_t *iclog;
1002
1003 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1004
1005 if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
1006 /* note for irix bstrat will need struct bdevsw passed
1007 * Fix the following macro if the code ever is merged
1008 */
1009 XFS_bdstrat(bp);
1010 return 0;
1011 }
1012
1013 XFS_BUF_ERROR(bp, EIO);
1014 XFS_BUF_STALE(bp);
1015 xfs_biodone(bp);
1016 return XFS_ERROR(EIO);
1017}
1018
1019/*
1020 * Return size of each in-core log record buffer. 926 * Return size of each in-core log record buffer.
1021 * 927 *
1022 * All machines get 8 x 32kB buffers by default, unless tuned otherwise. 928 * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
@@ -1158,7 +1064,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1158 if (!bp) 1064 if (!bp)
1159 goto out_free_log; 1065 goto out_free_log;
1160 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1066 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1161 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1162 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1067 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1163 ASSERT(XFS_BUF_ISBUSY(bp)); 1068 ASSERT(XFS_BUF_ISBUSY(bp));
1164 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 1069 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
@@ -1196,7 +1101,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1196 if (!XFS_BUF_CPSEMA(bp)) 1101 if (!XFS_BUF_CPSEMA(bp))
1197 ASSERT(0); 1102 ASSERT(0);
1198 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1103 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1199 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1200 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1104 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1201 iclog->ic_bp = bp; 1105 iclog->ic_bp = bp;
1202 iclog->ic_data = bp->b_addr; 1106 iclog->ic_data = bp->b_addr;
@@ -1268,7 +1172,7 @@ xlog_commit_record(xfs_mount_t *mp,
1268 1172
1269 reg[0].i_addr = NULL; 1173 reg[0].i_addr = NULL;
1270 reg[0].i_len = 0; 1174 reg[0].i_len = 0;
1271 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); 1175 reg[0].i_type = XLOG_REG_TYPE_COMMIT;
1272 1176
1273 ASSERT_ALWAYS(iclog); 1177 ASSERT_ALWAYS(iclog);
1274 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1178 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1343,6 +1247,37 @@ xlog_grant_push_ail(xfs_mount_t *mp,
1343 xfs_trans_ail_push(log->l_ailp, threshold_lsn); 1247 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1344} /* xlog_grant_push_ail */ 1248} /* xlog_grant_push_ail */
1345 1249
1250/*
1251 * The bdstrat callback function for log bufs. This gives us a central
1252 * place to trap bufs in case we get hit by a log I/O error and need to
1253 * shutdown. Actually, in practice, even when we didn't get a log error,
1254 * we transition the iclogs to IOERROR state *after* flushing all existing
1255 * iclogs to disk. This is because we don't want any more new transactions to be
1256 * started or completed afterwards.
1257 */
1258STATIC int
1259xlog_bdstrat(
1260 struct xfs_buf *bp)
1261{
1262 struct xlog_in_core *iclog;
1263
1264 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1265 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1266 XFS_BUF_ERROR(bp, EIO);
1267 XFS_BUF_STALE(bp);
1268 xfs_biodone(bp);
1269 /*
1270 * It would seem logical to return EIO here, but we rely on
1271 * the log state machine to propagate I/O errors instead of
1272 * doing it here.
1273 */
1274 return 0;
1275 }
1276
1277 bp->b_flags |= _XBF_RUN_QUEUES;
1278 xfs_buf_iorequest(bp);
1279 return 0;
1280}
1346 1281
1347/* 1282/*
1348 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous 1283 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
@@ -1462,7 +1397,7 @@ xlog_sync(xlog_t *log,
1462 */ 1397 */
1463 XFS_BUF_WRITE(bp); 1398 XFS_BUF_WRITE(bp);
1464 1399
1465 if ((error = XFS_bwrite(bp))) { 1400 if ((error = xlog_bdstrat(bp))) {
1466 xfs_ioerror_alert("xlog_sync", log->l_mp, bp, 1401 xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
1467 XFS_BUF_ADDR(bp)); 1402 XFS_BUF_ADDR(bp));
1468 return error; 1403 return error;
@@ -1502,7 +1437,7 @@ xlog_sync(xlog_t *log,
1502 /* account for internal log which doesn't start at block #0 */ 1437 /* account for internal log which doesn't start at block #0 */
1503 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1438 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1504 XFS_BUF_WRITE(bp); 1439 XFS_BUF_WRITE(bp);
1505 if ((error = XFS_bwrite(bp))) { 1440 if ((error = xlog_bdstrat(bp))) {
1506 xfs_ioerror_alert("xlog_sync (split)", log->l_mp, 1441 xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
1507 bp, XFS_BUF_ADDR(bp)); 1442 bp, XFS_BUF_ADDR(bp));
1508 return error; 1443 return error;
@@ -2854,7 +2789,6 @@ xlog_state_switch_iclogs(xlog_t *log,
2854 log->l_iclog = iclog->ic_next; 2789 log->l_iclog = iclog->ic_next;
2855} /* xlog_state_switch_iclogs */ 2790} /* xlog_state_switch_iclogs */
2856 2791
2857
2858/* 2792/*
2859 * Write out all data in the in-core log as of this exact moment in time. 2793 * Write out all data in the in-core log as of this exact moment in time.
2860 * 2794 *
@@ -2882,11 +2816,17 @@ xlog_state_switch_iclogs(xlog_t *log,
2882 * b) when we return from flushing out this iclog, it is still 2816 * b) when we return from flushing out this iclog, it is still
2883 * not in the active nor dirty state. 2817 * not in the active nor dirty state.
2884 */ 2818 */
2885STATIC int 2819int
2886xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) 2820_xfs_log_force(
2821 struct xfs_mount *mp,
2822 uint flags,
2823 int *log_flushed)
2887{ 2824{
2888 xlog_in_core_t *iclog; 2825 struct log *log = mp->m_log;
2889 xfs_lsn_t lsn; 2826 struct xlog_in_core *iclog;
2827 xfs_lsn_t lsn;
2828
2829 XFS_STATS_INC(xs_log_force);
2890 2830
2891 spin_lock(&log->l_icloglock); 2831 spin_lock(&log->l_icloglock);
2892 2832
@@ -2932,7 +2872,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2932 2872
2933 if (xlog_state_release_iclog(log, iclog)) 2873 if (xlog_state_release_iclog(log, iclog))
2934 return XFS_ERROR(EIO); 2874 return XFS_ERROR(EIO);
2935 *log_flushed = 1; 2875
2876 if (log_flushed)
2877 *log_flushed = 1;
2936 spin_lock(&log->l_icloglock); 2878 spin_lock(&log->l_icloglock);
2937 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && 2879 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
2938 iclog->ic_state != XLOG_STATE_DIRTY) 2880 iclog->ic_state != XLOG_STATE_DIRTY)
@@ -2976,19 +2918,37 @@ maybe_sleep:
2976 */ 2918 */
2977 if (iclog->ic_state & XLOG_STATE_IOERROR) 2919 if (iclog->ic_state & XLOG_STATE_IOERROR)
2978 return XFS_ERROR(EIO); 2920 return XFS_ERROR(EIO);
2979 *log_flushed = 1; 2921 if (log_flushed)
2980 2922 *log_flushed = 1;
2981 } else { 2923 } else {
2982 2924
2983no_sleep: 2925no_sleep:
2984 spin_unlock(&log->l_icloglock); 2926 spin_unlock(&log->l_icloglock);
2985 } 2927 }
2986 return 0; 2928 return 0;
2987} /* xlog_state_sync_all */ 2929}
2988 2930
2931/*
2932 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
2933 * about errors or whether the log was flushed or not. This is the normal
2934 * interface to use when trying to unpin items or move the log forward.
2935 */
2936void
2937xfs_log_force(
2938 xfs_mount_t *mp,
2939 uint flags)
2940{
2941 int error;
2942
2943 error = _xfs_log_force(mp, flags, NULL);
2944 if (error) {
2945 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
2946 "error %d returned.", error);
2947 }
2948}
2989 2949
2990/* 2950/*
2991 * Used by code which implements synchronous log forces. 2951 * Force the in-core log to disk for a specific LSN.
2992 * 2952 *
2993 * Find in-core log with lsn. 2953 * Find in-core log with lsn.
2994 * If it is in the DIRTY state, just return. 2954 * If it is in the DIRTY state, just return.
@@ -2996,109 +2956,142 @@ no_sleep:
2996 * state and go to sleep or return. 2956 * state and go to sleep or return.
2997 * If it is in any other state, go to sleep or return. 2957 * If it is in any other state, go to sleep or return.
2998 * 2958 *
2999 * If filesystem activity goes to zero, the iclog will get flushed only by 2959 * Synchronous forces are implemented with a signal variable. All callers
3000 * bdflush(). 2960 * to force a given lsn to disk will wait on a the sv attached to the
2961 * specific in-core log. When given in-core log finally completes its
2962 * write to disk, that thread will wake up all threads waiting on the
2963 * sv.
3001 */ 2964 */
3002STATIC int 2965int
3003xlog_state_sync(xlog_t *log, 2966_xfs_log_force_lsn(
3004 xfs_lsn_t lsn, 2967 struct xfs_mount *mp,
3005 uint flags, 2968 xfs_lsn_t lsn,
3006 int *log_flushed) 2969 uint flags,
2970 int *log_flushed)
3007{ 2971{
3008 xlog_in_core_t *iclog; 2972 struct log *log = mp->m_log;
3009 int already_slept = 0; 2973 struct xlog_in_core *iclog;
2974 int already_slept = 0;
3010 2975
3011try_again: 2976 ASSERT(lsn != 0);
3012 spin_lock(&log->l_icloglock);
3013 iclog = log->l_iclog;
3014 2977
3015 if (iclog->ic_state & XLOG_STATE_IOERROR) { 2978 XFS_STATS_INC(xs_log_force);
3016 spin_unlock(&log->l_icloglock);
3017 return XFS_ERROR(EIO);
3018 }
3019
3020 do {
3021 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3022 iclog = iclog->ic_next;
3023 continue;
3024 }
3025 2979
3026 if (iclog->ic_state == XLOG_STATE_DIRTY) { 2980try_again:
2981 spin_lock(&log->l_icloglock);
2982 iclog = log->l_iclog;
2983 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3027 spin_unlock(&log->l_icloglock); 2984 spin_unlock(&log->l_icloglock);
3028 return 0; 2985 return XFS_ERROR(EIO);
3029 } 2986 }
3030 2987
3031 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 2988 do {
3032 /* 2989 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3033 * We sleep here if we haven't already slept (e.g. 2990 iclog = iclog->ic_next;
3034 * this is the first time we've looked at the correct 2991 continue;
3035 * iclog buf) and the buffer before us is going to 2992 }
3036 * be sync'ed. The reason for this is that if we 2993
3037 * are doing sync transactions here, by waiting for 2994 if (iclog->ic_state == XLOG_STATE_DIRTY) {
3038 * the previous I/O to complete, we can allow a few 2995 spin_unlock(&log->l_icloglock);
3039 * more transactions into this iclog before we close 2996 return 0;
3040 * it down. 2997 }
3041 * 2998
3042 * Otherwise, we mark the buffer WANT_SYNC, and bump 2999 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3043 * up the refcnt so we can release the log (which drops 3000 /*
3044 * the ref count). The state switch keeps new transaction 3001 * We sleep here if we haven't already slept (e.g.
3045 * commits from using this buffer. When the current commits 3002 * this is the first time we've looked at the correct
3046 * finish writing into the buffer, the refcount will drop to 3003 * iclog buf) and the buffer before us is going to
3047 * zero and the buffer will go out then. 3004 * be sync'ed. The reason for this is that if we
3048 */ 3005 * are doing sync transactions here, by waiting for
3049 if (!already_slept && 3006 * the previous I/O to complete, we can allow a few
3050 (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | 3007 * more transactions into this iclog before we close
3051 XLOG_STATE_SYNCING))) { 3008 * it down.
3052 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3009 *
3053 XFS_STATS_INC(xs_log_force_sleep); 3010 * Otherwise, we mark the buffer WANT_SYNC, and bump
3054 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, 3011 * up the refcnt so we can release the log (which
3055 &log->l_icloglock, s); 3012 * drops the ref count). The state switch keeps new
3056 *log_flushed = 1; 3013 * transaction commits from using this buffer. When
3057 already_slept = 1; 3014 * the current commits finish writing into the buffer,
3058 goto try_again; 3015 * the refcount will drop to zero and the buffer will
3059 } else { 3016 * go out then.
3017 */
3018 if (!already_slept &&
3019 (iclog->ic_prev->ic_state &
3020 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3021 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3022
3023 XFS_STATS_INC(xs_log_force_sleep);
3024
3025 sv_wait(&iclog->ic_prev->ic_write_wait,
3026 PSWP, &log->l_icloglock, s);
3027 if (log_flushed)
3028 *log_flushed = 1;
3029 already_slept = 1;
3030 goto try_again;
3031 }
3060 atomic_inc(&iclog->ic_refcnt); 3032 atomic_inc(&iclog->ic_refcnt);
3061 xlog_state_switch_iclogs(log, iclog, 0); 3033 xlog_state_switch_iclogs(log, iclog, 0);
3062 spin_unlock(&log->l_icloglock); 3034 spin_unlock(&log->l_icloglock);
3063 if (xlog_state_release_iclog(log, iclog)) 3035 if (xlog_state_release_iclog(log, iclog))
3064 return XFS_ERROR(EIO); 3036 return XFS_ERROR(EIO);
3065 *log_flushed = 1; 3037 if (log_flushed)
3038 *log_flushed = 1;
3066 spin_lock(&log->l_icloglock); 3039 spin_lock(&log->l_icloglock);
3067 } 3040 }
3068 }
3069 3041
3070 if ((flags & XFS_LOG_SYNC) && /* sleep */ 3042 if ((flags & XFS_LOG_SYNC) && /* sleep */
3071 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3043 !(iclog->ic_state &
3044 (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3045 /*
3046 * Don't wait on completion if we know that we've
3047 * gotten a log write error.
3048 */
3049 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3050 spin_unlock(&log->l_icloglock);
3051 return XFS_ERROR(EIO);
3052 }
3053 XFS_STATS_INC(xs_log_force_sleep);
3054 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3055 /*
3056 * No need to grab the log lock here since we're
3057 * only deciding whether or not to return EIO
3058 * and the memory read should be atomic.
3059 */
3060 if (iclog->ic_state & XLOG_STATE_IOERROR)
3061 return XFS_ERROR(EIO);
3072 3062
3073 /* 3063 if (log_flushed)
3074 * Don't wait on completion if we know that we've 3064 *log_flushed = 1;
3075 * gotten a log write error. 3065 } else { /* just return */
3076 */
3077 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3078 spin_unlock(&log->l_icloglock); 3066 spin_unlock(&log->l_icloglock);
3079 return XFS_ERROR(EIO);
3080 } 3067 }
3081 XFS_STATS_INC(xs_log_force_sleep);
3082 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3083 /*
3084 * No need to grab the log lock here since we're
3085 * only deciding whether or not to return EIO
3086 * and the memory read should be atomic.
3087 */
3088 if (iclog->ic_state & XLOG_STATE_IOERROR)
3089 return XFS_ERROR(EIO);
3090 *log_flushed = 1;
3091 } else { /* just return */
3092 spin_unlock(&log->l_icloglock);
3093 }
3094 return 0;
3095 3068
3096 } while (iclog != log->l_iclog); 3069 return 0;
3070 } while (iclog != log->l_iclog);
3097 3071
3098 spin_unlock(&log->l_icloglock); 3072 spin_unlock(&log->l_icloglock);
3099 return 0; 3073 return 0;
3100} /* xlog_state_sync */ 3074}
3101 3075
3076/*
3077 * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
3078 * about errors or whether the log was flushed or not. This is the normal
3079 * interface to use when trying to unpin items or move the log forward.
3080 */
3081void
3082xfs_log_force_lsn(
3083 xfs_mount_t *mp,
3084 xfs_lsn_t lsn,
3085 uint flags)
3086{
3087 int error;
3088
3089 error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
3090 if (error) {
3091 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
3092 "error %d returned.", error);
3093 }
3094}
3102 3095
3103/* 3096/*
3104 * Called when we want to mark the current iclog as being ready to sync to 3097 * Called when we want to mark the current iclog as being ready to sync to
@@ -3463,7 +3456,6 @@ xfs_log_force_umount(
3463 xlog_ticket_t *tic; 3456 xlog_ticket_t *tic;
3464 xlog_t *log; 3457 xlog_t *log;
3465 int retval; 3458 int retval;
3466 int dummy;
3467 3459
3468 log = mp->m_log; 3460 log = mp->m_log;
3469 3461
@@ -3537,13 +3529,14 @@ xfs_log_force_umount(
3537 } 3529 }
3538 spin_unlock(&log->l_grant_lock); 3530 spin_unlock(&log->l_grant_lock);
3539 3531
3540 if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3532 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3541 ASSERT(!logerror); 3533 ASSERT(!logerror);
3542 /* 3534 /*
3543 * Force the incore logs to disk before shutting the 3535 * Force the incore logs to disk before shutting the
3544 * log down completely. 3536 * log down completely.
3545 */ 3537 */
3546 xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); 3538 _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
3539
3547 spin_lock(&log->l_icloglock); 3540 spin_lock(&log->l_icloglock);
3548 retval = xlog_state_ioerror(log); 3541 retval = xlog_state_ioerror(log);
3549 spin_unlock(&log->l_icloglock); 3542 spin_unlock(&log->l_icloglock);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d0c9baa50b1a..7074be9d13e9 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -70,14 +70,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
70 * Flags to xfs_log_force() 70 * Flags to xfs_log_force()
71 * 71 *
72 * XFS_LOG_SYNC: Synchronous force in-core log to disk 72 * XFS_LOG_SYNC: Synchronous force in-core log to disk
73 * XFS_LOG_FORCE: Start in-core log write now.
74 * XFS_LOG_URGE: Start write within some window of time.
75 *
76 * Note: Either XFS_LOG_FORCE or XFS_LOG_URGE must be set.
77 */ 73 */
78#define XFS_LOG_SYNC 0x1 74#define XFS_LOG_SYNC 0x1
79#define XFS_LOG_FORCE 0x2
80#define XFS_LOG_URGE 0x4
81 75
82#endif /* __KERNEL__ */ 76#endif /* __KERNEL__ */
83 77
@@ -110,10 +104,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
110#define XLOG_REG_TYPE_TRANSHDR 19 104#define XLOG_REG_TYPE_TRANSHDR 19
111#define XLOG_REG_TYPE_MAX 19 105#define XLOG_REG_TYPE_MAX 19
112 106
113#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
114
115typedef struct xfs_log_iovec { 107typedef struct xfs_log_iovec {
116 xfs_caddr_t i_addr; /* beginning address of region */ 108 xfs_caddr_t i_addr; /* beginning address of region */
117 int i_len; /* length in bytes of region */ 109 int i_len; /* length in bytes of region */
118 uint i_type; /* type of region */ 110 uint i_type; /* type of region */
119} xfs_log_iovec_t; 111} xfs_log_iovec_t;
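With XLOG_VEC_SET_TYPE() gone, callers fill the iovec fields directly, as the unmount-record and commit-record hunks above already show. A sketch of the pattern (XLOG_UNMOUNT_TYPE is assumed to be the unmount-record magic from xfs_log_priv.h):

	xfs_log_iovec_t	reg;
	uint		magic = XLOG_UNMOUNT_TYPE;

	reg.i_addr = (void *)&magic;
	reg.i_len  = sizeof(magic);
	reg.i_type = XLOG_REG_TYPE_UNMOUNT;	/* was XLOG_VEC_SET_TYPE(&reg, ...) */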
@@ -140,12 +132,17 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
140 void **iclog, 132 void **iclog,
141 uint flags); 133 uint flags);
142int _xfs_log_force(struct xfs_mount *mp, 134int _xfs_log_force(struct xfs_mount *mp,
143 xfs_lsn_t lsn,
144 uint flags, 135 uint flags,
145 int *log_forced); 136 int *log_forced);
146void xfs_log_force(struct xfs_mount *mp, 137void xfs_log_force(struct xfs_mount *mp,
147 xfs_lsn_t lsn,
148 uint flags); 138 uint flags);
139int _xfs_log_force_lsn(struct xfs_mount *mp,
140 xfs_lsn_t lsn,
141 uint flags,
142 int *log_forced);
143void xfs_log_force_lsn(struct xfs_mount *mp,
144 xfs_lsn_t lsn,
145 uint flags);
149int xfs_log_mount(struct xfs_mount *mp, 146int xfs_log_mount(struct xfs_mount *mp,
150 struct xfs_buftarg *log_target, 147 struct xfs_buftarg *log_target,
151 xfs_daddr_t start_block, 148 xfs_daddr_t start_block,
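Taken together, these prototype changes split the old three-argument force into two pairs: _xfs_log_force()/xfs_log_force() push the whole in-core log, _xfs_log_force_lsn()/xfs_log_force_lsn() push up to a given LSN, and XFS_LOG_FORCE/XFS_LOG_URGE disappear because calling a force function now implies the force. A before/after sketch of how callers convert:

	/* before: one entry point, behaviour selected by flags */
	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
	xfs_log_force(mp, lsn, XFS_LOG_FORCE);

	/* after: the target picks the entry point, only XFS_LOG_SYNC remains */
	xfs_log_force(mp, XFS_LOG_SYNC);		/* whole log, wait for it */
	xfs_log_force_lsn(mp, lsn, 0);			/* up to lsn, asynchronous */
	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);	/* when errors matter */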
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index d55662db7077..fd02a18facd5 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -443,14 +443,9 @@ typedef struct log {
443 443
444/* common routines */ 444/* common routines */
445extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 445extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
446extern int xlog_find_tail(xlog_t *log,
447 xfs_daddr_t *head_blk,
448 xfs_daddr_t *tail_blk);
449extern int xlog_recover(xlog_t *log); 446extern int xlog_recover(xlog_t *log);
450extern int xlog_recover_finish(xlog_t *log); 447extern int xlog_recover_finish(xlog_t *log);
451extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 448extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
452extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
453extern void xlog_put_bp(struct xfs_buf *);
454 449
455extern kmem_zone_t *xfs_log_ticket_zone; 450extern kmem_zone_t *xfs_log_ticket_zone;
456 451
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 69ac2e5ef20c..22e6efdc17ea 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -50,8 +50,6 @@
50 50
51STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 51STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
52STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); 52STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
53STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
54 xlog_recover_item_t *item);
55#if defined(DEBUG) 53#if defined(DEBUG)
56STATIC void xlog_recover_check_summary(xlog_t *); 54STATIC void xlog_recover_check_summary(xlog_t *);
57#else 55#else
@@ -68,7 +66,7 @@ STATIC void xlog_recover_check_summary(xlog_t *);
68 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) 66 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
69#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) 67#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask)
70 68
71xfs_buf_t * 69STATIC xfs_buf_t *
72xlog_get_bp( 70xlog_get_bp(
73 xlog_t *log, 71 xlog_t *log,
74 int nbblks) 72 int nbblks)
@@ -88,7 +86,7 @@ xlog_get_bp(
88 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); 86 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
89} 87}
90 88
91void 89STATIC void
92xlog_put_bp( 90xlog_put_bp(
93 xfs_buf_t *bp) 91 xfs_buf_t *bp)
94{ 92{
@@ -805,7 +803,7 @@ xlog_find_head(
805 * We could speed up search by using current head_blk buffer, but it is not 803 * We could speed up search by using current head_blk buffer, but it is not
806 * available. 804 * available.
807 */ 805 */
808int 806STATIC int
809xlog_find_tail( 807xlog_find_tail(
810 xlog_t *log, 808 xlog_t *log,
811 xfs_daddr_t *head_blk, 809 xfs_daddr_t *head_blk,
@@ -1367,36 +1365,45 @@ xlog_clear_stale_blocks(
1367 1365
1368STATIC xlog_recover_t * 1366STATIC xlog_recover_t *
1369xlog_recover_find_tid( 1367xlog_recover_find_tid(
1370 xlog_recover_t *q, 1368 struct hlist_head *head,
1371 xlog_tid_t tid) 1369 xlog_tid_t tid)
1372{ 1370{
1373 xlog_recover_t *p = q; 1371 xlog_recover_t *trans;
1372 struct hlist_node *n;
1374 1373
1375 while (p != NULL) { 1374 hlist_for_each_entry(trans, n, head, r_list) {
1376 if (p->r_log_tid == tid) 1375 if (trans->r_log_tid == tid)
1377 break; 1376 return trans;
1378 p = p->r_next;
1379 } 1377 }
1380 return p; 1378 return NULL;
1381} 1379}
1382 1380
1383STATIC void 1381STATIC void
1384xlog_recover_put_hashq( 1382xlog_recover_new_tid(
1385 xlog_recover_t **q, 1383 struct hlist_head *head,
1386 xlog_recover_t *trans) 1384 xlog_tid_t tid,
1385 xfs_lsn_t lsn)
1387{ 1386{
1388 trans->r_next = *q; 1387 xlog_recover_t *trans;
1389 *q = trans; 1388
1389 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1390 trans->r_log_tid = tid;
1391 trans->r_lsn = lsn;
1392 INIT_LIST_HEAD(&trans->r_itemq);
1393
1394 INIT_HLIST_NODE(&trans->r_list);
1395 hlist_add_head(&trans->r_list, head);
1390} 1396}
1391 1397
1392STATIC void 1398STATIC void
1393xlog_recover_add_item( 1399xlog_recover_add_item(
1394 xlog_recover_item_t **itemq) 1400 struct list_head *head)
1395{ 1401{
1396 xlog_recover_item_t *item; 1402 xlog_recover_item_t *item;
1397 1403
1398 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); 1404 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
1399 xlog_recover_insert_item_backq(itemq, item); 1405 INIT_LIST_HEAD(&item->ri_list);
1406 list_add_tail(&item->ri_list, head);
1400} 1407}
1401 1408
1402STATIC int 1409STATIC int
@@ -1409,8 +1416,7 @@ xlog_recover_add_to_cont_trans(
1409 xfs_caddr_t ptr, old_ptr; 1416 xfs_caddr_t ptr, old_ptr;
1410 int old_len; 1417 int old_len;
1411 1418
1412 item = trans->r_itemq; 1419 if (list_empty(&trans->r_itemq)) {
1413 if (item == NULL) {
1414 /* finish copying rest of trans header */ 1420 /* finish copying rest of trans header */
1415 xlog_recover_add_item(&trans->r_itemq); 1421 xlog_recover_add_item(&trans->r_itemq);
1416 ptr = (xfs_caddr_t) &trans->r_theader + 1422 ptr = (xfs_caddr_t) &trans->r_theader +
@@ -1418,7 +1424,8 @@ xlog_recover_add_to_cont_trans(
1418 memcpy(ptr, dp, len); /* d, s, l */ 1424 memcpy(ptr, dp, len); /* d, s, l */
1419 return 0; 1425 return 0;
1420 } 1426 }
1421 item = item->ri_prev; 1427 /* take the tail entry */
1428 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1422 1429
1423 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1430 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1424 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1431 old_len = item->ri_buf[item->ri_cnt-1].i_len;
@@ -1455,8 +1462,7 @@ xlog_recover_add_to_trans(
1455 1462
1456 if (!len) 1463 if (!len)
1457 return 0; 1464 return 0;
1458 item = trans->r_itemq; 1465 if (list_empty(&trans->r_itemq)) {
1459 if (item == NULL) {
1460 /* we need to catch log corruptions here */ 1466 /* we need to catch log corruptions here */
1461 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { 1467 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1462 xlog_warn("XFS: xlog_recover_add_to_trans: " 1468 xlog_warn("XFS: xlog_recover_add_to_trans: "
@@ -1474,12 +1480,15 @@ xlog_recover_add_to_trans(
1474 memcpy(ptr, dp, len); 1480 memcpy(ptr, dp, len);
1475 in_f = (xfs_inode_log_format_t *)ptr; 1481 in_f = (xfs_inode_log_format_t *)ptr;
1476 1482
1477 if (item->ri_prev->ri_total != 0 && 1483 /* take the tail entry */
1478 item->ri_prev->ri_total == item->ri_prev->ri_cnt) { 1484 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1485 if (item->ri_total != 0 &&
1486 item->ri_total == item->ri_cnt) {
1487 /* tail item is in use, get a new one */
1479 xlog_recover_add_item(&trans->r_itemq); 1488 xlog_recover_add_item(&trans->r_itemq);
1489 item = list_entry(trans->r_itemq.prev,
1490 xlog_recover_item_t, ri_list);
1480 } 1491 }
1481 item = trans->r_itemq;
1482 item = item->ri_prev;
1483 1492
1484 if (item->ri_total == 0) { /* first region to be added */ 1493 if (item->ri_total == 0) { /* first region to be added */
1485 if (in_f->ilf_size == 0 || 1494 if (in_f->ilf_size == 0 ||
@@ -1504,96 +1513,29 @@ xlog_recover_add_to_trans(
1504 return 0; 1513 return 0;
1505} 1514}
1506 1515
1507STATIC void 1516/*
1508xlog_recover_new_tid( 1517 * Sort the log items in the transaction. Cancelled buffers need
1509 xlog_recover_t **q, 1518 * to be put first so they are processed before any items that might
1510 xlog_tid_t tid, 1519 * modify the buffers. If they are cancelled, then the modifications
1511 xfs_lsn_t lsn) 1520 * don't need to be replayed.
1512{ 1521 */
1513 xlog_recover_t *trans;
1514
1515 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1516 trans->r_log_tid = tid;
1517 trans->r_lsn = lsn;
1518 xlog_recover_put_hashq(q, trans);
1519}
1520
1521STATIC int
1522xlog_recover_unlink_tid(
1523 xlog_recover_t **q,
1524 xlog_recover_t *trans)
1525{
1526 xlog_recover_t *tp;
1527 int found = 0;
1528
1529 ASSERT(trans != NULL);
1530 if (trans == *q) {
1531 *q = (*q)->r_next;
1532 } else {
1533 tp = *q;
1534 while (tp) {
1535 if (tp->r_next == trans) {
1536 found = 1;
1537 break;
1538 }
1539 tp = tp->r_next;
1540 }
1541 if (!found) {
1542 xlog_warn(
1543 "XFS: xlog_recover_unlink_tid: trans not found");
1544 ASSERT(0);
1545 return XFS_ERROR(EIO);
1546 }
1547 tp->r_next = tp->r_next->r_next;
1548 }
1549 return 0;
1550}
1551
1552STATIC void
1553xlog_recover_insert_item_backq(
1554 xlog_recover_item_t **q,
1555 xlog_recover_item_t *item)
1556{
1557 if (*q == NULL) {
1558 item->ri_prev = item->ri_next = item;
1559 *q = item;
1560 } else {
1561 item->ri_next = *q;
1562 item->ri_prev = (*q)->ri_prev;
1563 (*q)->ri_prev = item;
1564 item->ri_prev->ri_next = item;
1565 }
1566}
1567
1568STATIC void
1569xlog_recover_insert_item_frontq(
1570 xlog_recover_item_t **q,
1571 xlog_recover_item_t *item)
1572{
1573 xlog_recover_insert_item_backq(q, item);
1574 *q = item;
1575}
1576
1577STATIC int 1522STATIC int
1578xlog_recover_reorder_trans( 1523xlog_recover_reorder_trans(
1579 xlog_recover_t *trans) 1524 xlog_recover_t *trans)
1580{ 1525{
1581 xlog_recover_item_t *first_item, *itemq, *itemq_next; 1526 xlog_recover_item_t *item, *n;
1582 xfs_buf_log_format_t *buf_f; 1527 LIST_HEAD(sort_list);
1583 ushort flags = 0;
1584 1528
1585 first_item = itemq = trans->r_itemq; 1529 list_splice_init(&trans->r_itemq, &sort_list);
1586 trans->r_itemq = NULL; 1530 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
1587 do { 1531 xfs_buf_log_format_t *buf_f;
1588 itemq_next = itemq->ri_next;
1589 buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
1590 1532
1591 switch (ITEM_TYPE(itemq)) { 1533 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
1534
1535 switch (ITEM_TYPE(item)) {
1592 case XFS_LI_BUF: 1536 case XFS_LI_BUF:
1593 flags = buf_f->blf_flags; 1537 if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
1594 if (!(flags & XFS_BLI_CANCEL)) { 1538 list_move(&item->ri_list, &trans->r_itemq);
1595 xlog_recover_insert_item_frontq(&trans->r_itemq,
1596 itemq);
1597 break; 1539 break;
1598 } 1540 }
1599 case XFS_LI_INODE: 1541 case XFS_LI_INODE:
@@ -1601,7 +1543,7 @@ xlog_recover_reorder_trans(
1601 case XFS_LI_QUOTAOFF: 1543 case XFS_LI_QUOTAOFF:
1602 case XFS_LI_EFD: 1544 case XFS_LI_EFD:
1603 case XFS_LI_EFI: 1545 case XFS_LI_EFI:
1604 xlog_recover_insert_item_backq(&trans->r_itemq, itemq); 1546 list_move_tail(&item->ri_list, &trans->r_itemq);
1605 break; 1547 break;
1606 default: 1548 default:
1607 xlog_warn( 1549 xlog_warn(
@@ -1609,8 +1551,8 @@ xlog_recover_reorder_trans(
1609 ASSERT(0); 1551 ASSERT(0);
1610 return XFS_ERROR(EIO); 1552 return XFS_ERROR(EIO);
1611 } 1553 }
1612 itemq = itemq_next; 1554 }
1613 } while (first_item != itemq); 1555 ASSERT(list_empty(&sort_list));
1614 return 0; 1556 return 0;
1615} 1557}
1616 1558
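The reorder now splices the whole item queue onto a private list and moves each entry back: list_move() re-links at the head, so cancelled buffers end up first, while list_move_tail() preserves the relative order of everything else. The _safe iterator is required because every move unlinks the cursor. The bare idiom, with a hypothetical predicate standing in for the XFS_BLI_CANCEL test:

	LIST_HEAD(sort_list);
	xlog_recover_item_t	*item, *n;

	list_splice_init(&trans->r_itemq, &sort_list);	/* r_itemq is now empty */
	list_for_each_entry_safe(item, n, &sort_list, ri_list) {
		if (is_cancelled_buffer(item))		/* hypothetical helper */
			list_move(&item->ri_list, &trans->r_itemq);
		else
			list_move_tail(&item->ri_list, &trans->r_itemq);
	}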
@@ -2242,9 +2184,9 @@ xlog_recover_do_buffer_trans(
2242 } 2184 }
2243 2185
2244 mp = log->l_mp; 2186 mp = log->l_mp;
2245 buf_flags = XFS_BUF_LOCK; 2187 buf_flags = XBF_LOCK;
2246 if (!(flags & XFS_BLI_INODE_BUF)) 2188 if (!(flags & XFS_BLI_INODE_BUF))
2247 buf_flags |= XFS_BUF_MAPPED; 2189 buf_flags |= XBF_MAPPED;
2248 2190
2249 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); 2191 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
2250 if (XFS_BUF_ISERROR(bp)) { 2192 if (XFS_BUF_ISERROR(bp)) {
@@ -2346,7 +2288,7 @@ xlog_recover_do_inode_trans(
2346 } 2288 }
2347 2289
2348 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 2290 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
2349 XFS_BUF_LOCK); 2291 XBF_LOCK);
2350 if (XFS_BUF_ISERROR(bp)) { 2292 if (XFS_BUF_ISERROR(bp)) {
2351 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2293 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
2352 bp, in_f->ilf_blkno); 2294 bp, in_f->ilf_blkno);
@@ -2814,14 +2756,13 @@ xlog_recover_do_trans(
2814 int pass) 2756 int pass)
2815{ 2757{
2816 int error = 0; 2758 int error = 0;
2817 xlog_recover_item_t *item, *first_item; 2759 xlog_recover_item_t *item;
2818 2760
2819 error = xlog_recover_reorder_trans(trans); 2761 error = xlog_recover_reorder_trans(trans);
2820 if (error) 2762 if (error)
2821 return error; 2763 return error;
2822 2764
2823 first_item = item = trans->r_itemq; 2765 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2824 do {
2825 switch (ITEM_TYPE(item)) { 2766 switch (ITEM_TYPE(item)) {
2826 case XFS_LI_BUF: 2767 case XFS_LI_BUF:
2827 error = xlog_recover_do_buffer_trans(log, item, pass); 2768 error = xlog_recover_do_buffer_trans(log, item, pass);
@@ -2854,8 +2795,7 @@ xlog_recover_do_trans(
2854 2795
2855 if (error) 2796 if (error)
2856 return error; 2797 return error;
2857 item = item->ri_next; 2798 }
2858 } while (first_item != item);
2859 2799
2860 return 0; 2800 return 0;
2861} 2801}
@@ -2869,21 +2809,18 @@ STATIC void
2869xlog_recover_free_trans( 2809xlog_recover_free_trans(
2870 xlog_recover_t *trans) 2810 xlog_recover_t *trans)
2871{ 2811{
2872 xlog_recover_item_t *first_item, *item, *free_item; 2812 xlog_recover_item_t *item, *n;
2873 int i; 2813 int i;
2874 2814
2875 item = first_item = trans->r_itemq; 2815 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
2876 do { 2816 /* Free the regions in the item. */
2877 free_item = item; 2817 list_del(&item->ri_list);
2878 item = item->ri_next; 2818 for (i = 0; i < item->ri_cnt; i++)
2879 /* Free the regions in the item. */ 2819 kmem_free(item->ri_buf[i].i_addr);
2880 for (i = 0; i < free_item->ri_cnt; i++) {
2881 kmem_free(free_item->ri_buf[i].i_addr);
2882 }
2883 /* Free the item itself */ 2820 /* Free the item itself */
2884 kmem_free(free_item->ri_buf); 2821 kmem_free(item->ri_buf);
2885 kmem_free(free_item); 2822 kmem_free(item);
2886 } while (first_item != item); 2823 }
2887 /* Free the transaction recover structure */ 2824 /* Free the transaction recover structure */
2888 kmem_free(trans); 2825 kmem_free(trans);
2889} 2826}
@@ -2891,14 +2828,12 @@ xlog_recover_free_trans(
2891STATIC int 2828STATIC int
2892xlog_recover_commit_trans( 2829xlog_recover_commit_trans(
2893 xlog_t *log, 2830 xlog_t *log,
2894 xlog_recover_t **q,
2895 xlog_recover_t *trans, 2831 xlog_recover_t *trans,
2896 int pass) 2832 int pass)
2897{ 2833{
2898 int error; 2834 int error;
2899 2835
2900 if ((error = xlog_recover_unlink_tid(q, trans))) 2836 hlist_del(&trans->r_list);
2901 return error;
2902 if ((error = xlog_recover_do_trans(log, trans, pass))) 2837 if ((error = xlog_recover_do_trans(log, trans, pass)))
2903 return error; 2838 return error;
2904 xlog_recover_free_trans(trans); /* no error */ 2839 xlog_recover_free_trans(trans); /* no error */
@@ -2926,7 +2861,7 @@ xlog_recover_unmount_trans(
2926STATIC int 2861STATIC int
2927xlog_recover_process_data( 2862xlog_recover_process_data(
2928 xlog_t *log, 2863 xlog_t *log,
2929 xlog_recover_t *rhash[], 2864 struct hlist_head rhash[],
2930 xlog_rec_header_t *rhead, 2865 xlog_rec_header_t *rhead,
2931 xfs_caddr_t dp, 2866 xfs_caddr_t dp,
2932 int pass) 2867 int pass)
@@ -2960,7 +2895,7 @@ xlog_recover_process_data(
2960 } 2895 }
2961 tid = be32_to_cpu(ohead->oh_tid); 2896 tid = be32_to_cpu(ohead->oh_tid);
2962 hash = XLOG_RHASH(tid); 2897 hash = XLOG_RHASH(tid);
2963 trans = xlog_recover_find_tid(rhash[hash], tid); 2898 trans = xlog_recover_find_tid(&rhash[hash], tid);
2964 if (trans == NULL) { /* not found; add new tid */ 2899 if (trans == NULL) { /* not found; add new tid */
2965 if (ohead->oh_flags & XLOG_START_TRANS) 2900 if (ohead->oh_flags & XLOG_START_TRANS)
2966 xlog_recover_new_tid(&rhash[hash], tid, 2901 xlog_recover_new_tid(&rhash[hash], tid,
@@ -2978,7 +2913,7 @@ xlog_recover_process_data(
2978 switch (flags) { 2913 switch (flags) {
2979 case XLOG_COMMIT_TRANS: 2914 case XLOG_COMMIT_TRANS:
2980 error = xlog_recover_commit_trans(log, 2915 error = xlog_recover_commit_trans(log,
2981 &rhash[hash], trans, pass); 2916 trans, pass);
2982 break; 2917 break;
2983 case XLOG_UNMOUNT_TRANS: 2918 case XLOG_UNMOUNT_TRANS:
2984 error = xlog_recover_unmount_trans(trans); 2919 error = xlog_recover_unmount_trans(trans);
@@ -3211,7 +3146,7 @@ xlog_recover_process_one_iunlink(
3211 /* 3146 /*
3212 * Get the on disk inode to find the next inode in the bucket. 3147 * Get the on disk inode to find the next inode in the bucket.
3213 */ 3148 */
3214 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); 3149 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK);
3215 if (error) 3150 if (error)
3216 goto fail_iput; 3151 goto fail_iput;
3217 3152
@@ -3517,7 +3452,7 @@ xlog_do_recovery_pass(
3517 int error = 0, h_size; 3452 int error = 0, h_size;
3518 int bblks, split_bblks; 3453 int bblks, split_bblks;
3519 int hblks, split_hblks, wrapped_hblks; 3454 int hblks, split_hblks, wrapped_hblks;
3520 xlog_recover_t *rhash[XLOG_RHASH_SIZE]; 3455 struct hlist_head rhash[XLOG_RHASH_SIZE];
3521 3456
3522 ASSERT(head_blk != tail_blk); 3457 ASSERT(head_blk != tail_blk);
3523 3458
@@ -3978,8 +3913,7 @@ xlog_recover_finish(
3978 * case the unlink transactions would have problems 3913 * case the unlink transactions would have problems
3979 * pushing the EFIs out of the way. 3914 * pushing the EFIs out of the way.
3980 */ 3915 */
3981 xfs_log_force(log->l_mp, (xfs_lsn_t)0, 3916 xfs_log_force(log->l_mp, XFS_LOG_SYNC);
3982 (XFS_LOG_FORCE | XFS_LOG_SYNC));
3983 3917
3984 xlog_recover_process_iunlinks(log); 3918 xlog_recover_process_iunlinks(log);
3985 3919
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h
index b22545555301..75d749207258 100644
--- a/fs/xfs/xfs_log_recover.h
+++ b/fs/xfs/xfs_log_recover.h
@@ -35,22 +35,21 @@
35 * item headers are in ri_buf[0]. Additional buffers follow. 35 * item headers are in ri_buf[0]. Additional buffers follow.
36 */ 36 */
37typedef struct xlog_recover_item { 37typedef struct xlog_recover_item {
38 struct xlog_recover_item *ri_next; 38 struct list_head ri_list;
39 struct xlog_recover_item *ri_prev; 39 int ri_type;
40 int ri_type; 40 int ri_cnt; /* count of regions found */
41 int ri_cnt; /* count of regions found */ 41 int ri_total; /* total regions */
42 int ri_total; /* total regions */ 42 xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
43 xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
44} xlog_recover_item_t; 43} xlog_recover_item_t;
45 44
46struct xlog_tid; 45struct xlog_tid;
47typedef struct xlog_recover { 46typedef struct xlog_recover {
48 struct xlog_recover *r_next; 47 struct hlist_node r_list;
49 xlog_tid_t r_log_tid; /* log's transaction id */ 48 xlog_tid_t r_log_tid; /* log's transaction id */
50 xfs_trans_header_t r_theader; /* trans header for partial */ 49 xfs_trans_header_t r_theader; /* trans header for partial */
51 int r_state; /* not needed */ 50 int r_state; /* not needed */
52 xfs_lsn_t r_lsn; /* xact lsn */ 51 xfs_lsn_t r_lsn; /* xact lsn */
53 xlog_recover_item_t *r_itemq; /* q for items */ 52 struct list_head r_itemq; /* q for items */
54} xlog_recover_t; 53} xlog_recover_t;
55 54
56#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) 55#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr)
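Both structures trade their hand-rolled next/prev pointers for the generic kernel primitives: items hang off r_itemq in log order, and transactions hang off a per-tid hash bucket. A sketch of the two traversals recovery now performs (process_item() is a hypothetical stand-in; the four-argument hlist_for_each_entry() matches the kernel API of this era):

	xlog_recover_t		*trans;
	xlog_recover_item_t	*item;
	struct hlist_node	*n;

	/* hash-bucket lookup by transaction id, as xlog_recover_find_tid() does */
	hlist_for_each_entry(trans, n, &rhash[hash], r_list)
		if (trans->r_log_tid == tid)
			break;

	/* ordered walk of that transaction's items, as xlog_recover_do_trans() does */
	list_for_each_entry(item, &trans->r_itemq, ri_list)
		process_item(item);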
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index eb403b40e120..6afaaeb2950a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -201,6 +201,38 @@ xfs_uuid_unmount(
201 201
202 202
203/* 203/*
204 * Reference counting access wrappers to the perag structures.
205 */
206struct xfs_perag *
207xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
208{
209 struct xfs_perag *pag;
210 int ref = 0;
211
212 spin_lock(&mp->m_perag_lock);
213 pag = radix_tree_lookup(&mp->m_perag_tree, agno);
214 if (pag) {
215 ASSERT(atomic_read(&pag->pag_ref) >= 0);
216 /* catch leaks in the positive direction during testing */
217 ASSERT(atomic_read(&pag->pag_ref) < 1000);
218 ref = atomic_inc_return(&pag->pag_ref);
219 }
220 spin_unlock(&mp->m_perag_lock);
221 trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
222 return pag;
223}
224
225void
226xfs_perag_put(struct xfs_perag *pag)
227{
228 int ref;
229
230 ASSERT(atomic_read(&pag->pag_ref) > 0);
231 ref = atomic_dec_return(&pag->pag_ref);
232 trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
233}
234
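xfs_perag_get() looks the AG up under m_perag_lock and takes a reference; xfs_perag_put() drops it. The upper-bound ASSERT is a debugging tripwire for leaked references, not a real limit. A usage sketch replacing the old unguarded array access:

	/* before: pag = &mp->m_perag[agno];  -- no lifetime management */
	pag = xfs_perag_get(mp, agno);
	if (pag) {
		ifree += pag->pagi_freecount;	/* safe while the reference is held */
		xfs_perag_put(pag);
	}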
235/*
204 * Free up the resources associated with a mount structure. Assume that 236 * Free up the resources associated with a mount structure. Assume that
205 * the structure was initially zeroed, so we can tell which fields got 237 * the structure was initially zeroed, so we can tell which fields got
206 * initialized. 238 * initialized.
@@ -209,13 +241,16 @@ STATIC void
209xfs_free_perag( 241xfs_free_perag(
210 xfs_mount_t *mp) 242 xfs_mount_t *mp)
211{ 243{
212 if (mp->m_perag) { 244 xfs_agnumber_t agno;
213 int agno; 245 struct xfs_perag *pag;
214 246
215 for (agno = 0; agno < mp->m_maxagi; agno++) 247 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
216 if (mp->m_perag[agno].pagb_list) 248 spin_lock(&mp->m_perag_lock);
217 kmem_free(mp->m_perag[agno].pagb_list); 249 pag = radix_tree_delete(&mp->m_perag_tree, agno);
218 kmem_free(mp->m_perag); 250 ASSERT(pag);
251 ASSERT(atomic_read(&pag->pag_ref) == 0);
252 spin_unlock(&mp->m_perag_lock);
253 kmem_free(pag);
219 } 254 }
220} 255}
221 256
@@ -389,22 +424,57 @@ xfs_initialize_perag_icache(
389 } 424 }
390} 425}
391 426
392xfs_agnumber_t 427int
393xfs_initialize_perag( 428xfs_initialize_perag(
394 xfs_mount_t *mp, 429 xfs_mount_t *mp,
395 xfs_agnumber_t agcount) 430 xfs_agnumber_t agcount,
431 xfs_agnumber_t *maxagi)
396{ 432{
397 xfs_agnumber_t index, max_metadata; 433 xfs_agnumber_t index, max_metadata;
434 xfs_agnumber_t first_initialised = 0;
398 xfs_perag_t *pag; 435 xfs_perag_t *pag;
399 xfs_agino_t agino; 436 xfs_agino_t agino;
400 xfs_ino_t ino; 437 xfs_ino_t ino;
401 xfs_sb_t *sbp = &mp->m_sb; 438 xfs_sb_t *sbp = &mp->m_sb;
402 xfs_ino_t max_inum = XFS_MAXINUMBER_32; 439 xfs_ino_t max_inum = XFS_MAXINUMBER_32;
440 int error = -ENOMEM;
403 441
404 /* Check to see if the filesystem can overflow 32 bit inodes */ 442 /* Check to see if the filesystem can overflow 32 bit inodes */
405 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); 443 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
406 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 444 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
407 445
446 /*
447 * Walk the current per-ag tree so we don't try to initialise AGs
448 * that already exist (growfs case). Allocate and insert all the
449 * AGs we don't find ready for initialisation.
450 */
451 for (index = 0; index < agcount; index++) {
452 pag = xfs_perag_get(mp, index);
453 if (pag) {
454 xfs_perag_put(pag);
455 continue;
456 }
457 if (!first_initialised)
458 first_initialised = index;
459 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
460 if (!pag)
461 goto out_unwind;
462 if (radix_tree_preload(GFP_NOFS))
463 goto out_unwind;
464 spin_lock(&mp->m_perag_lock);
465 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
466 BUG();
467 spin_unlock(&mp->m_perag_lock);
468 radix_tree_preload_end();
469 error = -EEXIST;
470 goto out_unwind;
471 }
472 pag->pag_agno = index;
473 pag->pag_mount = mp;
474 spin_unlock(&mp->m_perag_lock);
475 radix_tree_preload_end();
476 }
477
408 /* Clear the mount flag if no inode can overflow 32 bits 478 /* Clear the mount flag if no inode can overflow 32 bits
409 * on this filesystem, or if specifically requested.. 479 * on this filesystem, or if specifically requested..
410 */ 480 */
@@ -438,21 +508,33 @@ xfs_initialize_perag(
438 } 508 }
439 509
440 /* This ag is preferred for inodes */ 510 /* This ag is preferred for inodes */
441 pag = &mp->m_perag[index]; 511 pag = xfs_perag_get(mp, index);
442 pag->pagi_inodeok = 1; 512 pag->pagi_inodeok = 1;
443 if (index < max_metadata) 513 if (index < max_metadata)
444 pag->pagf_metadata = 1; 514 pag->pagf_metadata = 1;
445 xfs_initialize_perag_icache(pag); 515 xfs_initialize_perag_icache(pag);
516 xfs_perag_put(pag);
446 } 517 }
447 } else { 518 } else {
448 /* Setup default behavior for smaller filesystems */ 519 /* Setup default behavior for smaller filesystems */
449 for (index = 0; index < agcount; index++) { 520 for (index = 0; index < agcount; index++) {
450 pag = &mp->m_perag[index]; 521 pag = xfs_perag_get(mp, index);
451 pag->pagi_inodeok = 1; 522 pag->pagi_inodeok = 1;
452 xfs_initialize_perag_icache(pag); 523 xfs_initialize_perag_icache(pag);
524 xfs_perag_put(pag);
453 } 525 }
454 } 526 }
455 return index; 527 if (maxagi)
528 *maxagi = index;
529 return 0;
530
531out_unwind:
532 kmem_free(pag);
533 for (; index > first_initialised; index--) {
534 pag = radix_tree_delete(&mp->m_perag_tree, index);
535 kmem_free(pag);
536 }
537 return error;
456} 538}
457 539
458void 540void
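The growfs-aware initialisation above relies on the radix-tree preload discipline: nodes are preallocated with GFP_NOFS while sleeping is still allowed, and the insert itself runs under the spinlock where it is not. A condensed sketch (error unwinding elided); note that radix_tree_preload() disables preemption on success, so radix_tree_preload_end() must run on every path:

	if (radix_tree_preload(GFP_NOFS))
		return -ENOMEM;			/* could not preallocate nodes */
	spin_lock(&mp->m_perag_lock);
	error = radix_tree_insert(&mp->m_perag_tree, index, pag);
	spin_unlock(&mp->m_perag_lock);
	radix_tree_preload_end();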
@@ -583,7 +665,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
583 * access to the superblock. 665 * access to the superblock.
584 */ 666 */
585 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 667 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
586 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 668 extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
587 669
588 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), 670 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
589 extra_flags); 671 extra_flags);
@@ -731,12 +813,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
731 error = xfs_ialloc_pagi_init(mp, NULL, index); 813 error = xfs_ialloc_pagi_init(mp, NULL, index);
732 if (error) 814 if (error)
733 return error; 815 return error;
734 pag = &mp->m_perag[index]; 816 pag = xfs_perag_get(mp, index);
735 ifree += pag->pagi_freecount; 817 ifree += pag->pagi_freecount;
736 ialloc += pag->pagi_count; 818 ialloc += pag->pagi_count;
737 bfree += pag->pagf_freeblks; 819 bfree += pag->pagf_freeblks;
738 bfreelst += pag->pagf_flcount; 820 bfreelst += pag->pagf_flcount;
739 btree += pag->pagf_btreeblks; 821 btree += pag->pagf_btreeblks;
822 xfs_perag_put(pag);
740 } 823 }
741 /* 824 /*
742 * Overwrite incore superblock counters with just-read data 825 * Overwrite incore superblock counters with just-read data
@@ -1008,6 +1091,22 @@ xfs_mount_reset_sbqflags(
1008 return xfs_trans_commit(tp, 0); 1091 return xfs_trans_commit(tp, 0);
1009} 1092}
1010 1093
1094__uint64_t
1095xfs_default_resblks(xfs_mount_t *mp)
1096{
1097 __uint64_t resblks;
1098
1099 /*
1100 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1101 * This may drive us straight to ENOSPC on mount, but that implies
1102 * we were already there on the last unmount. Warn if this occurs.
1103 */
1104 resblks = mp->m_sb.sb_dblocks;
1105 do_div(resblks, 20);
1106 resblks = min_t(__uint64_t, resblks, 1024);
1107 return resblks;
1108}
1109
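In other words, the pool is min(sb_dblocks / 20, 1024) filesystem blocks. For illustration, assuming 4 KiB blocks: a 100 GiB filesystem has sb_dblocks = 26214400, 5% of which is 1310720, so the cap wins and 1024 blocks (4 MiB) are reserved; a 40 MiB filesystem has sb_dblocks = 10240, 5% is 512, so 512 blocks (2 MiB) are reserved.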
1011/* 1110/*
1012 * This function does the following on an initial mount of a file system: 1111 * This function does the following on an initial mount of a file system:
1013 * - reads the superblock from disk and init the mount struct 1112 * - reads the superblock from disk and init the mount struct
@@ -1152,13 +1251,13 @@ xfs_mountfs(
1152 /* 1251 /*
1153 * Allocate and initialize the per-ag data. 1252 * Allocate and initialize the per-ag data.
1154 */ 1253 */
1155 init_rwsem(&mp->m_peraglock); 1254 spin_lock_init(&mp->m_perag_lock);
1156 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), 1255 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
1157 KM_MAYFAIL); 1256 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1158 if (!mp->m_perag) 1257 if (error) {
1258 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
1159 goto out_remove_uuid; 1259 goto out_remove_uuid;
1160 1260 }
1161 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1162 1261
1163 if (!sbp->sb_logblocks) { 1262 if (!sbp->sb_logblocks) {
1164 cmn_err(CE_WARN, "XFS: no log defined"); 1263 cmn_err(CE_WARN, "XFS: no log defined");
@@ -1318,18 +1417,14 @@ xfs_mountfs(
1318 * when at ENOSPC. This is needed for operations like create with 1417 * when at ENOSPC. This is needed for operations like create with
1319 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations 1418 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
1320 * are not allowed to use this reserved space. 1419 * are not allowed to use this reserved space.
1321 *
1322 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1323 * This may drive us straight to ENOSPC on mount, but that implies
1324 * we were already there on the last unmount. Warn if this occurs.
1325 */ 1420 */
1326 resblks = mp->m_sb.sb_dblocks; 1421 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1327 do_div(resblks, 20); 1422 resblks = xfs_default_resblks(mp);
1328 resblks = min_t(__uint64_t, resblks, 1024); 1423 error = xfs_reserve_blocks(mp, &resblks, NULL);
1329 error = xfs_reserve_blocks(mp, &resblks, NULL); 1424 if (error)
1330 if (error) 1425 cmn_err(CE_WARN, "XFS: Unable to allocate reserve "
1331 cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " 1426 "blocks. Continuing without a reserve pool.");
1332 "Continuing without a reserve pool."); 1427 }
1333 1428
1334 return 0; 1429 return 0;
1335 1430
@@ -1372,8 +1467,19 @@ xfs_unmountfs(
1372 * push out the iclog we will never get that unlocked. hence we 1467 * push out the iclog we will never get that unlocked. hence we
1373 * need to force the log first. 1468 * need to force the log first.
1374 */ 1469 */
1375 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1470 xfs_log_force(mp, XFS_LOG_SYNC);
1376 xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); 1471
1472 /*
1473 * Do a delwri reclaim pass first so that as many dirty inodes are
1474 * queued up for IO as possible. Then flush the buffers before making
1475 * a synchronous path to catch all the remaining inodes are reclaimed.
1476 * This makes the reclaim process as quick as possible by avoiding
1477 * synchronous writeout and blocking on inodes already in the delwri
1478 * state as much as possible.
1479 */
1480 xfs_reclaim_inodes(mp, 0);
1481 XFS_bflush(mp->m_ddev_targp);
1482 xfs_reclaim_inodes(mp, SYNC_WAIT);
1377 1483
1378 xfs_qm_unmount(mp); 1484 xfs_qm_unmount(mp);
1379 1485
@@ -1382,7 +1488,7 @@ xfs_unmountfs(
1382 * that nothing is pinned. This is important because bflush() 1488 * that nothing is pinned. This is important because bflush()
1383 * will skip pinned buffers. 1489 * will skip pinned buffers.
1384 */ 1490 */
1385 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1491 xfs_log_force(mp, XFS_LOG_SYNC);
1386 1492
1387 xfs_binval(mp->m_ddev_targp); 1493 xfs_binval(mp->m_ddev_targp);
1388 if (mp->m_rtdev_targp) { 1494 if (mp->m_rtdev_targp) {
@@ -1548,15 +1654,14 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1548 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); 1654 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
1549 1655
1550 /* find modified range */ 1656 /* find modified range */
1657 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1658 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1659 last = xfs_sb_info[f + 1].offset - 1;
1551 1660
1552 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 1661 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
1553 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1662 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1554 first = xfs_sb_info[f].offset; 1663 first = xfs_sb_info[f].offset;
1555 1664
1556 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1557 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1558 last = xfs_sb_info[f + 1].offset - 1;
1559
1560 xfs_trans_log_buf(tp, bp, first, last); 1665 xfs_trans_log_buf(tp, bp, first, last);
1561} 1666}
1562 1667
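The range computation reads off the fields bitmask: the lowest set bit names the first modified superblock field and xfs_sb_info[f].offset its starting byte, while the highest set bit names the last field, whose extent ends one byte before the next field's offset. For illustration (byte offsets assumed, not taken from this patch): with fields = XFS_SB_ICOUNT | XFS_SB_IFREE, if sb_icount starts at byte 128 and the field following sb_ifree starts at byte 144, then first = 128 and last = 143, and xfs_trans_log_buf() dirties only those 16 bytes of the superblock buffer.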
@@ -1887,7 +1992,7 @@ xfs_getsb(
1887 1992
1888 ASSERT(mp->m_sb_bp != NULL); 1993 ASSERT(mp->m_sb_bp != NULL);
1889 bp = mp->m_sb_bp; 1994 bp = mp->m_sb_bp;
1890 if (flags & XFS_BUF_TRYLOCK) { 1995 if (flags & XBF_TRYLOCK) {
1891 if (!XFS_BUF_CPSEMA(bp)) { 1996 if (!XFS_BUF_CPSEMA(bp)) {
1892 return NULL; 1997 return NULL;
1893 } 1998 }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1df7e4502967..14dafd608230 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -78,7 +78,8 @@ typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, 78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
79 struct xfs_inode *, dm_right_t, 79 struct xfs_inode *, dm_right_t,
80 struct xfs_inode *, dm_right_t, 80 struct xfs_inode *, dm_right_t,
81 const char *, const char *, mode_t, int, int); 81 const unsigned char *, const unsigned char *,
82 mode_t, int, int);
82typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, 83typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
83 char *, char *); 84 char *, char *);
84typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, 85typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
@@ -207,8 +208,8 @@ typedef struct xfs_mount {
207 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 208 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
208 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 209 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
209 uint m_in_maxlevels; /* max inobt btree levels. */ 210 uint m_in_maxlevels; /* max inobt btree levels. */
210 struct xfs_perag *m_perag; /* per-ag accounting info */ 211 struct radix_tree_root m_perag_tree; /* per-ag accounting info */
211 struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ 212 spinlock_t m_perag_lock; /* lock for m_perag_tree */
212 struct mutex m_growlock; /* growfs mutex */ 213 struct mutex m_growlock; /* growfs mutex */
213 int m_fixedfsid[2]; /* unchanged for life of FS */ 214 int m_fixedfsid[2]; /* unchanged for life of FS */
214 uint m_dmevmask; /* DMI events for this FS */ 215 uint m_dmevmask; /* DMI events for this FS */
@@ -224,6 +225,7 @@ typedef struct xfs_mount {
224 __uint64_t m_maxioffset; /* maximum inode offset */ 225 __uint64_t m_maxioffset; /* maximum inode offset */
225 __uint64_t m_resblks; /* total reserved blocks */ 226 __uint64_t m_resblks; /* total reserved blocks */
226 __uint64_t m_resblks_avail;/* available reserved blocks */ 227 __uint64_t m_resblks_avail;/* available reserved blocks */
228 __uint64_t m_resblks_save; /* reserved blks @ remount,ro */
227 int m_dalign; /* stripe unit */ 229 int m_dalign; /* stripe unit */
228 int m_swidth; /* stripe width */ 230 int m_swidth; /* stripe width */
229 int m_sinoalign; /* stripe unit inode alignment */ 231 int m_sinoalign; /* stripe unit inode alignment */
@@ -243,7 +245,7 @@ typedef struct xfs_mount {
 	struct xfs_qmops	*m_qm_ops;	/* vector of XQM ops */
 	atomic_t		m_active_trans;	/* number trans frozen */
 #ifdef HAVE_PERCPU_SB
-	xfs_icsb_cnts_t		*m_sb_cnts;	/* per-cpu superblock counters */
+	xfs_icsb_cnts_t __percpu *m_sb_cnts;	/* per-cpu superblock counters */
 	unsigned long		m_icsb_counters; /* disabled per-cpu counters */
 	struct notifier_block	m_icsb_notifier; /* hotplug cpu notifier */
 	struct mutex		m_icsb_mutex;	/* balancer sync lock */
@@ -384,19 +386,10 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 }
 
 /*
- * perag get/put wrappers for eventual ref counting
+ * perag get/put wrappers for ref counting
  */
-static inline xfs_perag_t *
-xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino)
-{
-	return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)];
-}
-
-static inline void
-xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
-{
-	/* nothing to see here, move along */
-}
+struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
+void xfs_perag_put(struct xfs_perag *pag);
 
 /*
  * Per-cpu superblock locking functions
@@ -428,6 +421,7 @@ typedef struct xfs_mod_sb {
 } xfs_mod_sb_t;
 
 extern int	xfs_log_sbcount(xfs_mount_t *, uint);
+extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
 
 extern void	xfs_unmountfs(xfs_mount_t *);
@@ -450,7 +444,8 @@ extern struct xfs_dmops xfs_dmcore_xfs;
 #endif	/* __KERNEL__ */
 
 extern void	xfs_mod_sb(struct xfs_trans *, __int64_t);
-extern xfs_agnumber_t	xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t);
+extern int	xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
+					xfs_agnumber_t *);
 extern void	xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
 extern void	xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
 
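The xfs_mount.h hunks above replace the bare per-AG array dereference with reference-counted xfs_perag_get()/xfs_perag_put() lookups backed by a radix tree under m_perag_lock. A minimal standalone sketch of the new usage pattern follows; the toy array and everything except the get/put pairing are illustrative stand-ins, not the real implementation (which lives in fs/xfs/xfs_mount.c):

#include <assert.h>
#include <stdio.h>

struct perag {
	int	agno;
	int	ref;
};

/* toy stand-in for the radix tree keyed by AG number */
static struct perag ags[4] = { {0, 0}, {1, 0}, {2, 0}, {3, 0} };

static struct perag *perag_get(int agno)
{
	struct perag *pag = &ags[agno];

	pag->ref++;		/* reference held until perag_put() */
	return pag;
}

static void perag_put(struct perag *pag)
{
	assert(pag->ref > 0);
	pag->ref--;
}

int main(void)
{
	struct perag *pag = perag_get(2);

	printf("ag %d, ref %d\n", pag->agno, pag->ref);	/* ag 2, ref 1 */
	perag_put(pag);
	return 0;
}

The point of the change is visible in the pairing: every lookup now takes a reference the caller must drop, which is what lets the per-AG structures be reallocated safely during growfs.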
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 4b0613d99faa..45ce15dc5b2b 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -398,7 +398,7 @@ exit:
  * guaranteed that all the free functions for all the elements have finished
  * executing and the reaper is not running.
  */
-void
+static void
 xfs_mru_cache_flush(
 	xfs_mru_cache_t		*mru)
 {
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 5d439f34b0c9..36dd3ec8b4eb 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -42,7 +42,6 @@ void xfs_mru_cache_uninit(void);
 int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
 			     unsigned int grp_count,
 			     xfs_mru_cache_free_func_t free_func);
-void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 				void *value);
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 91bfd60f4c74..fdcab3f81dde 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -223,16 +223,9 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_RES_INOS	0x0800000
 
 /*
- * flags for dqflush and dqflush_all.
- */
-#define XFS_QMOPT_SYNC		0x1000000
-#define XFS_QMOPT_ASYNC		0x2000000
-#define XFS_QMOPT_DELWRI	0x4000000
-
-/*
  * flags for dqalloc.
  */
-#define XFS_QMOPT_INHERIT	0x8000000
+#define XFS_QMOPT_INHERIT	0x1000000
 
 /*
  * flags to xfs_trans_mod_dquot.
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 5aa07caea5f1..e336742a58a4 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -47,48 +47,6 @@
 #include "xfs_trace.h"
 
 /*
- * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
- * which clears the setuid and setgid bits when a file is written.
- */
-int
-xfs_write_clear_setuid(
-	xfs_inode_t	*ip)
-{
-	xfs_mount_t	*mp;
-	xfs_trans_t	*tp;
-	int		error;
-
-	mp = ip->i_mount;
-	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
-	if ((error = xfs_trans_reserve(tp, 0,
-				      XFS_WRITEID_LOG_RES(mp),
-				      0, 0, 0))) {
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-	xfs_trans_ihold(tp, ip);
-	ip->i_d.di_mode &= ~S_ISUID;
-
-	/*
-	 * Note that we don't have to worry about mandatory
-	 * file locking being disabled here because we only
-	 * clear the S_ISGID bit if the Group execute bit is
-	 * on, but if it was on then mandatory locking wouldn't
-	 * have been enabled.
-	 */
-	if (ip->i_d.di_mode & S_IXGRP) {
-		ip->i_d.di_mode &= ~S_ISGID;
-	}
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return 0;
-}
-
-/*
  * Force a shutdown of the filesystem instantly while keeping
  * the filesystem consistent. We don't do an unmount here; just shutdown
  * the shop, make sure that absolutely nothing persistent happens to
@@ -153,88 +111,6 @@ xfs_do_force_shutdown(
 	}
 }
 
-
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call biodone
- * so that the proper iodone callbacks get called.
- */
-int
-xfs_bioerror(
-	xfs_buf_t *bp)
-{
-
-#ifdef XFSERRORDEBUG
-	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-	/*
-	 * No need to wait until the buffer is unpinned.
-	 * We aren't flushing it.
-	 */
-	XFS_BUF_ERROR(bp, EIO);
-	/*
-	 * We're calling biodone, so delete B_DONE flag. Either way
-	 * we have to call the iodone callback, and calling biodone
-	 * probably is the best way since it takes care of
-	 * GRIO as well.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_UNDELAYWRITE(bp);
-	XFS_BUF_UNDONE(bp);
-	XFS_BUF_STALE(bp);
-
-	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
-	xfs_biodone(bp);
-
-	return (EIO);
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the biodone call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-int
-xfs_bioerror_relse(
-	xfs_buf_t *bp)
-{
-	int64_t fl;
-
-	ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
-	ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);
-
-	fl = XFS_BUF_BFLAGS(bp);
-	/*
-	 * No need to wait until the buffer is unpinned.
-	 * We aren't flushing it.
-	 *
-	 * chunkhold expects B_DONE to be set, whether
-	 * we actually finish the I/O or not. We don't want to
-	 * change that interface.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_UNDELAYWRITE(bp);
-	XFS_BUF_DONE(bp);
-	XFS_BUF_STALE(bp);
-	XFS_BUF_CLR_IODONE_FUNC(bp);
-	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
-	if (!(fl & XFS_B_ASYNC)) {
-		/*
-		 * Mark b_error and B_ERROR _both_.
-		 * Lot's of chunkcache code assumes that.
-		 * There's no reason to mark error for
-		 * ASYNC buffers.
-		 */
-		XFS_BUF_ERROR(bp, EIO);
-		XFS_BUF_FINISH_IOWAIT(bp);
-	} else {
-		xfs_buf_relse(bp);
-	}
-	return (EIO);
-}
-
 /*
  * Prints out an ALERT message about I/O error.
  */
@@ -306,37 +182,6 @@ xfs_read_buf(
 }
 
 /*
- * Wrapper around bwrite() so that we can trap
- * write errors, and act accordingly.
- */
-int
-xfs_bwrite(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
-{
-	int			error;
-
-	/*
-	 * XXXsup how does this work for quotas.
-	 */
-	XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
-	bp->b_mount = mp;
-	XFS_BUF_WRITE(bp);
-
-	if ((error = XFS_bwrite(bp))) {
-		ASSERT(mp);
-		/*
-		 * Cannot put a buftrace here since if the buffer is not
-		 * B_HOLD then we will brelse() the buffer before returning
-		 * from bwrite and we could be tracing a buffer that has
-		 * been reused.
-		 */
-		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-	}
-	return (error);
-}
-
-/*
  * helper function to extract extent size hint from inode
  */
 xfs_extlen_t
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 571f2174435c..11c41ec6ed75 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -39,10 +39,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
 /*
  * Prototypes for functions in xfs_rw.c.
  */
-extern int xfs_write_clear_setuid(struct xfs_inode *ip);
-extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
-extern int xfs_bioerror(struct xfs_buf *bp);
-extern int xfs_bioerror_relse(struct xfs_buf *bp);
 extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
 			xfs_daddr_t blkno, int len, uint flags,
 			struct xfs_buf **bpp);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 237badcbac3b..be942d4e3324 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -981,9 +981,8 @@ shut_us_down:
 	 */
 	if (sync) {
 		if (!error) {
-			error = _xfs_log_force(mp, commit_lsn,
-				      XFS_LOG_FORCE | XFS_LOG_SYNC,
-				      log_flushed);
+			error = _xfs_log_force_lsn(mp, commit_lsn,
+				      XFS_LOG_SYNC, log_flushed);
 		}
 		XFS_STATS_INC(xs_trans_sync);
 	} else {
@@ -1121,7 +1120,7 @@ xfs_trans_fill_vecs(
 	tp->t_header.th_num_items = nitems;
 	log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
 	log_vector->i_len = sizeof(xfs_trans_header_t);
-	XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR);
+	log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
 }
 
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ca64f33c63a3..c93e3a102857 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -861,8 +861,7 @@ typedef struct xfs_item_ops {
 #define XFS_ITEM_SUCCESS	0
 #define XFS_ITEM_PINNED		1
 #define XFS_ITEM_LOCKED		2
-#define XFS_ITEM_FLUSHING	3
-#define XFS_ITEM_PUSHBUF	4
+#define XFS_ITEM_PUSHBUF	3
 
 /*
  * This structure is used to maintain a list of block ranges that have been
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 2ffc570679be..e799824f7245 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -237,14 +237,15 @@ out:
 }
 
 /*
- * Function that does the work of pushing on the AIL
+ * xfsaild_push does the work of pushing on the AIL. Returning a timeout of
+ * zero indicates that the caller should sleep until woken.
  */
 long
 xfsaild_push(
 	struct xfs_ail	*ailp,
 	xfs_lsn_t	*last_lsn)
 {
-	long		tout = 1000; /* milliseconds */
+	long		tout = 0;
 	xfs_lsn_t	last_pushed_lsn = *last_lsn;
 	xfs_lsn_t	target = ailp->xa_target;
 	xfs_lsn_t	lsn;
@@ -252,6 +253,7 @@ xfsaild_push(
 	int		flush_log, count, stuck;
 	xfs_mount_t	*mp = ailp->xa_mount;
 	struct xfs_ail_cursor	*cur = &ailp->xa_cursors;
+	int		push_xfsbufd = 0;
 
 	spin_lock(&ailp->xa_lock);
 	xfs_trans_ail_cursor_init(ailp, cur);
@@ -262,7 +264,7 @@ xfsaild_push(
 	 */
 		xfs_trans_ail_cursor_done(ailp, cur);
 		spin_unlock(&ailp->xa_lock);
-		last_pushed_lsn = 0;
+		*last_lsn = 0;
 		return tout;
 	}
 
@@ -279,7 +281,6 @@ xfsaild_push(
 	 * prevents use from spinning when we can't do anything or there is
 	 * lots of contention on the AIL lists.
 	 */
-	tout = 10;
 	lsn = lip->li_lsn;
 	flush_log = stuck = count = 0;
 	while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
@@ -308,6 +309,7 @@ xfsaild_push(
 			XFS_STATS_INC(xs_push_ail_pushbuf);
 			IOP_PUSHBUF(lip);
 			last_pushed_lsn = lsn;
+			push_xfsbufd = 1;
 			break;
 
 		case XFS_ITEM_PINNED:
@@ -322,12 +324,6 @@ xfsaild_push(
 			stuck++;
 			break;
 
-		case XFS_ITEM_FLUSHING:
-			XFS_STATS_INC(xs_push_ail_flushing);
-			last_pushed_lsn = lsn;
-			stuck++;
-			break;
-
 		default:
 			ASSERT(0);
 			break;
@@ -371,19 +367,24 @@ xfsaild_push(
 		 * move forward in the AIL.
 		 */
 		XFS_STATS_INC(xs_push_ail_flush);
-		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
+		xfs_log_force(mp, 0);
+	}
+
+	if (push_xfsbufd) {
+		/* we've got delayed write buffers to flush */
+		wake_up_process(mp->m_ddev_targp->bt_task);
 	}
 
 	if (!count) {
 		/* We're past our target or empty, so idle */
-		tout = 1000;
+		last_pushed_lsn = 0;
 	} else if (XFS_LSN_CMP(lsn, target) >= 0) {
 		/*
 		 * We reached the target so wait a bit longer for I/O to
 		 * complete and remove pushed items from the AIL before we
 		 * start the next scan from the start of the AIL.
 		 */
-		tout += 20;
+		tout = 50;
 		last_pushed_lsn = 0;
 	} else if ((stuck * 100) / count > 90) {
 		/*
@@ -395,11 +396,14 @@ xfsaild_push(
 		 * Backoff a bit more to allow some I/O to complete before
 		 * continuing from where we were.
 		 */
-		tout += 10;
+		tout = 20;
+	} else {
+		/* more to do, but wait a short while before continuing */
+		tout = 10;
 	}
 	*last_lsn = last_pushed_lsn;
 	return tout;
-} /* xfsaild_push */
+}
 
 
 /*
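The xfs_trans_ail.c hunks above replace the old incremental `tout += N` adjustments with fixed timeouts, where zero now means "sleep until woken". A minimal standalone model of the new selection logic, assuming count/stuck/target state is computed as in the loop above (the function name is illustrative, not kernel code):

#include <stdio.h>

/* 0 means "sleep until woken"; otherwise a delay in milliseconds. */
static long aild_timeout(int count, int stuck, int reached_target)
{
	if (count == 0)
		return 0;	/* AIL empty or past target: idle until woken */
	if (reached_target)
		return 50;	/* let pushed I/O complete, then rescan */
	if (stuck * 100 / count > 90)
		return 20;	/* mostly stuck items: back off harder */
	return 10;		/* more to do: short pause, then continue */
}

int main(void)
{
	printf("%ld\n", aild_timeout(0, 0, 0));		/* 0  */
	printf("%ld\n", aild_timeout(10, 10, 0));	/* 20 */
	printf("%ld\n", aild_timeout(10, 1, 0));	/* 10 */
	return 0;
}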
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 49130628d5ef..5ffd544434eb 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -75,13 +75,14 @@ xfs_trans_get_buf(xfs_trans_t *tp,
 	xfs_buf_log_item_t	*bip;
 
 	if (flags == 0)
-		flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;
+		flags = XBF_LOCK | XBF_MAPPED;
 
 	/*
 	 * Default to a normal get_buf() call if the tp is NULL.
 	 */
 	if (tp == NULL)
-		return xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY);
+		return xfs_buf_get(target_dev, blkno, len,
+				   flags | XBF_DONT_BLOCK);
 
 	/*
 	 * If we find the buffer in the cache with this transaction
@@ -117,14 +118,14 @@ xfs_trans_get_buf(xfs_trans_t *tp,
 	}
 
 	/*
-	 * We always specify the BUF_BUSY flag within a transaction so
-	 * that get_buf does not try to push out a delayed write buffer
+	 * We always specify the XBF_DONT_BLOCK flag within a transaction
+	 * so that get_buf does not try to push out a delayed write buffer
 	 * which might cause another transaction to take place (if the
 	 * buffer was delayed alloc).  Such recursive transactions can
 	 * easily deadlock with our current transaction as well as cause
 	 * us to run out of stack space.
 	 */
-	bp = xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY);
+	bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK);
 	if (bp == NULL) {
 		return NULL;
 	}
@@ -290,15 +291,15 @@ xfs_trans_read_buf(
 	int			error;
 
 	if (flags == 0)
-		flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;
+		flags = XBF_LOCK | XBF_MAPPED;
 
 	/*
 	 * Default to a normal get_buf() call if the tp is NULL.
 	 */
 	if (tp == NULL) {
-		bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY);
+		bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
 		if (!bp)
-			return (flags & XFS_BUF_TRYLOCK) ?
+			return (flags & XBF_TRYLOCK) ?
 					EAGAIN : XFS_ERROR(ENOMEM);
 
 		if (XFS_BUF_GETERROR(bp) != 0) {
@@ -385,14 +386,14 @@ xfs_trans_read_buf(
 	}
 
 	/*
-	 * We always specify the BUF_BUSY flag within a transaction so
-	 * that get_buf does not try to push out a delayed write buffer
+	 * We always specify the XBF_DONT_BLOCK flag within a transaction
+	 * so that get_buf does not try to push out a delayed write buffer
 	 * which might cause another transaction to take place (if the
 	 * buffer was delayed alloc).  Such recursive transactions can
 	 * easily deadlock with our current transaction as well as cause
 	 * us to run out of stack space.
 	 */
-	bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY);
+	bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
 	if (bp == NULL) {
 		*bpp = NULL;
 		return 0;
@@ -472,8 +473,8 @@ shutdown_abort:
 	if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
 		cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp);
 #endif
-	ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
-						(XFS_B_STALE|XFS_B_DELWRI));
+	ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
+						(XBF_STALE|XBF_DELWRI));
 
 	trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
 	xfs_buf_relse(bp);
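Both xfs_trans_get_buf() and xfs_trans_read_buf() above now spell the old BUF_BUSY behaviour as XBF_DONT_BLOCK: inside a transaction, buffer lookup must never push delayed-write buffers, since that could start a recursive transaction and deadlock against the one already held. A small self-contained sketch of the flag policy, with hypothetical flag values standing in for the real XBF_* definitions in xfs_buf.h:

#include <stdio.h>

/* hypothetical values for illustration only */
#define XBF_LOCK	(1 << 0)
#define XBF_MAPPED	(1 << 1)
#define XBF_DONT_BLOCK	(1 << 2)

static unsigned int trans_buf_flags(unsigned int flags)
{
	if (flags == 0)			/* default within a transaction */
		flags = XBF_LOCK | XBF_MAPPED;
	/* always set: lookup must not block on delayed-write pushout */
	return flags | XBF_DONT_BLOCK;
}

int main(void)
{
	printf("0x%x\n", trans_buf_flags(0));	/* 0x7 */
	return 0;
}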
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index d725428c9df6..b09904555d07 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -151,8 +151,8 @@ typedef enum {
 } xfs_btnum_t;
 
 struct xfs_name {
-	const char	*name;
+	const unsigned char	*name;
 	int		len;
 };
 
 #endif	/* __XFS_TYPES_H__ */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6f268756bf36..ddd2c5d1b854 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -256,7 +256,7 @@ xfs_setattr(
 		    iattr->ia_size > ip->i_d.di_size) {
 			code = xfs_flush_pages(ip,
 					ip->i_d.di_size, iattr->ia_size,
-					XFS_B_ASYNC, FI_NONE);
+					XBF_ASYNC, FI_NONE);
 		}
 
 		/* wait for all I/O to complete */
@@ -597,7 +597,7 @@ xfs_fsync(
 {
 	xfs_trans_t	*tp;
 	int		error = 0;
-	int		log_flushed = 0, changed = 1;
+	int		log_flushed = 0;
 
 	xfs_itrace_entry(ip);
 
@@ -627,19 +627,16 @@ xfs_fsync(
 	 * disk yet, the inode will be still be pinned. If it is,
 	 * force the log.
 	 */
-
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
 	if (xfs_ipincount(ip)) {
-		error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
-			      XFS_LOG_FORCE | XFS_LOG_SYNC,
-			      &log_flushed);
-	} else {
-		/*
-		 * If the inode is not pinned and nothing has changed
-		 * we don't need to flush the cache.
-		 */
-		changed = 0;
+		if (ip->i_itemp->ili_last_lsn) {
+			error = _xfs_log_force_lsn(ip->i_mount,
+					ip->i_itemp->ili_last_lsn,
+					XFS_LOG_SYNC, &log_flushed);
+		} else {
+			error = _xfs_log_force(ip->i_mount,
+					XFS_LOG_SYNC, &log_flushed);
+		}
 	}
 	} else {
 		/*
@@ -674,7 +671,7 @@ xfs_fsync(
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
-	if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
+	if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
 		/*
 		 * If the log write didn't issue an ordered tag we need
 		 * to flush the disk cache for the data device now.
@@ -1096,7 +1093,7 @@ xfs_release(
 		 */
 		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
 		if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
-			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
+			xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
 	}
 
 	if (ip->i_d.di_nlink != 0) {
@@ -2199,7 +2196,8 @@ xfs_symlink(
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
 					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
-					link_name->name, target_path, 0, 0, 0);
+					link_name->name,
+					(unsigned char *)target_path, 0, 0, 0);
 		if (error)
 			return error;
 	}
@@ -2395,7 +2393,8 @@ std_return:
 					dp, DM_RIGHT_NULL,
 					error ? NULL : ip,
 					DM_RIGHT_NULL, link_name->name,
-					target_path, 0, error, 0);
+					(unsigned char *)target_path,
+					0, error, 0);
 	}
 
 	if (!error)
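The xfs_fsync() rework in the xfs_vnodeops.c hunks above forces the log only up to the inode's last-modification LSN when one is recorded, rather than unconditionally forcing the whole log for a pinned inode. A compact standalone model of that decision, under the assumption that pin count and last LSN are tracked as in the code above (the function name is illustrative):

#include <stdio.h>

typedef unsigned long long xfs_lsn_t;

static const char *fsync_log_action(int pincount, xfs_lsn_t last_lsn)
{
	if (!pincount)
		return "inode not pinned: no log force needed";
	if (last_lsn)
		return "force log up to last_lsn only";
	return "no LSN recorded: force the whole log";
}

int main(void)
{
	printf("%s\n", fsync_log_action(1, 1234ULL));
	printf("%s\n", fsync_log_action(0, 0ULL));
	return 0;
}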
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 167a467403a5..774f40729ca1 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -43,11 +43,11 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
 int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
 		struct xfs_inode *src_ip, struct xfs_inode *target_dp,
 		struct xfs_name *target_name, struct xfs_inode *target_ip);
-int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
-		int *valuelenp, int flags);
-int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
-		int valuelen, int flags);
-int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
+int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
+		unsigned char *value, int *valuelenp, int flags);
+int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
+		unsigned char *value, int valuelen, int flags);
+int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
 		int flags, struct attrlist_cursor_kern *cursor);
 ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,