aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-06 14:42:31 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-06 14:42:31 -0400
commit77d0ab600af4bf5152bc98d0ac1edbc34c1e5fdf (patch)
treeabe9390bf6e87c034c7f331c25db11d292de6ca9
parente7d0c41ecc2e372a81741a30894f556afec24315 (diff)
parent309e8cda596f6552a32dd14b969ce9b17f837f2f (diff)
Merge tag 'gfs2-4.14.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2
Pull GFS2 updates from Bob Peterson: "We've got a whopping 29 GFS2 patches for this merge window, mainly because we held some back from the previous merge window until we could get them perfected and well tested. We have a couple patch sets, including my patch set for protecting glock gl_object and Andreas Gruenbacher's patch set to fix the long-standing shrink-slab hang, plus a bunch of assorted bugs and cleanups. Summary: - I fixed a bug whereby an IO error would lead to a double-brelse. - Andreas Gruenbacher made a minor cleanup to call his relatively new function, gfs2_holder_initialized, rather than doing it manually. This was just missed by a previous patch set. - Jan Kara fixed a bug whereby the SGID was being cleared when inheriting ACLs. - Andreas found a bug and fixed it in his previous patch, "Get rid of flush_delayed_work in gfs2_evict_inode". A call to flush_delayed_work was deleted from gfs2_inode_lookup and added to gfs2_create_inode. - Wang Xibo found and fixed a list_add call in inode_go_lock that specified the parameters in the wrong order. - Coly Li submitted a patch to add the REQ_PRIO to some of GFS2's metadata reads that were accidentally missing them. - I submitted a 4-patch set to protect the glock gl_object field. GFS2 was setting and checking gl_object with no locking mechanism, so the value was occasionally stomped on, which caused file system corruption. - I submitted a small cleanup to function gfs2_clear_rgrpd. It was needlessly adding rgrp glocks to the lru list, then pulling them back off immediately. The rgrp glocks don't use the lru list anyway, so doing so was just a waste of time. - I submitted a patch that checks the GLOF_LRU flag on a glock before trying to remove it from the lru_list. This avoids a lot of unnecessary spin_lock contention. - I submitted a patch to delete GFS2's debugfs files only after we evict all the glocks. 
Before this patch, GFS2 would delete the debugfs files, and if unmount hung waiting for a glock, there was no way to debug the problem. Now, if a hang occurs during umount, we can examine the debugfs files to figure out why it's hung. - Andreas Gruenbacher submitted a patch to fix some trivial typos. - Andreas also submitted a five-part patch set to fix the longstanding hang involving the slab shrinker: dlm requires memory, calls the inode shrinker, which calls gfs2's evict, which calls back into DLM before it can evict an inode. - Abhi Das submitted a patch to forcibly flush the active items list to relieve memory pressure. This fixes a long-standing bug whereby GFS2 was getting hung permanently in balance_dirty_pages. - Thomas Tai submitted a patch to fix a slab corruption problem due to a residual pointer left in the lock_dlm lockstruct. - I submitted a patch to withdraw the file system if IO errors are encountered while writing to the journals or statfs system file which were previously not being sent back up. Before, some IO errors were sometimes not detected for several hours, and at recovery time, the journal errors made journal replay impossible. - Andreas has a patch to fix an annoying format-truncation compiler warning so GFS2 compiles cleanly. - I have a patch that fixes a handful of sparse compiler warnings. - Andreas fixed up a useless gl_object warning caused by an earlier patch. - Arvind Yadav added a patch to properly constify our rhashtable params declaration. - I added a patch to fix a regression caused by the non-recursive delete and truncate patch that caused file system blocks to not be properly freed. - Ernesto A. Fernández added a patch to fix a place where GFS2 would send back the wrong return code setting extended attributes. 
- Ernesto also added a patch to fix a case in which GFS2 was improperly setting an inode's i_mode, potentially granting access to the wrong users" * tag 'gfs2-4.14.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: (29 commits) gfs2: preserve i_mode if __gfs2_set_acl() fails gfs2: don't return ENODATA in __gfs2_xattr_set unless replacing GFS2: Fix non-recursive truncate bug gfs2: constify rhashtable_params GFS2: Fix gl_object warnings GFS2: Fix up some sparse warnings gfs2: Silence gcc format-truncation warning GFS2: Withdraw for IO errors writing to the journal or statfs gfs2: fix slab corruption during mounting and umounting gfs file system gfs2: forcibly flush ail to relieve memory pressure gfs2: Clean up waiting on glocks gfs2: Defer deleting inodes under memory pressure gfs2: gfs2_evict_inode: Put glocks asynchronously gfs2: Get rid of gfs2_set_nlink gfs2: gfs2_glock_get: Wait on freeing glocks gfs2: Fix trivial typos GFS2: Delete debugfs files only after we evict the glocks GFS2: Don't waste time locking lru_lock for non-lru glocks GFS2: Don't bother trying to add rgrps to the lru list GFS2: Clear gl_object when deleting an inode in gfs2_delete_inode ...
-rw-r--r--fs/gfs2/acl.c30
-rw-r--r--fs/gfs2/aops.c14
-rw-r--r--fs/gfs2/bmap.c24
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/file.c3
-rw-r--r--fs/gfs2/glock.c137
-rw-r--r--fs/gfs2/glock.h36
-rw-r--r--fs/gfs2/glops.c30
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c17
-rw-r--r--fs/gfs2/lock_dlm.c5
-rw-r--r--fs/gfs2/log.c13
-rw-r--r--fs/gfs2/lops.c7
-rw-r--r--fs/gfs2/meta_io.c9
-rw-r--r--fs/gfs2/ops_fstype.c7
-rw-r--r--fs/gfs2/quota.c7
-rw-r--r--fs/gfs2/rgrp.c3
-rw-r--r--fs/gfs2/super.c71
-rw-r--r--fs/gfs2/util.h1
-rw-r--r--fs/gfs2/xattr.c9
20 files changed, 321 insertions, 110 deletions
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 2524807ee070..9d5eecb123de 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -86,19 +86,6 @@ int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
86 char *data; 86 char *data;
87 const char *name = gfs2_acl_name(type); 87 const char *name = gfs2_acl_name(type);
88 88
89 if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
90 return -E2BIG;
91
92 if (type == ACL_TYPE_ACCESS) {
93 umode_t mode = inode->i_mode;
94
95 error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
96 if (error)
97 return error;
98 if (mode != inode->i_mode)
99 mark_inode_dirty(inode);
100 }
101
102 if (acl) { 89 if (acl) {
103 len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); 90 len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
104 if (len == 0) 91 if (len == 0)
@@ -129,6 +116,10 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
129 struct gfs2_holder gh; 116 struct gfs2_holder gh;
130 bool need_unlock = false; 117 bool need_unlock = false;
131 int ret; 118 int ret;
119 umode_t mode;
120
121 if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
122 return -E2BIG;
132 123
133 ret = gfs2_rsqa_alloc(ip); 124 ret = gfs2_rsqa_alloc(ip);
134 if (ret) 125 if (ret)
@@ -140,7 +131,20 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
140 return ret; 131 return ret;
141 need_unlock = true; 132 need_unlock = true;
142 } 133 }
134
135 mode = inode->i_mode;
136 if (type == ACL_TYPE_ACCESS && acl) {
137 ret = posix_acl_update_mode(inode, &mode, &acl);
138 if (ret)
139 goto unlock;
140 }
141
143 ret = __gfs2_set_acl(inode, acl, type); 142 ret = __gfs2_set_acl(inode, acl, type);
143 if (!ret && mode != inode->i_mode) {
144 inode->i_mode = mode;
145 mark_inode_dirty(inode);
146 }
147unlock:
144 if (need_unlock) 148 if (need_unlock)
145 gfs2_glock_dq_uninit(&gh); 149 gfs2_glock_dq_uninit(&gh);
146 return ret; 150 return ret;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ed7a2e252ad8..68ed06962537 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -234,7 +234,19 @@ out:
234static int gfs2_writepages(struct address_space *mapping, 234static int gfs2_writepages(struct address_space *mapping,
235 struct writeback_control *wbc) 235 struct writeback_control *wbc)
236{ 236{
237 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); 237 struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
238 int ret = mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
239
240 /*
241 * Even if we didn't write any pages here, we might still be holding
242 * dirty pages in the ail. We forcibly flush the ail because we don't
243 * want balance_dirty_pages() to loop indefinitely trying to write out
244 * pages held in the ail that it can't find.
245 */
246 if (ret == 0)
247 set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
248
249 return ret;
238} 250}
239 251
240/** 252/**
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 9fa3aef9a5b3..3dd0cceefa43 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -291,8 +291,9 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl,
291 if (trylock_buffer(rabh)) { 291 if (trylock_buffer(rabh)) {
292 if (!buffer_uptodate(rabh)) { 292 if (!buffer_uptodate(rabh)) {
293 rabh->b_end_io = end_buffer_read_sync; 293 rabh->b_end_io = end_buffer_read_sync;
294 submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, 294 submit_bh(REQ_OP_READ,
295 rabh); 295 REQ_RAHEAD | REQ_META | REQ_PRIO,
296 rabh);
296 continue; 297 continue;
297 } 298 }
298 unlock_buffer(rabh); 299 unlock_buffer(rabh);
@@ -1103,8 +1104,15 @@ static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1103 1104
1104 while (true) { 1105 while (true) {
1105 ptr = metapointer(h, mp); 1106 ptr = metapointer(h, mp);
1106 if (*ptr) /* if we have a non-null pointer */ 1107 if (*ptr) { /* if we have a non-null pointer */
1108 /* Now zero the metapath after the current height. */
1109 h++;
1110 if (h < GFS2_MAX_META_HEIGHT)
1111 memset(&mp->mp_list[h], 0,
1112 (GFS2_MAX_META_HEIGHT - h) *
1113 sizeof(mp->mp_list[0]));
1107 return true; 1114 return true;
1115 }
1108 1116
1109 if (mp->mp_list[h] < ptrs) 1117 if (mp->mp_list[h] < ptrs)
1110 mp->mp_list[h]++; 1118 mp->mp_list[h]++;
@@ -1120,6 +1128,13 @@ enum dealloc_states {
1120 DEALLOC_DONE = 3, /* process complete */ 1128 DEALLOC_DONE = 3, /* process complete */
1121}; 1129};
1122 1130
1131static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h)
1132{
1133 if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0])))
1134 return false;
1135 return true;
1136}
1137
1123/** 1138/**
1124 * trunc_dealloc - truncate a file down to a desired size 1139 * trunc_dealloc - truncate a file down to a desired size
1125 * @ip: inode to truncate 1140 * @ip: inode to truncate
@@ -1197,8 +1212,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
1197 /* If we're truncating to a non-zero size and the mp is 1212 /* If we're truncating to a non-zero size and the mp is
1198 at the beginning of file for the strip height, we 1213 at the beginning of file for the strip height, we
1199 need to preserve the first metadata pointer. */ 1214 need to preserve the first metadata pointer. */
1200 preserve1 = (newsize && 1215 preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h));
1201 (mp.mp_list[mp_h] == nbof[mp_h]));
1202 bh = mp.mp_bh[mp_h]; 1216 bh = mp.mp_bh[mp_h];
1203 gfs2_assert_withdraw(sdp, bh); 1217 gfs2_assert_withdraw(sdp, bh);
1204 if (gfs2_assert_withdraw(sdp, 1218 if (gfs2_assert_withdraw(sdp,
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5ee2e2f8576c..06a0d1947c77 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1513,7 +1513,9 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
1513 continue; 1513 continue;
1514 } 1514 }
1515 bh->b_end_io = end_buffer_read_sync; 1515 bh->b_end_io = end_buffer_read_sync;
1516 submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, bh); 1516 submit_bh(REQ_OP_READ,
1517 REQ_RAHEAD | REQ_META | REQ_PRIO,
1518 bh);
1517 continue; 1519 continue;
1518 } 1520 }
1519 brelse(bh); 1521 brelse(bh);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c2062a108d19..bb48074be019 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1030,8 +1030,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
1030 1030
1031 mutex_lock(&fp->f_fl_mutex); 1031 mutex_lock(&fp->f_fl_mutex);
1032 1032
1033 gl = fl_gh->gh_gl; 1033 if (gfs2_holder_initialized(fl_gh)) {
1034 if (gl) {
1035 if (fl_gh->gh_state == state) 1034 if (fl_gh->gh_state == state)
1036 goto out; 1035 goto out;
1037 locks_lock_file_wait(file, 1036 locks_lock_file_wait(file,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c38ab6c81898..98e845b7841b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -15,6 +15,7 @@
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/sort.h> 17#include <linux/sort.h>
18#include <linux/hash.h>
18#include <linux/jhash.h> 19#include <linux/jhash.h>
19#include <linux/kallsyms.h> 20#include <linux/kallsyms.h>
20#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
@@ -71,7 +72,7 @@ static DEFINE_SPINLOCK(lru_lock);
71#define GFS2_GL_HASH_SHIFT 15 72#define GFS2_GL_HASH_SHIFT 15
72#define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT) 73#define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT)
73 74
74static struct rhashtable_params ht_parms = { 75static const struct rhashtable_params ht_parms = {
75 .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, 76 .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
76 .key_len = offsetofend(struct lm_lockname, ln_type), 77 .key_len = offsetofend(struct lm_lockname, ln_type),
77 .key_offset = offsetof(struct gfs2_glock, gl_name), 78 .key_offset = offsetof(struct gfs2_glock, gl_name),
@@ -80,6 +81,49 @@ static struct rhashtable_params ht_parms = {
80 81
81static struct rhashtable gl_hash_table; 82static struct rhashtable gl_hash_table;
82 83
84#define GLOCK_WAIT_TABLE_BITS 12
85#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
86static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;
87
88struct wait_glock_queue {
89 struct lm_lockname *name;
90 wait_queue_entry_t wait;
91};
92
93static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
94 int sync, void *key)
95{
96 struct wait_glock_queue *wait_glock =
97 container_of(wait, struct wait_glock_queue, wait);
98 struct lm_lockname *wait_name = wait_glock->name;
99 struct lm_lockname *wake_name = key;
100
101 if (wake_name->ln_sbd != wait_name->ln_sbd ||
102 wake_name->ln_number != wait_name->ln_number ||
103 wake_name->ln_type != wait_name->ln_type)
104 return 0;
105 return autoremove_wake_function(wait, mode, sync, key);
106}
107
108static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
109{
110 u32 hash = jhash2((u32 *)name, sizeof(*name) / 4, 0);
111
112 return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
113}
114
115/**
116 * wake_up_glock - Wake up waiters on a glock
117 * @gl: the glock
118 */
119static void wake_up_glock(struct gfs2_glock *gl)
120{
121 wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);
122
123 if (waitqueue_active(wq))
124 __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
125}
126
83static void gfs2_glock_dealloc(struct rcu_head *rcu) 127static void gfs2_glock_dealloc(struct rcu_head *rcu)
84{ 128{
85 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); 129 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
@@ -96,6 +140,9 @@ void gfs2_glock_free(struct gfs2_glock *gl)
96{ 140{
97 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 141 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
98 142
143 rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
144 smp_mb();
145 wake_up_glock(gl);
99 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); 146 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
100 if (atomic_dec_and_test(&sdp->sd_glock_disposal)) 147 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
101 wake_up(&sdp->sd_glock_wait); 148 wake_up(&sdp->sd_glock_wait);
@@ -107,7 +154,7 @@ void gfs2_glock_free(struct gfs2_glock *gl)
107 * 154 *
108 */ 155 */
109 156
110static void gfs2_glock_hold(struct gfs2_glock *gl) 157void gfs2_glock_hold(struct gfs2_glock *gl)
111{ 158{
112 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); 159 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
113 lockref_get(&gl->gl_lockref); 160 lockref_get(&gl->gl_lockref);
@@ -150,6 +197,9 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
150 197
151static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) 198static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
152{ 199{
200 if (!(gl->gl_ops->go_flags & GLOF_LRU))
201 return;
202
153 spin_lock(&lru_lock); 203 spin_lock(&lru_lock);
154 if (!list_empty(&gl->gl_lru)) { 204 if (!list_empty(&gl->gl_lru)) {
155 list_del_init(&gl->gl_lru); 205 list_del_init(&gl->gl_lru);
@@ -191,13 +241,20 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
191 241
192 gfs2_glock_remove_from_lru(gl); 242 gfs2_glock_remove_from_lru(gl);
193 spin_unlock(&gl->gl_lockref.lock); 243 spin_unlock(&gl->gl_lockref.lock);
194 rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
195 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 244 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
196 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 245 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
197 trace_gfs2_glock_put(gl); 246 trace_gfs2_glock_put(gl);
198 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); 247 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
199} 248}
200 249
250/*
251 * Cause the glock to be put in work queue context.
252 */
253void gfs2_glock_queue_put(struct gfs2_glock *gl)
254{
255 gfs2_glock_queue_work(gl, 0);
256}
257
201/** 258/**
202 * gfs2_glock_put() - Decrement reference count on glock 259 * gfs2_glock_put() - Decrement reference count on glock
203 * @gl: The glock to put 260 * @gl: The glock to put
@@ -676,6 +733,40 @@ static void glock_work_func(struct work_struct *work)
676 spin_unlock(&gl->gl_lockref.lock); 733 spin_unlock(&gl->gl_lockref.lock);
677} 734}
678 735
736static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
737 struct gfs2_glock *new)
738{
739 struct wait_glock_queue wait;
740 wait_queue_head_t *wq = glock_waitqueue(name);
741 struct gfs2_glock *gl;
742
743 wait.name = name;
744 init_wait(&wait.wait);
745 wait.wait.func = glock_wake_function;
746
747again:
748 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
749 rcu_read_lock();
750 if (new) {
751 gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
752 &new->gl_node, ht_parms);
753 if (IS_ERR(gl))
754 goto out;
755 } else {
756 gl = rhashtable_lookup_fast(&gl_hash_table,
757 name, ht_parms);
758 }
759 if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
760 rcu_read_unlock();
761 schedule();
762 goto again;
763 }
764out:
765 rcu_read_unlock();
766 finish_wait(wq, &wait.wait);
767 return gl;
768}
769
679/** 770/**
680 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 771 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
681 * @sdp: The GFS2 superblock 772 * @sdp: The GFS2 superblock
@@ -702,15 +793,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
702 struct kmem_cache *cachep; 793 struct kmem_cache *cachep;
703 int ret = 0; 794 int ret = 0;
704 795
705 rcu_read_lock(); 796 gl = find_insert_glock(&name, NULL);
706 gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); 797 if (gl) {
707 if (gl && !lockref_get_not_dead(&gl->gl_lockref)) 798 *glp = gl;
708 gl = NULL;
709 rcu_read_unlock();
710
711 *glp = gl;
712 if (gl)
713 return 0; 799 return 0;
800 }
714 if (!create) 801 if (!create)
715 return -ENOENT; 802 return -ENOENT;
716 803
@@ -764,10 +851,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
764 mapping->writeback_index = 0; 851 mapping->writeback_index = 0;
765 } 852 }
766 853
767again: 854 tmp = find_insert_glock(&name, gl);
768 rcu_read_lock();
769 tmp = rhashtable_lookup_get_insert_fast(&gl_hash_table, &gl->gl_node,
770 ht_parms);
771 if (!tmp) { 855 if (!tmp) {
772 *glp = gl; 856 *glp = gl;
773 goto out; 857 goto out;
@@ -776,13 +860,7 @@ again:
776 ret = PTR_ERR(tmp); 860 ret = PTR_ERR(tmp);
777 goto out_free; 861 goto out_free;
778 } 862 }
779 if (lockref_get_not_dead(&tmp->gl_lockref)) { 863 *glp = tmp;
780 *glp = tmp;
781 goto out_free;
782 }
783 rcu_read_unlock();
784 cond_resched();
785 goto again;
786 864
787out_free: 865out_free:
788 kfree(gl->gl_lksb.sb_lvbptr); 866 kfree(gl->gl_lksb.sb_lvbptr);
@@ -790,7 +868,6 @@ out_free:
790 atomic_dec(&sdp->sd_glock_disposal); 868 atomic_dec(&sdp->sd_glock_disposal);
791 869
792out: 870out:
793 rcu_read_unlock();
794 return ret; 871 return ret;
795} 872}
796 873
@@ -1473,14 +1550,15 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1473 1550
1474 do { 1551 do {
1475 gl = ERR_PTR(rhashtable_walk_start(&iter)); 1552 gl = ERR_PTR(rhashtable_walk_start(&iter));
1476 if (gl) 1553 if (IS_ERR(gl))
1477 continue; 1554 goto walk_stop;
1478 1555
1479 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) 1556 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
1480 if ((gl->gl_name.ln_sbd == sdp) && 1557 if (gl->gl_name.ln_sbd == sdp &&
1481 lockref_get_not_dead(&gl->gl_lockref)) 1558 lockref_get_not_dead(&gl->gl_lockref))
1482 examiner(gl); 1559 examiner(gl);
1483 1560
1561walk_stop:
1484 rhashtable_walk_stop(&iter); 1562 rhashtable_walk_stop(&iter);
1485 } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); 1563 } while (cond_resched(), gl == ERR_PTR(-EAGAIN));
1486 1564
@@ -1803,7 +1881,7 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
1803 1881
1804int __init gfs2_glock_init(void) 1882int __init gfs2_glock_init(void)
1805{ 1883{
1806 int ret; 1884 int i, ret;
1807 1885
1808 ret = rhashtable_init(&gl_hash_table, &ht_parms); 1886 ret = rhashtable_init(&gl_hash_table, &ht_parms);
1809 if (ret < 0) 1887 if (ret < 0)
@@ -1832,6 +1910,9 @@ int __init gfs2_glock_init(void)
1832 return ret; 1910 return ret;
1833 } 1911 }
1834 1912
1913 for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
1914 init_waitqueue_head(glock_wait_table + i);
1915
1835 return 0; 1916 return 0;
1836} 1917}
1837 1918
@@ -1860,6 +1941,7 @@ static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
1860} 1941}
1861 1942
1862static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 1943static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1944 __acquires(RCU)
1863{ 1945{
1864 struct gfs2_glock_iter *gi = seq->private; 1946 struct gfs2_glock_iter *gi = seq->private;
1865 loff_t n = *pos; 1947 loff_t n = *pos;
@@ -1892,6 +1974,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1892} 1974}
1893 1975
1894static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 1976static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
1977 __releases(RCU)
1895{ 1978{
1896 struct gfs2_glock_iter *gi = seq->private; 1979 struct gfs2_glock_iter *gi = seq->private;
1897 1980
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 9ad4a6ac6c84..5e12220cc0c2 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -13,6 +13,7 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/parser.h> 14#include <linux/parser.h>
15#include "incore.h" 15#include "incore.h"
16#include "util.h"
16 17
17/* Options for hostdata parser */ 18/* Options for hostdata parser */
18 19
@@ -181,7 +182,9 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
181extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, 182extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
182 const struct gfs2_glock_operations *glops, 183 const struct gfs2_glock_operations *glops,
183 int create, struct gfs2_glock **glp); 184 int create, struct gfs2_glock **glp);
185extern void gfs2_glock_hold(struct gfs2_glock *gl);
184extern void gfs2_glock_put(struct gfs2_glock *gl); 186extern void gfs2_glock_put(struct gfs2_glock *gl);
187extern void gfs2_glock_queue_put(struct gfs2_glock *gl);
185extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, 188extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
186 u16 flags, struct gfs2_holder *gh); 189 u16 flags, struct gfs2_holder *gh);
187extern void gfs2_holder_reinit(unsigned int state, u16 flags, 190extern void gfs2_holder_reinit(unsigned int state, u16 flags,
@@ -257,11 +260,44 @@ static inline bool gfs2_holder_initialized(struct gfs2_holder *gh)
257 return gh->gh_gl; 260 return gh->gh_gl;
258} 261}
259 262
263/**
264 * glock_set_object - set the gl_object field of a glock
265 * @gl: the glock
266 * @object: the object
267 */
260static inline void glock_set_object(struct gfs2_glock *gl, void *object) 268static inline void glock_set_object(struct gfs2_glock *gl, void *object)
261{ 269{
262 spin_lock(&gl->gl_lockref.lock); 270 spin_lock(&gl->gl_lockref.lock);
271 if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
272 gfs2_dump_glock(NULL, gl);
263 gl->gl_object = object; 273 gl->gl_object = object;
264 spin_unlock(&gl->gl_lockref.lock); 274 spin_unlock(&gl->gl_lockref.lock);
265} 275}
266 276
277/**
278 * glock_clear_object - clear the gl_object field of a glock
279 * @gl: the glock
280 * @object: the object
281 *
282 * I'd love to similarly add this:
283 * else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
284 * gfs2_dump_glock(NULL, gl);
285 * Unfortunately, that's not possible because as soon as gfs2_delete_inode
286 * frees the block in the rgrp, another process can reassign it for an I_NEW
287 * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
288 * That means gfs2_delete_inode may subsequently try to call this function
289 * for a glock that's already pointing to a brand new inode. If we clear the
290 * new inode's gl_object, we'll introduce metadata corruption. Function
291 * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also
292 * tries to clear gl_object, so it's more than just gfs2_delete_inode.
293 *
294 */
295static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
296{
297 spin_lock(&gl->gl_lockref.lock);
298 if (gl->gl_object == object)
299 gl->gl_object = NULL;
300 spin_unlock(&gl->gl_lockref.lock);
301}
302
267#endif /* __GLOCK_DOT_H__ */ 303#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5e69636d4dd3..dac6559e2195 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -329,32 +329,6 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
329 return 1; 329 return 1;
330} 330}
331 331
332/**
333 * gfs2_set_nlink - Set the inode's link count based on on-disk info
334 * @inode: The inode in question
335 * @nlink: The link count
336 *
337 * If the link count has hit zero, it must never be raised, whatever the
338 * on-disk inode might say. When new struct inodes are created the link
339 * count is set to 1, so that we can safely use this test even when reading
340 * in on disk information for the first time.
341 */
342
343static void gfs2_set_nlink(struct inode *inode, u32 nlink)
344{
345 /*
346 * We will need to review setting the nlink count here in the
347 * light of the forthcoming ro bind mount work. This is a reminder
348 * to do that.
349 */
350 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
351 if (nlink == 0)
352 clear_nlink(inode);
353 else
354 set_nlink(inode, nlink);
355 }
356}
357
358static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 332static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
359{ 333{
360 const struct gfs2_dinode *str = buf; 334 const struct gfs2_dinode *str = buf;
@@ -376,7 +350,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
376 350
377 i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); 351 i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
378 i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); 352 i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
379 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); 353 set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
380 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); 354 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
381 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 355 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
382 atime.tv_sec = be64_to_cpu(str->di_atime); 356 atime.tv_sec = be64_to_cpu(str->di_atime);
@@ -470,7 +444,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
470 (gh->gh_state == LM_ST_EXCLUSIVE)) { 444 (gh->gh_state == LM_ST_EXCLUSIVE)) {
471 spin_lock(&sdp->sd_trunc_lock); 445 spin_lock(&sdp->sd_trunc_lock);
472 if (list_empty(&ip->i_trunc_list)) 446 if (list_empty(&ip->i_trunc_list))
473 list_add(&sdp->sd_trunc_list, &ip->i_trunc_list); 447 list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
474 spin_unlock(&sdp->sd_trunc_lock); 448 spin_unlock(&sdp->sd_trunc_lock);
475 wake_up(&sdp->sd_quota_wait); 449 wake_up(&sdp->sd_quota_wait);
476 return 1; 450 return 1;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 73fce76e67ee..6e18e9793ec4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -606,6 +606,7 @@ enum {
606 SDF_NOJOURNALID = 6, 606 SDF_NOJOURNALID = 6,
607 SDF_RORECOVERY = 7, /* read only recovery */ 607 SDF_RORECOVERY = 7, /* read only recovery */
608 SDF_SKIP_DLM_UNLOCK = 8, 608 SDF_SKIP_DLM_UNLOCK = 8,
609 SDF_FORCE_AIL_FLUSH = 9,
609}; 610};
610 611
611enum gfs2_freeze_state { 612enum gfs2_freeze_state {
@@ -816,6 +817,7 @@ struct gfs2_sbd {
816 atomic_t sd_log_in_flight; 817 atomic_t sd_log_in_flight;
817 struct bio *sd_log_bio; 818 struct bio *sd_log_bio;
818 wait_queue_head_t sd_log_flush_wait; 819 wait_queue_head_t sd_log_flush_wait;
820 int sd_log_error;
819 821
820 atomic_t sd_reserving_log; 822 atomic_t sd_reserving_log;
821 wait_queue_head_t sd_reserving_log_wait; 823 wait_queue_head_t sd_reserving_log_wait;
@@ -831,7 +833,7 @@ struct gfs2_sbd {
831 atomic_t sd_freeze_state; 833 atomic_t sd_freeze_state;
832 struct mutex sd_freeze_mutex; 834 struct mutex sd_freeze_mutex;
833 835
834 char sd_fsname[GFS2_FSNAME_LEN]; 836 char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
835 char sd_table_name[GFS2_FSNAME_LEN]; 837 char sd_table_name[GFS2_FSNAME_LEN];
836 char sd_proto_name[GFS2_FSNAME_LEN]; 838 char sd_proto_name[GFS2_FSNAME_LEN];
837 839
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index acca501f8110..863749e29bf9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -109,7 +109,7 @@ static void gfs2_set_iop(struct inode *inode)
109 * @no_addr: The inode number 109 * @no_addr: The inode number
110 * @no_formal_ino: The inode generation number 110 * @no_formal_ino: The inode generation number
111 * @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; 111 * @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED;
112 * GFS2_BLKST_FREE do indicate not to verify) 112 * GFS2_BLKST_FREE to indicate not to verify)
113 * 113 *
114 * If @type is DT_UNKNOWN, the inode type is fetched from disk. 114 * If @type is DT_UNKNOWN, the inode type is fetched from disk.
115 * 115 *
@@ -145,7 +145,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
145 if (unlikely(error)) 145 if (unlikely(error))
146 goto fail; 146 goto fail;
147 flush_delayed_work(&ip->i_gl->gl_work); 147 flush_delayed_work(&ip->i_gl->gl_work);
148 glock_set_object(ip->i_gl, ip);
149 148
150 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 149 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
151 if (unlikely(error)) 150 if (unlikely(error))
@@ -170,11 +169,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
170 } 169 }
171 } 170 }
172 171
172 glock_set_object(ip->i_gl, ip);
173 set_bit(GIF_INVALID, &ip->i_flags); 173 set_bit(GIF_INVALID, &ip->i_flags);
174 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 174 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
175 if (unlikely(error)) 175 if (unlikely(error))
176 goto fail_put; 176 goto fail_put;
177 flush_delayed_work(&ip->i_iopen_gh.gh_gl->gl_work);
178 glock_set_object(ip->i_iopen_gh.gh_gl, ip); 177 glock_set_object(ip->i_iopen_gh.gh_gl, ip);
179 gfs2_glock_put(io_gl); 178 gfs2_glock_put(io_gl);
180 io_gl = NULL; 179 io_gl = NULL;
@@ -202,14 +201,14 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
202 201
203fail_refresh: 202fail_refresh:
204 ip->i_iopen_gh.gh_flags |= GL_NOCACHE; 203 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
205 glock_set_object(ip->i_iopen_gh.gh_gl, NULL); 204 glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
206 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 205 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
207fail_put: 206fail_put:
208 if (io_gl) 207 if (io_gl)
209 gfs2_glock_put(io_gl); 208 gfs2_glock_put(io_gl);
209 glock_clear_object(ip->i_gl, ip);
210 if (gfs2_holder_initialized(&i_gh)) 210 if (gfs2_holder_initialized(&i_gh))
211 gfs2_glock_dq_uninit(&i_gh); 211 gfs2_glock_dq_uninit(&i_gh);
212 glock_set_object(ip->i_gl, NULL);
213fail: 212fail:
214 iget_failed(inode); 213 iget_failed(inode);
215 return ERR_PTR(error); 214 return ERR_PTR(error);
@@ -706,8 +705,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
706 error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 705 error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
707 if (error) 706 if (error)
708 goto fail_free_inode; 707 goto fail_free_inode;
709 708 flush_delayed_work(&ip->i_gl->gl_work);
710 glock_set_object(ip->i_gl, ip); 709 glock_set_object(ip->i_gl, ip);
710
711 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 711 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
712 if (error) 712 if (error)
713 goto fail_free_inode; 713 goto fail_free_inode;
@@ -775,14 +775,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
775 return error; 775 return error;
776 776
777fail_gunlock3: 777fail_gunlock3:
778 glock_clear_object(io_gl, ip);
778 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 779 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
779 gfs2_glock_put(io_gl); 780 gfs2_glock_put(io_gl);
780fail_gunlock2: 781fail_gunlock2:
781 if (io_gl) 782 if (io_gl)
782 clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); 783 clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
783fail_free_inode: 784fail_free_inode:
784 if (ip->i_gl) 785 if (ip->i_gl) {
786 glock_clear_object(ip->i_gl, ip);
785 gfs2_glock_put(ip->i_gl); 787 gfs2_glock_put(ip->i_gl);
788 }
786 gfs2_rsqa_delete(ip, NULL); 789 gfs2_rsqa_delete(ip, NULL);
787fail_free_acls: 790fail_free_acls:
788 if (default_acl) 791 if (default_acl)
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0515f0a68637..65f33a0ac190 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -23,8 +23,6 @@
23#include "sys.h" 23#include "sys.h"
24#include "trace_gfs2.h" 24#include "trace_gfs2.h"
25 25
26extern struct workqueue_struct *gfs2_control_wq;
27
28/** 26/**
29 * gfs2_update_stats - Update time based stats 27 * gfs2_update_stats - Update time based stats
30 * @mv: Pointer to mean/variance structure to update 28 * @mv: Pointer to mean/variance structure to update
@@ -1059,6 +1057,7 @@ static void free_recover_size(struct lm_lockstruct *ls)
1059 ls->ls_recover_submit = NULL; 1057 ls->ls_recover_submit = NULL;
1060 ls->ls_recover_result = NULL; 1058 ls->ls_recover_result = NULL;
1061 ls->ls_recover_size = 0; 1059 ls->ls_recover_size = 0;
1060 ls->ls_lvb_bits = NULL;
1062} 1061}
1063 1062
1064/* dlm calls before it does lock recovery */ 1063/* dlm calls before it does lock recovery */
@@ -1175,7 +1174,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
1175 spin_unlock(&ls->ls_recover_spin); 1174 spin_unlock(&ls->ls_recover_spin);
1176} 1175}
1177 1176
1178const struct dlm_lockspace_ops gdlm_lockspace_ops = { 1177static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
1179 .recover_prep = gdlm_recover_prep, 1178 .recover_prep = gdlm_recover_prep,
1180 .recover_slot = gdlm_recover_slot, 1179 .recover_slot = gdlm_recover_slot,
1181 .recover_done = gdlm_recover_done, 1180 .recover_done = gdlm_recover_done,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 9a624f694400..f72c44231406 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -898,6 +898,10 @@ static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
898static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) 898static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
899{ 899{
900 unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free); 900 unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
901
902 if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
903 return 1;
904
901 return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >= 905 return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
902 atomic_read(&sdp->sd_log_thresh2); 906 atomic_read(&sdp->sd_log_thresh2);
903} 907}
@@ -919,6 +923,15 @@ int gfs2_logd(void *data)
919 923
920 while (!kthread_should_stop()) { 924 while (!kthread_should_stop()) {
921 925
926 /* Check for errors writing to the journal */
927 if (sdp->sd_log_error) {
928 gfs2_lm_withdraw(sdp,
929 "GFS2: fsid=%s: error %d: "
930 "withdrawing the file system to "
931 "prevent further damage.\n",
932 sdp->sd_fsname, sdp->sd_log_error);
933 }
934
922 did_flush = false; 935 did_flush = false;
923 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 936 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
924 gfs2_ail1_empty(sdp); 937 gfs2_ail1_empty(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 3010f9edd177..7dabbe721dba 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -207,8 +207,11 @@ static void gfs2_end_log_write(struct bio *bio)
207 struct page *page; 207 struct page *page;
208 int i; 208 int i;
209 209
210 if (bio->bi_status) 210 if (bio->bi_status) {
211 fs_err(sdp, "Error %d writing to log\n", bio->bi_status); 211 fs_err(sdp, "Error %d writing to journal, jid=%u\n",
212 bio->bi_status, sdp->sd_jdesc->jd_jid);
213 wake_up(&sdp->sd_logd_waitq);
214 }
212 215
213 bio_for_each_segment_all(bvec, bio, i) { 216 bio_for_each_segment_all(bvec, bio, i) {
214 page = bvec->bv_page; 217 page = bvec->bv_page;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index fabe1614f879..61ef6c9be816 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -419,8 +419,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
419 if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { 419 if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
420 brelse(bh); 420 brelse(bh);
421 ret = -EIO; 421 ret = -EIO;
422 } else {
423 *bhp = bh;
422 } 424 }
423 *bhp = bh;
424 return ret; 425 return ret;
425} 426}
426 427
@@ -452,7 +453,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
452 if (buffer_uptodate(first_bh)) 453 if (buffer_uptodate(first_bh))
453 goto out; 454 goto out;
454 if (!buffer_locked(first_bh)) 455 if (!buffer_locked(first_bh))
455 ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh); 456 ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &first_bh);
456 457
457 dblock++; 458 dblock++;
458 extlen--; 459 extlen--;
@@ -461,7 +462,9 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
461 bh = gfs2_getbuf(gl, dblock, CREATE); 462 bh = gfs2_getbuf(gl, dblock, CREATE);
462 463
463 if (!buffer_uptodate(bh) && !buffer_locked(bh)) 464 if (!buffer_uptodate(bh) && !buffer_locked(bh))
464 ll_rw_block(REQ_OP_READ, REQ_RAHEAD | REQ_META, 1, &bh); 465 ll_rw_block(REQ_OP_READ,
466 REQ_RAHEAD | REQ_META | REQ_PRIO,
467 1, &bh);
465 brelse(bh); 468 brelse(bh);
466 dblock++; 469 dblock++;
467 extlen--; 470 extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e76058d34b74..c0a4b3778f3f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1113,7 +1113,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1113 return error; 1113 return error;
1114 } 1114 }
1115 1115
1116 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); 1116 snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
1117 1117
1118 error = gfs2_sys_fs_add(sdp); 1118 error = gfs2_sys_fs_add(sdp);
1119 /* 1119 /*
@@ -1159,10 +1159,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1159 } 1159 }
1160 1160
1161 if (sdp->sd_args.ar_spectator) 1161 if (sdp->sd_args.ar_spectator)
1162 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", 1162 snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.s",
1163 sdp->sd_table_name); 1163 sdp->sd_table_name);
1164 else 1164 else
1165 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", 1165 snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.%u",
1166 sdp->sd_table_name, sdp->sd_lockstruct.ls_jid); 1166 sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
1167 1167
1168 error = init_inodes(sdp, DO); 1168 error = init_inodes(sdp, DO);
@@ -1388,7 +1388,6 @@ static void gfs2_kill_sb(struct super_block *sb)
1388 sdp->sd_root_dir = NULL; 1388 sdp->sd_root_dir = NULL;
1389 sdp->sd_master_dir = NULL; 1389 sdp->sd_master_dir = NULL;
1390 shrink_dcache_sb(sb); 1390 shrink_dcache_sb(sb);
1391 gfs2_delete_debugfs_file(sdp);
1392 free_percpu(sdp->sd_lkstats); 1391 free_percpu(sdp->sd_lkstats);
1393 kill_block_super(sb); 1392 kill_block_super(sb);
1394} 1393}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c2ca9566b764..e647938432bd 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -730,7 +730,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
730 if (PageUptodate(page)) 730 if (PageUptodate(page))
731 set_buffer_uptodate(bh); 731 set_buffer_uptodate(bh);
732 if (!buffer_uptodate(bh)) { 732 if (!buffer_uptodate(bh)) {
733 ll_rw_block(REQ_OP_READ, REQ_META, 1, &bh); 733 ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh);
734 wait_on_buffer(bh); 734 wait_on_buffer(bh);
735 if (!buffer_uptodate(bh)) 735 if (!buffer_uptodate(bh))
736 goto unlock_out; 736 goto unlock_out;
@@ -1474,8 +1474,11 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
1474{ 1474{
1475 if (error == 0 || error == -EROFS) 1475 if (error == 0 || error == -EROFS)
1476 return; 1476 return;
1477 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 1477 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
1478 fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); 1478 fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
1479 sdp->sd_log_error = error;
1480 wake_up(&sdp->sd_logd_waitq);
1481 }
1479} 1482}
1480 1483
1481static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, 1484static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 836e38ba5d0a..95b2a57ded33 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -705,8 +705,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
705 rb_erase(n, &sdp->sd_rindex_tree); 705 rb_erase(n, &sdp->sd_rindex_tree);
706 706
707 if (gl) { 707 if (gl) {
708 glock_set_object(gl, NULL); 708 glock_clear_object(gl, rgd);
709 gfs2_glock_add_to_lru(gl);
710 gfs2_glock_put(gl); 709 gfs2_glock_put(gl);
711 } 710 }
712 711
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fdedec379b78..769841185ce5 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -924,6 +924,7 @@ restart:
924 gfs2_jindex_free(sdp); 924 gfs2_jindex_free(sdp);
925 /* Take apart glock structures and buffer lists */ 925 /* Take apart glock structures and buffer lists */
926 gfs2_gl_hash_clear(sdp); 926 gfs2_gl_hash_clear(sdp);
927 gfs2_delete_debugfs_file(sdp);
927 /* Unmount the locking protocol */ 928 /* Unmount the locking protocol */
928 gfs2_lm_unmount(sdp); 929 gfs2_lm_unmount(sdp);
929 930
@@ -943,9 +944,9 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
943 struct gfs2_sbd *sdp = sb->s_fs_info; 944 struct gfs2_sbd *sdp = sb->s_fs_info;
944 945
945 gfs2_quota_sync(sb, -1); 946 gfs2_quota_sync(sb, -1);
946 if (wait && sdp) 947 if (wait)
947 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 948 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
948 return 0; 949 return sdp->sd_log_error;
949} 950}
950 951
951void gfs2_freeze_func(struct work_struct *work) 952void gfs2_freeze_func(struct work_struct *work)
@@ -1295,7 +1296,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1295 * gfs2_drop_inode - Drop an inode (test for remote unlink) 1296 * gfs2_drop_inode - Drop an inode (test for remote unlink)
1296 * @inode: The inode to drop 1297 * @inode: The inode to drop
1297 * 1298 *
1298 * If we've received a callback on an iopen lock then its because a 1299 * If we've received a callback on an iopen lock then it's because a
1299 * remote node tried to deallocate the inode but failed due to this node 1300 * remote node tried to deallocate the inode but failed due to this node
1300 * still having the inode open. Here we mark the link count zero 1301 * still having the inode open. Here we mark the link count zero
1301 * since we know that it must have reached zero if the GLF_DEMOTE flag 1302 * since we know that it must have reached zero if the GLF_DEMOTE flag
@@ -1317,6 +1318,23 @@ static int gfs2_drop_inode(struct inode *inode)
1317 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) 1318 if (test_bit(GLF_DEMOTE, &gl->gl_flags))
1318 clear_nlink(inode); 1319 clear_nlink(inode);
1319 } 1320 }
1321
1322 /*
1323 * When under memory pressure when an inode's link count has dropped to
1324 * zero, defer deleting the inode to the delete workqueue. This avoids
1325 * calling into DLM under memory pressure, which can deadlock.
1326 */
1327 if (!inode->i_nlink &&
1328 unlikely(current->flags & PF_MEMALLOC) &&
1329 gfs2_holder_initialized(&ip->i_iopen_gh)) {
1330 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
1331
1332 gfs2_glock_hold(gl);
1333 if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1334 gfs2_glock_queue_put(gl);
1335 return false;
1336 }
1337
1320 return generic_drop_inode(inode); 1338 return generic_drop_inode(inode);
1321} 1339}
1322 1340
@@ -1501,6 +1519,22 @@ out_qs:
1501} 1519}
1502 1520
1503/** 1521/**
1522 * gfs2_glock_put_eventually
1523 * @gl: The glock to put
1524 *
1525 * When under memory pressure, trigger a deferred glock put to make sure we
1526 * won't call into DLM and deadlock. Otherwise, put the glock directly.
1527 */
1528
1529static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
1530{
1531 if (current->flags & PF_MEMALLOC)
1532 gfs2_glock_queue_put(gl);
1533 else
1534 gfs2_glock_put(gl);
1535}
1536
1537/**
1504 * gfs2_evict_inode - Remove an inode from cache 1538 * gfs2_evict_inode - Remove an inode from cache
1505 * @inode: The inode to evict 1539 * @inode: The inode to evict
1506 * 1540 *
@@ -1544,9 +1578,14 @@ static void gfs2_evict_inode(struct inode *inode)
1544 goto alloc_failed; 1578 goto alloc_failed;
1545 } 1579 }
1546 1580
1581 /* Deletes should never happen under memory pressure anymore. */
1582 if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
1583 goto out;
1584
1547 /* Must not read inode block until block type has been verified */ 1585 /* Must not read inode block until block type has been verified */
1548 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); 1586 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
1549 if (unlikely(error)) { 1587 if (unlikely(error)) {
1588 glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
1550 ip->i_iopen_gh.gh_flags |= GL_NOCACHE; 1589 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1551 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1590 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1552 goto out; 1591 goto out;
@@ -1562,6 +1601,12 @@ static void gfs2_evict_inode(struct inode *inode)
1562 goto out_truncate; 1601 goto out_truncate;
1563 } 1602 }
1564 1603
1604 /*
1605 * The inode may have been recreated in the meantime.
1606 */
1607 if (inode->i_nlink)
1608 goto out_truncate;
1609
1565alloc_failed: 1610alloc_failed:
1566 if (gfs2_holder_initialized(&ip->i_iopen_gh) && 1611 if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
1567 test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { 1612 test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
@@ -1595,6 +1640,11 @@ alloc_failed:
1595 goto out_unlock; 1640 goto out_unlock;
1596 } 1641 }
1597 1642
1643 /* We're about to clear the bitmap for the dinode, but as soon as we
1644 do, gfs2_create_inode can create another inode at the same block
1645 location and try to set gl_object again. We clear gl_object here so
1646 that subsequent inode creates don't see an old gl_object. */
1647 glock_clear_object(ip->i_gl, ip);
1598 error = gfs2_dinode_dealloc(ip); 1648 error = gfs2_dinode_dealloc(ip);
1599 goto out_unlock; 1649 goto out_unlock;
1600 1650
@@ -1623,14 +1673,17 @@ out_unlock:
1623 gfs2_rs_deltree(&ip->i_res); 1673 gfs2_rs_deltree(&ip->i_res);
1624 1674
1625 if (gfs2_holder_initialized(&ip->i_iopen_gh)) { 1675 if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
1676 glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
1626 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { 1677 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
1627 ip->i_iopen_gh.gh_flags |= GL_NOCACHE; 1678 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1628 gfs2_glock_dq(&ip->i_iopen_gh); 1679 gfs2_glock_dq(&ip->i_iopen_gh);
1629 } 1680 }
1630 gfs2_holder_uninit(&ip->i_iopen_gh); 1681 gfs2_holder_uninit(&ip->i_iopen_gh);
1631 } 1682 }
1632 if (gfs2_holder_initialized(&gh)) 1683 if (gfs2_holder_initialized(&gh)) {
1684 glock_clear_object(ip->i_gl, ip);
1633 gfs2_glock_dq_uninit(&gh); 1685 gfs2_glock_dq_uninit(&gh);
1686 }
1634 if (error && error != GLR_TRYFAILED && error != -EROFS) 1687 if (error && error != GLR_TRYFAILED && error != -EROFS)
1635 fs_warn(sdp, "gfs2_evict_inode: %d\n", error); 1688 fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
1636out: 1689out:
@@ -1640,15 +1693,19 @@ out:
1640 gfs2_ordered_del_inode(ip); 1693 gfs2_ordered_del_inode(ip);
1641 clear_inode(inode); 1694 clear_inode(inode);
1642 gfs2_dir_hash_inval(ip); 1695 gfs2_dir_hash_inval(ip);
1643 glock_set_object(ip->i_gl, NULL); 1696 glock_clear_object(ip->i_gl, ip);
1644 wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); 1697 wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
1645 gfs2_glock_add_to_lru(ip->i_gl); 1698 gfs2_glock_add_to_lru(ip->i_gl);
1646 gfs2_glock_put(ip->i_gl); 1699 gfs2_glock_put_eventually(ip->i_gl);
1647 ip->i_gl = NULL; 1700 ip->i_gl = NULL;
1648 if (gfs2_holder_initialized(&ip->i_iopen_gh)) { 1701 if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
1649 glock_set_object(ip->i_iopen_gh.gh_gl, NULL); 1702 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
1703
1704 glock_clear_object(gl, ip);
1650 ip->i_iopen_gh.gh_flags |= GL_NOCACHE; 1705 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1706 gfs2_glock_hold(gl);
1651 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1707 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1708 gfs2_glock_put_eventually(gl);
1652 } 1709 }
1653} 1710}
1654 1711
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index c81295f407f6..3926f95a6eb7 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -151,6 +151,7 @@ extern struct kmem_cache *gfs2_rgrpd_cachep;
151extern struct kmem_cache *gfs2_quotad_cachep; 151extern struct kmem_cache *gfs2_quotad_cachep;
152extern struct kmem_cache *gfs2_qadata_cachep; 152extern struct kmem_cache *gfs2_qadata_cachep;
153extern mempool_t *gfs2_page_pool; 153extern mempool_t *gfs2_page_pool;
154extern struct workqueue_struct *gfs2_control_wq;
154 155
155static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, 156static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
156 unsigned int *p) 157 unsigned int *p)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 54179554c7d2..ea09e41dbb49 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -25,6 +25,7 @@
25#include "meta_io.h" 25#include "meta_io.h"
26#include "quota.h" 26#include "quota.h"
27#include "rgrp.h" 27#include "rgrp.h"
28#include "super.h"
28#include "trans.h" 29#include "trans.h"
29#include "util.h" 30#include "util.h"
30 31
@@ -1209,8 +1210,12 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
1209 if (namel > GFS2_EA_MAX_NAME_LEN) 1210 if (namel > GFS2_EA_MAX_NAME_LEN)
1210 return -ERANGE; 1211 return -ERANGE;
1211 1212
1212 if (value == NULL) 1213 if (value == NULL) {
1213 return gfs2_xattr_remove(ip, type, name); 1214 error = gfs2_xattr_remove(ip, type, name);
1215 if (error == -ENODATA && !(flags & XATTR_REPLACE))
1216 error = 0;
1217 return error;
1218 }
1214 1219
1215 if (ea_check_size(sdp, namel, size)) 1220 if (ea_check_size(sdp, namel, size))
1216 return -ERANGE; 1221 return -ERANGE;