author	Steven Whitehouse <swhiteho@redhat.com>	2011-01-19 04:30:01 -0500
committer	Steven Whitehouse <swhiteho@redhat.com>	2011-01-21 04:39:08 -0500
commit	bc015cb84129eb1451913cfebece270bf7a39e0f (patch)
tree	4f116a61b802d87ae80051e9ae05d8fcb73d9ae7
parent	2b1caf6ed7b888c95a1909d343799672731651a5 (diff)
GFS2: Use RCU for glock hash table
This has a number of advantages:

 - Reduces contention on the hash table lock
 - Makes the code smaller and simpler
 - Should speed up glock dumps when under load
 - Removes ref count changing in examine_bucket
 - No longer need hash chain lock in glock_put() in common case

There are some further changes which this enables and which we may do
in the future. One is to look at using SLAB_RCU, and another is to
look at using a per-cpu counter for the per-sb glock counter, since
that is touched twice in the lifetime of each glock (but only used
at umount time).

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
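The heart of the change is the lockless lookup: readers walk a hash bucket under rcu_read_lock() and take a reference only via atomic_inc_not_zero(), so a glock whose count has already hit zero is skipped rather than resurrected. A minimal sketch of that pattern, using names from this patch (lookup_example itself is hypothetical, not a function the patch adds):

/*
 * Sketch only -- mirrors the search_bucket() logic introduced below.
 * Readers need no lock on the bucket; they rely on RCU plus a
 * conditional reference grab to race safely with gfs2_glock_put().
 */
static struct gfs2_glock *lookup_example(unsigned int hash,
					 const struct gfs2_sbd *sdp,
					 const struct lm_lockname *name)
{
	struct hlist_bl_node *pos;
	struct gfs2_glock *gl;

	rcu_read_lock();
	hlist_bl_for_each_entry_rcu(gl, pos, &gl_hash_table[hash], gl_list) {
		if (lm_name_equal(&gl->gl_name, name) &&
		    gl->gl_sbd == sdp &&
		    atomic_inc_not_zero(&gl->gl_ref)) {
			rcu_read_unlock();
			return gl;	/* caller now holds a reference */
		}
	}
	rcu_read_unlock();
	return NULL;
}

Writers still serialise insertions and removals, but on a per-bucket bit spinlock (bit 0 of the bucket head pointer, via bit_spin_lock()) instead of the old rwlock array.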
-rw-r--r--	fs/gfs2/glock.c	390
-rw-r--r--	fs/gfs2/glock.h	39
-rw-r--r--	fs/gfs2/glops.c	23
-rw-r--r--	fs/gfs2/incore.h	5
-rw-r--r--	fs/gfs2/lock_dlm.c	14
-rw-r--r--	fs/gfs2/lops.c	3
-rw-r--r--	fs/gfs2/main.c	6
-rw-r--r--	fs/gfs2/ops_fstype.c	7
8 files changed, 190 insertions, 297 deletions
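On the freeing side, glocks are no longer returned to their kmem cache directly; each lock backend defers the free past an RCU grace period so concurrent readers can never touch freed memory. A hedged sketch of the put-side pattern (put_lock_example is hypothetical; gl_rcu and gfs2_glock_free() are added by this patch):

static void put_lock_example(struct gfs2_glock *gl)
{
	/* gfs2_glock_free() runs only after all current RCU readers finish */
	call_rcu(&gl->gl_rcu, gfs2_glock_free);
}

Because frees are now deferred, module unload has to drain outstanding RCU callbacks with rcu_barrier() before destroying the kmem caches; the change to fs/gfs2/main.c below does exactly that.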
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb152e6..c75d4998519e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -26,6 +26,9 @@
 #include <linux/freezer.h>
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
+#include <linux/bit_spinlock.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -41,10 +44,6 @@
 #define CREATE_TRACE_POINTS
 #include "trace_gfs2.h"
 
-struct gfs2_gl_hash_bucket {
-	struct hlist_head hb_list;
-};
-
 struct gfs2_glock_iter {
 	int hash;	/* hash bucket index	  */
 	struct gfs2_sbd *sdp;	/* incore superblock	  */
@@ -54,7 +53,6 @@ struct gfs2_glock_iter {
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
-static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
 static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
 #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock);
 #define GFS2_GL_HASH_SIZE	(1 << GFS2_GL_HASH_SHIFT)
 #define GFS2_GL_HASH_MASK	(GFS2_GL_HASH_SIZE - 1)
 
-static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
+static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
 static struct dentry *gfs2_root;
 
-/*
- * Despite what you might think, the numbers below are not arbitrary :-)
- * They are taken from the ipv4 routing hash code, which is well tested
- * and thus should be nearly optimal. Later on we might tweek the numbers
- * but for now this should be fine.
- *
- * The reason for putting the locks in a separate array from the list heads
- * is that we can have fewer locks than list heads and save memory. We use
- * the same hash function for both, but with a different hash mask.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
-    defined(CONFIG_PROVE_LOCKING)
-
-#ifdef CONFIG_LOCKDEP
-# define GL_HASH_LOCK_SZ        256
-#else
-# if NR_CPUS >= 32
-#  define GL_HASH_LOCK_SZ       4096
-# elif NR_CPUS >= 16
-#  define GL_HASH_LOCK_SZ       2048
-# elif NR_CPUS >= 8
-#  define GL_HASH_LOCK_SZ       1024
-# elif NR_CPUS >= 4
-#  define GL_HASH_LOCK_SZ       512
-# else
-#  define GL_HASH_LOCK_SZ       256
-# endif
-#endif
-
-/* We never want more locks than chains */
-#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
-# undef GL_HASH_LOCK_SZ
-# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
-#endif
-
-static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
-
-static inline rwlock_t *gl_lock_addr(unsigned int x)
-{
-	return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
-}
-#else /* not SMP, so no spinlocks required */
-static inline rwlock_t *gl_lock_addr(unsigned int x)
-{
-	return NULL;
-}
-#endif
-
 /**
  * gl_hash() - Turn glock number into hash bucket number
  * @lock: The glock number
@@ -141,25 +91,30 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
 	return h;
 }
 
-/**
- * glock_free() - Perform a few checks and then release struct gfs2_glock
- * @gl: The glock to release
- *
- * Also calls lock module to release its internal structure for this glock.
- *
- */
+static inline void spin_lock_bucket(unsigned int hash)
+{
+	struct hlist_bl_head *bl = &gl_hash_table[hash];
+	bit_spin_lock(0, (unsigned long *)bl);
+}
+
+static inline void spin_unlock_bucket(unsigned int hash)
+{
+	struct hlist_bl_head *bl = &gl_hash_table[hash];
+	__bit_spin_unlock(0, (unsigned long *)bl);
+}
 
-static void glock_free(struct gfs2_glock *gl)
+void gfs2_glock_free(struct rcu_head *rcu)
 {
+	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	struct address_space *mapping = gfs2_glock2aspace(gl);
-	struct kmem_cache *cachep = gfs2_glock_cachep;
 
-	GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
-	trace_gfs2_glock_put(gl);
-	if (mapping)
-		cachep = gfs2_glock_aspace_cachep;
-	sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
+	if (gl->gl_ops->go_flags & GLOF_ASPACE)
+		kmem_cache_free(gfs2_glock_aspace_cachep, gl);
+	else
+		kmem_cache_free(gfs2_glock_cachep, gl);
+
+	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+		wake_up(&sdp->sd_glock_wait);
 }
 
 /**
@@ -185,34 +140,49 @@ static int demote_ok(const struct gfs2_glock *gl)
 {
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 
+	/* assert_spin_locked(&gl->gl_spin); */
+
 	if (gl->gl_state == LM_ST_UNLOCKED)
 		return 0;
-	if (!list_empty(&gl->gl_holders))
+	if (test_bit(GLF_LFLUSH, &gl->gl_flags))
+		return 0;
+	if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
+	    !list_empty(&gl->gl_holders))
 		return 0;
 	if (glops->go_demote_ok)
 		return glops->go_demote_ok(gl);
 	return 1;
 }
 
+
 /**
- * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
+ * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
  * @gl: the glock
  *
+ * If the glock is demotable, then we add it (or move it) to the end
+ * of the glock LRU list.
  */
 
-static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
 {
-	int may_reclaim;
-	may_reclaim = (demote_ok(gl) &&
-		       (atomic_read(&gl->gl_ref) == 1 ||
-			(gl->gl_name.ln_type == LM_TYPE_INODE &&
-			 atomic_read(&gl->gl_ref) <= 2)));
-	spin_lock(&lru_lock);
-	if (list_empty(&gl->gl_lru) && may_reclaim) {
+	if (demote_ok(gl)) {
+		spin_lock(&lru_lock);
+
+		if (!list_empty(&gl->gl_lru))
+			list_del_init(&gl->gl_lru);
+		else
+			atomic_inc(&lru_count);
+
 		list_add_tail(&gl->gl_lru, &lru_list);
-		atomic_inc(&lru_count);
+		spin_unlock(&lru_lock);
 	}
-	spin_unlock(&lru_lock);
+}
+
+void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+{
+	spin_lock(&gl->gl_spin);
+	__gfs2_glock_schedule_for_reclaim(gl);
+	spin_unlock(&gl->gl_spin);
 }
 
218/** 188/**
@@ -227,7 +197,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
 {
 	if (atomic_dec_and_test(&gl->gl_ref))
 		GLOCK_BUG_ON(gl, 1);
-	gfs2_glock_schedule_for_reclaim(gl);
 }
 
 /**
@@ -236,30 +205,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
  *
  */
 
-int gfs2_glock_put(struct gfs2_glock *gl)
+void gfs2_glock_put(struct gfs2_glock *gl)
 {
-	int rv = 0;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct address_space *mapping = gfs2_glock2aspace(gl);
 
-	write_lock(gl_lock_addr(gl->gl_hash));
-	if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
-		hlist_del(&gl->gl_list);
+	if (atomic_dec_and_test(&gl->gl_ref)) {
+		spin_lock_bucket(gl->gl_hash);
+		hlist_bl_del_rcu(&gl->gl_list);
+		spin_unlock_bucket(gl->gl_hash);
+		spin_lock(&lru_lock);
 		if (!list_empty(&gl->gl_lru)) {
 			list_del_init(&gl->gl_lru);
 			atomic_dec(&lru_count);
 		}
 		spin_unlock(&lru_lock);
-		write_unlock(gl_lock_addr(gl->gl_hash));
 		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
-		glock_free(gl);
-		rv = 1;
-		goto out;
+		GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+		trace_gfs2_glock_put(gl);
+		sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
 	}
-	spin_lock(&gl->gl_spin);
-	gfs2_glock_schedule_for_reclaim(gl);
-	spin_unlock(&gl->gl_spin);
-	write_unlock(gl_lock_addr(gl->gl_hash));
-out:
-	return rv;
 }
 
 /**
@@ -275,17 +240,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
 					const struct lm_lockname *name)
 {
 	struct gfs2_glock *gl;
-	struct hlist_node *h;
+	struct hlist_bl_node *h;
 
-	hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) {
+	hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
 		if (!lm_name_equal(&gl->gl_name, name))
 			continue;
 		if (gl->gl_sbd != sdp)
 			continue;
-
-		atomic_inc(&gl->gl_ref);
-
-		return gl;
+		if (atomic_inc_not_zero(&gl->gl_ref))
+			return gl;
 	}
 
 	return NULL;
@@ -743,10 +706,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	struct gfs2_glock *gl, *tmp;
 	unsigned int hash = gl_hash(sdp, &name);
 	struct address_space *mapping;
+	struct kmem_cache *cachep;
 
-	read_lock(gl_lock_addr(hash));
+	rcu_read_lock();
 	gl = search_bucket(hash, sdp, &name);
-	read_unlock(gl_lock_addr(hash));
+	rcu_read_unlock();
 
 	*glp = gl;
 	if (gl)
@@ -755,9 +719,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 		return -ENOENT;
 
 	if (glops->go_flags & GLOF_ASPACE)
-		gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL);
+		cachep = gfs2_glock_aspace_cachep;
 	else
-		gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
+		cachep = gfs2_glock_cachep;
+	gl = kmem_cache_alloc(cachep, GFP_KERNEL);
 	if (!gl)
 		return -ENOMEM;
 
@@ -790,15 +755,15 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 		mapping->writeback_index = 0;
 	}
 
-	write_lock(gl_lock_addr(hash));
+	spin_lock_bucket(hash);
 	tmp = search_bucket(hash, sdp, &name);
 	if (tmp) {
-		write_unlock(gl_lock_addr(hash));
-		glock_free(gl);
+		spin_unlock_bucket(hash);
+		kmem_cache_free(cachep, gl);
 		gl = tmp;
 	} else {
-		hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list);
-		write_unlock(gl_lock_addr(hash));
+		hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
+		spin_unlock_bucket(hash);
 	}
 
 	*glp = gl;
@@ -1113,6 +1078,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
 			fast_path = 1;
 	}
+	__gfs2_glock_schedule_for_reclaim(gl);
 	trace_gfs2_glock_queue(gh, 0);
 	spin_unlock(&gl->gl_spin);
 	if (likely(fast_path))
@@ -1440,42 +1406,30 @@ static struct shrinker glock_shrinker = {
  * @sdp: the filesystem
  * @bucket: the bucket
  *
- * Returns: 1 if the bucket has entries
  */
 
-static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
+static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
 			  unsigned int hash)
 {
-	struct gfs2_glock *gl, *prev = NULL;
-	int has_entries = 0;
-	struct hlist_head *head = &gl_hash_table[hash].hb_list;
+	struct gfs2_glock *gl;
+	struct hlist_bl_head *head = &gl_hash_table[hash];
+	struct hlist_bl_node *pos;
 
-	read_lock(gl_lock_addr(hash));
-	/* Can't use hlist_for_each_entry - don't want prefetch here */
-	if (hlist_empty(head))
-		goto out;
-	gl = list_entry(head->first, struct gfs2_glock, gl_list);
-	while(1) {
-		if (!sdp || gl->gl_sbd == sdp) {
-			gfs2_glock_hold(gl);
-			read_unlock(gl_lock_addr(hash));
-			if (prev)
-				gfs2_glock_put(prev);
-			prev = gl;
+	rcu_read_lock();
+	hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
+		if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
 			examiner(gl);
-			has_entries = 1;
-			read_lock(gl_lock_addr(hash));
-		}
-		if (gl->gl_list.next == NULL)
-			break;
-		gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
 	}
-out:
-	read_unlock(gl_lock_addr(hash));
-	if (prev)
-		gfs2_glock_put(prev);
+	rcu_read_unlock();
 	cond_resched();
-	return has_entries;
+}
+
+static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
+{
+	unsigned x;
+
+	for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
+		examine_bucket(examiner, sdp, x);
 }
 
 
@@ -1529,10 +1483,21 @@ static void clear_glock(struct gfs2_glock *gl)
 
 void gfs2_glock_thaw(struct gfs2_sbd *sdp)
 {
-	unsigned x;
+	glock_hash_walk(thaw_glock, sdp);
+}
 
-	for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
-		examine_bucket(thaw_glock, sdp, x);
+static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+{
+	int ret;
+	spin_lock(&gl->gl_spin);
+	ret = __dump_glock(seq, gl);
+	spin_unlock(&gl->gl_spin);
+	return ret;
+}
+
+static void dump_glock_func(struct gfs2_glock *gl)
+{
+	dump_glock(NULL, gl);
 }
 
 /**
@@ -1545,13 +1510,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
 
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
-	unsigned int x;
-
-	for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
-		examine_bucket(clear_glock, sdp, x);
+	glock_hash_walk(clear_glock, sdp);
 	flush_workqueue(glock_workqueue);
 	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
-	gfs2_dump_lockstate(sdp);
+	glock_hash_walk(dump_glock_func, sdp);
 }
 
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
@@ -1717,66 +1679,15 @@ out:
 	return error;
 }
 
-static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
-{
-	int ret;
-	spin_lock(&gl->gl_spin);
-	ret = __dump_glock(seq, gl);
-	spin_unlock(&gl->gl_spin);
-	return ret;
-}
-
-/**
- * gfs2_dump_lockstate - print out the current lockstate
- * @sdp: the filesystem
- * @ub: the buffer to copy the information into
- *
- * If @ub is NULL, dump the lockstate to the console.
- *
- */
-
-static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
-{
-	struct gfs2_glock *gl;
-	struct hlist_node *h;
-	unsigned int x;
-	int error = 0;
-
-	for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
-
-		read_lock(gl_lock_addr(x));
-
-		hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
-			if (gl->gl_sbd != sdp)
-				continue;
-
-			error = dump_glock(NULL, gl);
-			if (error)
-				break;
-		}
-
-		read_unlock(gl_lock_addr(x));
-
-		if (error)
-			break;
-	}
-
 
-	return error;
-}
 
 
 int __init gfs2_glock_init(void)
 {
 	unsigned i;
 	for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
-		INIT_HLIST_HEAD(&gl_hash_table[i].hb_list);
-	}
-#ifdef GL_HASH_LOCK_SZ
-	for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
-		rwlock_init(&gl_hash_locks[i]);
+		INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
 	}
-#endif
 
 	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
 					  WQ_HIGHPRI | WQ_FREEZEABLE, 0);
@@ -1802,62 +1713,54 @@ void gfs2_glock_exit(void)
 	destroy_workqueue(gfs2_delete_workqueue);
 }
 
+static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
+{
+	return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
+			      struct gfs2_glock, gl_list);
+}
+
+static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
+{
+	return hlist_bl_entry(rcu_dereference_raw(gl->gl_list.next),
+			      struct gfs2_glock, gl_list);
+}
+
 static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 {
 	struct gfs2_glock *gl;
 
-restart:
-	read_lock(gl_lock_addr(gi->hash));
-	gl = gi->gl;
-	if (gl) {
-		gi->gl = hlist_entry(gl->gl_list.next,
-				     struct gfs2_glock, gl_list);
-	} else {
-		gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
-				     struct gfs2_glock, gl_list);
-	}
-	if (gi->gl)
-		gfs2_glock_hold(gi->gl);
-	read_unlock(gl_lock_addr(gi->hash));
-	if (gl)
-		gfs2_glock_put(gl);
-	while (gi->gl == NULL) {
-		gi->hash++;
-		if (gi->hash >= GFS2_GL_HASH_SIZE)
-			return 1;
-		read_lock(gl_lock_addr(gi->hash));
-		gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
-				     struct gfs2_glock, gl_list);
-		if (gi->gl)
-			gfs2_glock_hold(gi->gl);
-		read_unlock(gl_lock_addr(gi->hash));
-	}
-
-	if (gi->sdp != gi->gl->gl_sbd)
-		goto restart;
+	do {
+		gl = gi->gl;
+		if (gl) {
+			gi->gl = glock_hash_next(gl);
+		} else {
+			gi->gl = glock_hash_chain(gi->hash);
+		}
+		while (gi->gl == NULL) {
+			gi->hash++;
+			if (gi->hash >= GFS2_GL_HASH_SIZE) {
+				rcu_read_unlock();
+				return 1;
+			}
+			gi->gl = glock_hash_chain(gi->hash);
+		}
+		/* Skip entries for other sb and dead entries */
+	} while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
 
 	return 0;
 }
 
-static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
-{
-	if (gi->gl)
-		gfs2_glock_put(gi->gl);
-	gi->gl = NULL;
-}
-
 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct gfs2_glock_iter *gi = seq->private;
 	loff_t n = *pos;
 
 	gi->hash = 0;
+	rcu_read_lock();
 
 	do {
-		if (gfs2_glock_iter_next(gi)) {
-			gfs2_glock_iter_free(gi);
+		if (gfs2_glock_iter_next(gi))
 			return NULL;
-		}
 	} while (n--);
 
 	return gi->gl;
@@ -1870,10 +1773,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 
 	(*pos)++;
 
-	if (gfs2_glock_iter_next(gi)) {
-		gfs2_glock_iter_free(gi);
+	if (gfs2_glock_iter_next(gi))
 		return NULL;
-	}
 
 	return gi->gl;
 }
@@ -1881,7 +1782,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
 {
 	struct gfs2_glock_iter *gi = seq->private;
-	gfs2_glock_iter_free(gi);
+
+	if (gi->gl)
+		rcu_read_unlock();
+	gi->gl = NULL;
 }
 
 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 691851ceb615..afa8bfea5647 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -118,7 +118,7 @@ struct lm_lockops {
 	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
 	void (*lm_unmount) (struct gfs2_sbd *sdp);
 	void (*lm_withdraw) (struct gfs2_sbd *sdp);
-	void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
+	void (*lm_put_lock) (struct gfs2_glock *gl);
 	int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
 			unsigned int flags);
 	void (*lm_cancel) (struct gfs2_glock *gl);
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
 		   int create, struct gfs2_glock **glp);
 void gfs2_glock_hold(struct gfs2_glock *gl);
 void gfs2_glock_put_nolock(struct gfs2_glock *gl);
-int gfs2_glock_put(struct gfs2_glock *gl);
+void gfs2_glock_put(struct gfs2_glock *gl);
 void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
 		      struct gfs2_holder *gh);
 void gfs2_holder_reinit(unsigned int state, unsigned flags,
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
 	return error;
 }
 
-/* Lock Value Block functions */
-
-int gfs2_lvb_hold(struct gfs2_glock *gl);
-void gfs2_lvb_unhold(struct gfs2_glock *gl);
-
-void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
-void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
-void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
-void gfs2_glock_thaw(struct gfs2_sbd *sdp);
+extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
+extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
+extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
+extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
+extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
+extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
+extern void gfs2_glock_free(struct rcu_head *rcu);
+
+extern int __init gfs2_glock_init(void);
+extern void gfs2_glock_exit(void);
 
-int __init gfs2_glock_init(void);
-void gfs2_glock_exit(void);
-
-int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
-void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
-int gfs2_register_debugfs(void);
-void gfs2_unregister_debugfs(void);
+extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
+extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
+extern int gfs2_register_debugfs(void);
+extern void gfs2_unregister_debugfs(void);
 
 extern const struct lm_lockops gfs2_dlm_ops;
 
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 263561bf1a50..ac5fac948f87 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -206,8 +206,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
 static int inode_go_demote_ok(const struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_holder *gh;
+
 	if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
 		return 0;
+
+	if (!list_empty(&gl->gl_holders)) {
+		gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+		if (gh->gh_list.next != &gl->gl_holders)
+			return 0;
+	}
+
 	return 1;
 }
 
@@ -272,19 +281,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 }
 
 /**
- * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
-{
-	const struct address_space *mapping = (const struct address_space *)(gl + 1);
-	return !mapping->nrpages;
-}
-
-/**
  * rgrp_go_lock - operation done after an rgrp lock is locked by
  *    a first holder on this node.
  * @gl: the glock
@@ -410,7 +406,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
 const struct gfs2_glock_operations gfs2_rgrp_glops = {
 	.go_xmote_th = rgrp_go_sync,
 	.go_inval = rgrp_go_inval,
-	.go_demote_ok = rgrp_go_demote_ok,
 	.go_lock = rgrp_go_lock,
 	.go_unlock = rgrp_go_unlock,
 	.go_dump = gfs2_rgrp_dump,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a79790c06275..720c1e66b343 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -15,6 +15,8 @@
 #include <linux/workqueue.h>
 #include <linux/dlm.h>
 #include <linux/buffer_head.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
@@ -201,7 +203,7 @@ enum {
 };
 
 struct gfs2_glock {
-	struct hlist_node gl_list;
+	struct hlist_bl_node gl_list;
 	unsigned long gl_flags;		/* GLF_... */
 	struct lm_lockname gl_name;
 	atomic_t gl_ref;
@@ -234,6 +236,7 @@ struct gfs2_glock {
 	atomic_t gl_ail_count;
 	struct delayed_work gl_work;
 	struct work_struct gl_delete;
+	struct rcu_head gl_rcu;
 };
 
 #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6e493aee28f8..c80485cb6f25 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg)
 {
 	struct gfs2_glock *gl = arg;
 	unsigned ret = gl->gl_state;
-	struct gfs2_sbd *sdp = gl->gl_sbd;
 
 	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
 
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg)
 
 	switch (gl->gl_lksb.sb_status) {
 	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
-		if (gl->gl_ops->go_flags & GLOF_ASPACE)
-			kmem_cache_free(gfs2_glock_aspace_cachep, gl);
-		else
-			kmem_cache_free(gfs2_glock_cachep, gl);
-		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-			wake_up(&sdp->sd_glock_wait);
+		call_rcu(&gl->gl_rcu, gfs2_glock_free);
 		return;
 	case -DLM_ECANCEL: /* Cancel while getting lock */
 		ret |= LM_OUT_CANCELED;
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
 			 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
 }
 
-static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+static void gdlm_put_lock(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int error;
 
 	if (gl->gl_lksb.sb_lkid == 0) {
-		kmem_cache_free(cachep, gl);
-		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-			wake_up(&sdp->sd_glock_wait);
+		call_rcu(&gl->gl_rcu, gfs2_glock_free);
 		return;
 	}
 
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f822058d..11a73efa8261 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -91,7 +91,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	}
 	bd->bd_ail = ai;
 	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
-	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+	if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
+		gfs2_glock_schedule_for_reclaim(bd->bd_gl);
 	trace_gfs2_pin(bd, 0);
 	gfs2_log_unlock(sdp);
 	unlock_buffer(bh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab6e17e..d850004f2080 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,6 +14,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
 #include <asm/atomic.h>
 
 #include "gfs2.h"
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo)
 {
 	struct gfs2_glock *gl = foo;
 
-	INIT_HLIST_NODE(&gl->gl_list);
+	INIT_HLIST_BL_NODE(&gl->gl_list);
 	spin_lock_init(&gl->gl_spin);
 	INIT_LIST_HEAD(&gl->gl_holders);
 	INIT_LIST_HEAD(&gl->gl_lru);
@@ -198,6 +200,8 @@ static void __exit exit_gfs2_fs(void)
 	unregister_filesystem(&gfs2meta_fs_type);
 	destroy_workqueue(gfs_recovery_wq);
 
+	rcu_barrier();
+
 	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
 	kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 777927ce6f79..a39c103ba499 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -928,12 +928,9 @@ static const match_table_t nolock_tokens = {
 	{ Opt_err, NULL },
 };
 
-static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+static void nolock_put_lock(struct gfs2_glock *gl)
 {
-	struct gfs2_sbd *sdp = gl->gl_sbd;
-	kmem_cache_free(cachep, gl);
-	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-		wake_up(&sdp->sd_glock_wait);
+	call_rcu(&gl->gl_rcu, gfs2_glock_free);
 }
 
 static const struct lm_lockops nolock_ops = {