aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Marzinski <bmarzins@redhat.com>2007-08-23 14:19:05 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2007-10-10 03:55:48 -0400
commitc4f68a130fc1795e4a75ec5bdaf9e85d86c22419 (patch)
tree37251ae5634d4b73b5224e3e8679f92472de0ebe
parentd1e2777d4f419a865ddccdb9b3412021d0e4de51 (diff)
[GFS2] delay glock demote for a minimum hold time
When a lot of IO, with some distributed mmap IO, is run on a GFS2 filesystem in a cluster, it will deadlock. The reason is that do_no_page() will repeatedly call gfs2_sharewrite_nopage(), because each node keeps giving up the glock too early, and is forced to call unmap_mapping_range(). This bumps the mapping->truncate_count sequence count, forcing do_no_page() to retry. This patch institutes a minimum glock hold time of a tenth of a second. This ensures that even in heavy contention cases, the node has enough time to get some useful work done before it gives up the glock. A second issue is that when gfs2_glock_dq() is called from within a page fault to demote a lock, and the associated page needs to be written out, it will try to acquire a lock on it, but it has already been locked at a higher level. This patch makes gfs2_glock_dq() use the work queue as well, to avoid this issue. This is the same patch as Steve Whitehouse originally proposed to fix this issue, except that gfs2_glock_dq() now grabs a reference to the glock before it queues up the work on it. Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r--fs/gfs2/glock.c75
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/gfs2/incore.h5
3 files changed, 66 insertions, 16 deletions
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3d949187fed0..931368a385c8 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -27,6 +27,8 @@
27#include <linux/debugfs.h> 27#include <linux/debugfs.h>
28#include <linux/kthread.h> 28#include <linux/kthread.h>
29#include <linux/freezer.h> 29#include <linux/freezer.h>
30#include <linux/workqueue.h>
31#include <linux/jiffies.h>
30 32
31#include "gfs2.h" 33#include "gfs2.h"
32#include "incore.h" 34#include "incore.h"
@@ -58,10 +60,13 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
58static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); 60static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
59static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); 61static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
60static void gfs2_glock_drop_th(struct gfs2_glock *gl); 62static void gfs2_glock_drop_th(struct gfs2_glock *gl);
63static void run_queue(struct gfs2_glock *gl);
64
61static DECLARE_RWSEM(gfs2_umount_flush_sem); 65static DECLARE_RWSEM(gfs2_umount_flush_sem);
62static struct dentry *gfs2_root; 66static struct dentry *gfs2_root;
63static struct task_struct *scand_process; 67static struct task_struct *scand_process;
64static unsigned int scand_secs = 5; 68static unsigned int scand_secs = 5;
69static struct workqueue_struct *glock_workqueue;
65 70
66#define GFS2_GL_HASH_SHIFT 15 71#define GFS2_GL_HASH_SHIFT 15
67#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 72#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -277,6 +282,18 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
277 return gl; 282 return gl;
278} 283}
279 284
285static void glock_work_func(struct work_struct *work)
286{
287 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
288
289 spin_lock(&gl->gl_spin);
290 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
291 set_bit(GLF_DEMOTE, &gl->gl_flags);
292 run_queue(gl);
293 spin_unlock(&gl->gl_spin);
294 gfs2_glock_put(gl);
295}
296
280/** 297/**
281 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 298 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
282 * @sdp: The GFS2 superblock 299 * @sdp: The GFS2 superblock
@@ -316,6 +333,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
316 gl->gl_name = name; 333 gl->gl_name = name;
317 atomic_set(&gl->gl_ref, 1); 334 atomic_set(&gl->gl_ref, 1);
318 gl->gl_state = LM_ST_UNLOCKED; 335 gl->gl_state = LM_ST_UNLOCKED;
336 gl->gl_demote_state = LM_ST_EXCLUSIVE;
319 gl->gl_hash = hash; 337 gl->gl_hash = hash;
320 gl->gl_owner_pid = 0; 338 gl->gl_owner_pid = 0;
321 gl->gl_ip = 0; 339 gl->gl_ip = 0;
@@ -324,10 +342,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
324 gl->gl_req_bh = NULL; 342 gl->gl_req_bh = NULL;
325 gl->gl_vn = 0; 343 gl->gl_vn = 0;
326 gl->gl_stamp = jiffies; 344 gl->gl_stamp = jiffies;
345 gl->gl_tchange = jiffies;
327 gl->gl_object = NULL; 346 gl->gl_object = NULL;
328 gl->gl_sbd = sdp; 347 gl->gl_sbd = sdp;
329 gl->gl_aspace = NULL; 348 gl->gl_aspace = NULL;
330 lops_init_le(&gl->gl_le, &gfs2_glock_lops); 349 lops_init_le(&gl->gl_le, &gfs2_glock_lops);
350 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
331 351
332 /* If this glock protects actual on-disk data or metadata blocks, 352 /* If this glock protects actual on-disk data or metadata blocks,
333 create a VFS inode to manage the pages/buffers holding them. */ 353 create a VFS inode to manage the pages/buffers holding them. */
@@ -441,6 +461,8 @@ static void wait_on_holder(struct gfs2_holder *gh)
441 461
442static void gfs2_demote_wake(struct gfs2_glock *gl) 462static void gfs2_demote_wake(struct gfs2_glock *gl)
443{ 463{
464 BUG_ON(!spin_is_locked(&gl->gl_spin));
465 gl->gl_demote_state = LM_ST_EXCLUSIVE;
444 clear_bit(GLF_DEMOTE, &gl->gl_flags); 466 clear_bit(GLF_DEMOTE, &gl->gl_flags);
445 smp_mb__after_clear_bit(); 467 smp_mb__after_clear_bit();
446 wake_up_bit(&gl->gl_flags, GLF_DEMOTE); 468 wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
@@ -682,10 +704,14 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
682 * practise: LM_ST_SHARED and LM_ST_UNLOCKED 704 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
683 */ 705 */
684 706
685static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote) 707static void handle_callback(struct gfs2_glock *gl, unsigned int state,
708 int remote, unsigned long delay)
686{ 709{
710 int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
711
687 spin_lock(&gl->gl_spin); 712 spin_lock(&gl->gl_spin);
688 if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { 713 set_bit(bit, &gl->gl_flags);
714 if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
689 gl->gl_demote_state = state; 715 gl->gl_demote_state = state;
690 gl->gl_demote_time = jiffies; 716 gl->gl_demote_time = jiffies;
691 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && 717 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
@@ -727,6 +753,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
727 } 753 }
728 754
729 gl->gl_state = new_state; 755 gl->gl_state = new_state;
756 gl->gl_tchange = jiffies;
730} 757}
731 758
732/** 759/**
@@ -813,7 +840,6 @@ out:
813 gl->gl_req_gh = NULL; 840 gl->gl_req_gh = NULL;
814 gl->gl_req_bh = NULL; 841 gl->gl_req_bh = NULL;
815 clear_bit(GLF_LOCK, &gl->gl_flags); 842 clear_bit(GLF_LOCK, &gl->gl_flags);
816 run_queue(gl);
817 spin_unlock(&gl->gl_spin); 843 spin_unlock(&gl->gl_spin);
818 } 844 }
819 845
@@ -885,7 +911,6 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
885 gfs2_assert_warn(sdp, !ret); 911 gfs2_assert_warn(sdp, !ret);
886 912
887 state_change(gl, LM_ST_UNLOCKED); 913 state_change(gl, LM_ST_UNLOCKED);
888 gfs2_demote_wake(gl);
889 914
890 if (glops->go_inval) 915 if (glops->go_inval)
891 glops->go_inval(gl, DIO_METADATA); 916 glops->go_inval(gl, DIO_METADATA);
@@ -898,10 +923,10 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
898 } 923 }
899 924
900 spin_lock(&gl->gl_spin); 925 spin_lock(&gl->gl_spin);
926 gfs2_demote_wake(gl);
901 gl->gl_req_gh = NULL; 927 gl->gl_req_gh = NULL;
902 gl->gl_req_bh = NULL; 928 gl->gl_req_bh = NULL;
903 clear_bit(GLF_LOCK, &gl->gl_flags); 929 clear_bit(GLF_LOCK, &gl->gl_flags);
904 run_queue(gl);
905 spin_unlock(&gl->gl_spin); 930 spin_unlock(&gl->gl_spin);
906 931
907 gfs2_glock_put(gl); 932 gfs2_glock_put(gl);
@@ -1209,9 +1234,10 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1209{ 1234{
1210 struct gfs2_glock *gl = gh->gh_gl; 1235 struct gfs2_glock *gl = gh->gh_gl;
1211 const struct gfs2_glock_operations *glops = gl->gl_ops; 1236 const struct gfs2_glock_operations *glops = gl->gl_ops;
1237 unsigned delay = 0;
1212 1238
1213 if (gh->gh_flags & GL_NOCACHE) 1239 if (gh->gh_flags & GL_NOCACHE)
1214 handle_callback(gl, LM_ST_UNLOCKED, 0); 1240 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1215 1241
1216 gfs2_glmutex_lock(gl); 1242 gfs2_glmutex_lock(gl);
1217 1243
@@ -1229,8 +1255,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1229 } 1255 }
1230 1256
1231 clear_bit(GLF_LOCK, &gl->gl_flags); 1257 clear_bit(GLF_LOCK, &gl->gl_flags);
1232 run_queue(gl);
1233 spin_unlock(&gl->gl_spin); 1258 spin_unlock(&gl->gl_spin);
1259
1260 gfs2_glock_hold(gl);
1261 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1262 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1263 delay = gl->gl_ops->go_min_hold_time;
1264 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1265 gfs2_glock_put(gl);
1234} 1266}
1235 1267
1236void gfs2_glock_dq_wait(struct gfs2_holder *gh) 1268void gfs2_glock_dq_wait(struct gfs2_holder *gh)
@@ -1457,18 +1489,21 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1457 unsigned int state) 1489 unsigned int state)
1458{ 1490{
1459 struct gfs2_glock *gl; 1491 struct gfs2_glock *gl;
1492 unsigned long delay = 0;
1493 unsigned long holdtime;
1494 unsigned long now = jiffies;
1460 1495
1461 gl = gfs2_glock_find(sdp, name); 1496 gl = gfs2_glock_find(sdp, name);
1462 if (!gl) 1497 if (!gl)
1463 return; 1498 return;
1464 1499
1465 handle_callback(gl, state, 1); 1500 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
1466 1501 if (time_before(now, holdtime))
1467 spin_lock(&gl->gl_spin); 1502 delay = holdtime - now;
1468 run_queue(gl);
1469 spin_unlock(&gl->gl_spin);
1470 1503
1471 gfs2_glock_put(gl); 1504 handle_callback(gl, state, 1, delay);
1505 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1506 gfs2_glock_put(gl);
1472} 1507}
1473 1508
1474/** 1509/**
@@ -1509,7 +1544,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1509 return; 1544 return;
1510 if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) 1545 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1511 gl->gl_req_bh(gl, async->lc_ret); 1546 gl->gl_req_bh(gl, async->lc_ret);
1512 gfs2_glock_put(gl); 1547 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1548 gfs2_glock_put(gl);
1513 up_read(&gfs2_umount_flush_sem); 1549 up_read(&gfs2_umount_flush_sem);
1514 return; 1550 return;
1515 } 1551 }
@@ -1602,7 +1638,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1602 if (gfs2_glmutex_trylock(gl)) { 1638 if (gfs2_glmutex_trylock(gl)) {
1603 if (list_empty(&gl->gl_holders) && 1639 if (list_empty(&gl->gl_holders) &&
1604 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1640 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1605 handle_callback(gl, LM_ST_UNLOCKED, 0); 1641 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1606 gfs2_glmutex_unlock(gl); 1642 gfs2_glmutex_unlock(gl);
1607 } 1643 }
1608 1644
@@ -1702,7 +1738,7 @@ static void clear_glock(struct gfs2_glock *gl)
1702 if (gfs2_glmutex_trylock(gl)) { 1738 if (gfs2_glmutex_trylock(gl)) {
1703 if (list_empty(&gl->gl_holders) && 1739 if (list_empty(&gl->gl_holders) &&
1704 gl->gl_state != LM_ST_UNLOCKED) 1740 gl->gl_state != LM_ST_UNLOCKED)
1705 handle_callback(gl, LM_ST_UNLOCKED, 0); 1741 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1706 gfs2_glmutex_unlock(gl); 1742 gfs2_glmutex_unlock(gl);
1707 } 1743 }
1708} 1744}
@@ -2009,11 +2045,18 @@ int __init gfs2_glock_init(void)
2009 if (IS_ERR(scand_process)) 2045 if (IS_ERR(scand_process))
2010 return PTR_ERR(scand_process); 2046 return PTR_ERR(scand_process);
2011 2047
2048 glock_workqueue = create_workqueue("glock_workqueue");
2049 if (IS_ERR(glock_workqueue)) {
2050 kthread_stop(scand_process);
2051 return PTR_ERR(glock_workqueue);
2052 }
2053
2012 return 0; 2054 return 0;
2013} 2055}
2014 2056
2015void gfs2_glock_exit(void) 2057void gfs2_glock_exit(void)
2016{ 2058{
2059 destroy_workqueue(glock_workqueue);
2017 kthread_stop(scand_process); 2060 kthread_stop(scand_process);
2018} 2061}
2019 2062
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 88342e0b4bc7..7ef6b23bb38a 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -454,6 +454,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
454 .go_lock = inode_go_lock, 454 .go_lock = inode_go_lock,
455 .go_unlock = inode_go_unlock, 455 .go_unlock = inode_go_unlock,
456 .go_type = LM_TYPE_INODE, 456 .go_type = LM_TYPE_INODE,
457 .go_min_hold_time = HZ / 10,
457}; 458};
458 459
459const struct gfs2_glock_operations gfs2_rgrp_glops = { 460const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -464,6 +465,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
464 .go_lock = rgrp_go_lock, 465 .go_lock = rgrp_go_lock,
465 .go_unlock = rgrp_go_unlock, 466 .go_unlock = rgrp_go_unlock,
466 .go_type = LM_TYPE_RGRP, 467 .go_type = LM_TYPE_RGRP,
468 .go_min_hold_time = HZ / 10,
467}; 469};
468 470
469const struct gfs2_glock_operations gfs2_trans_glops = { 471const struct gfs2_glock_operations gfs2_trans_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1390b30daf19..23b611aa70d2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -11,6 +11,7 @@
11#define __INCORE_DOT_H__ 11#define __INCORE_DOT_H__
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/workqueue.h>
14 15
15#define DIO_WAIT 0x00000010 16#define DIO_WAIT 0x00000010
16#define DIO_METADATA 0x00000020 17#define DIO_METADATA 0x00000020
@@ -130,6 +131,7 @@ struct gfs2_glock_operations {
130 int (*go_lock) (struct gfs2_holder *gh); 131 int (*go_lock) (struct gfs2_holder *gh);
131 void (*go_unlock) (struct gfs2_holder *gh); 132 void (*go_unlock) (struct gfs2_holder *gh);
132 const int go_type; 133 const int go_type;
134 const unsigned long go_min_hold_time;
133}; 135};
134 136
135enum { 137enum {
@@ -161,6 +163,7 @@ enum {
161 GLF_LOCK = 1, 163 GLF_LOCK = 1,
162 GLF_STICKY = 2, 164 GLF_STICKY = 2,
163 GLF_DEMOTE = 3, 165 GLF_DEMOTE = 3,
166 GLF_PENDING_DEMOTE = 4,
164 GLF_DIRTY = 5, 167 GLF_DIRTY = 5,
165}; 168};
166 169
@@ -193,6 +196,7 @@ struct gfs2_glock {
193 196
194 u64 gl_vn; 197 u64 gl_vn;
195 unsigned long gl_stamp; 198 unsigned long gl_stamp;
199 unsigned long gl_tchange;
196 void *gl_object; 200 void *gl_object;
197 201
198 struct list_head gl_reclaim; 202 struct list_head gl_reclaim;
@@ -203,6 +207,7 @@ struct gfs2_glock {
203 struct gfs2_log_element gl_le; 207 struct gfs2_log_element gl_le;
204 struct list_head gl_ail_list; 208 struct list_head gl_ail_list;
205 atomic_t gl_ail_count; 209 atomic_t gl_ail_count;
210 struct delayed_work gl_work;
206}; 211};
207 212
208struct gfs2_alloc { 213struct gfs2_alloc {