diff options
author | Benjamin Marzinski <bmarzins@redhat.com> | 2007-08-23 14:19:05 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-10 03:55:48 -0400 |
commit | c4f68a130fc1795e4a75ec5bdaf9e85d86c22419 (patch) | |
tree | 37251ae5634d4b73b5224e3e8679f92472de0ebe | |
parent | d1e2777d4f419a865ddccdb9b3412021d0e4de51 (diff) |
[GFS2] delay glock demote for a minimum hold time
When a lot of IO, with some distributed mmap IO, is run on a GFS2 filesystem in
a cluster, it will deadlock. The reason is that do_no_page() will repeatedly
call gfs2_sharewrite_nopage(), because each node keeps giving up the glock
too early, and is forced to call unmap_mapping_range(). This bumps the
mapping->truncate_count sequence count, forcing do_no_page() to retry. This
patch institutes a minimum glock hold time of a tenth of a second. This ensures
that even in heavy contention cases, the node has enough time to get some
useful work done before it gives up the glock.
A second issue is that when gfs2_glock_dq() is called from within a page fault
to demote a lock, and the associated page needs to be written out, it will
try to acquire a lock on it, but it has already been locked at a higher level.
This patch makes gfs2_glock_dq() use the work queue as well, to avoid this
issue. This is the same patch as Steve Whitehouse originally proposed to fix
this issue, except that gfs2_glock_dq() now grabs a reference to the glock
before it queues up the work on it.
Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- | fs/gfs2/glock.c | 75 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 2 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 5 |
3 files changed, 66 insertions, 16 deletions
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 3d949187fed0..931368a385c8 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -27,6 +27,8 @@ | |||
27 | #include <linux/debugfs.h> | 27 | #include <linux/debugfs.h> |
28 | #include <linux/kthread.h> | 28 | #include <linux/kthread.h> |
29 | #include <linux/freezer.h> | 29 | #include <linux/freezer.h> |
30 | #include <linux/workqueue.h> | ||
31 | #include <linux/jiffies.h> | ||
30 | 32 | ||
31 | #include "gfs2.h" | 33 | #include "gfs2.h" |
32 | #include "incore.h" | 34 | #include "incore.h" |
@@ -58,10 +60,13 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); | |||
58 | static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); | 60 | static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); |
59 | static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); | 61 | static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); |
60 | static void gfs2_glock_drop_th(struct gfs2_glock *gl); | 62 | static void gfs2_glock_drop_th(struct gfs2_glock *gl); |
63 | static void run_queue(struct gfs2_glock *gl); | ||
64 | |||
61 | static DECLARE_RWSEM(gfs2_umount_flush_sem); | 65 | static DECLARE_RWSEM(gfs2_umount_flush_sem); |
62 | static struct dentry *gfs2_root; | 66 | static struct dentry *gfs2_root; |
63 | static struct task_struct *scand_process; | 67 | static struct task_struct *scand_process; |
64 | static unsigned int scand_secs = 5; | 68 | static unsigned int scand_secs = 5; |
69 | static struct workqueue_struct *glock_workqueue; | ||
65 | 70 | ||
66 | #define GFS2_GL_HASH_SHIFT 15 | 71 | #define GFS2_GL_HASH_SHIFT 15 |
67 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) | 72 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) |
@@ -277,6 +282,18 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp, | |||
277 | return gl; | 282 | return gl; |
278 | } | 283 | } |
279 | 284 | ||
285 | static void glock_work_func(struct work_struct *work) | ||
286 | { | ||
287 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); | ||
288 | |||
289 | spin_lock(&gl->gl_spin); | ||
290 | if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) | ||
291 | set_bit(GLF_DEMOTE, &gl->gl_flags); | ||
292 | run_queue(gl); | ||
293 | spin_unlock(&gl->gl_spin); | ||
294 | gfs2_glock_put(gl); | ||
295 | } | ||
296 | |||
280 | /** | 297 | /** |
281 | * gfs2_glock_get() - Get a glock, or create one if one doesn't exist | 298 | * gfs2_glock_get() - Get a glock, or create one if one doesn't exist |
282 | * @sdp: The GFS2 superblock | 299 | * @sdp: The GFS2 superblock |
@@ -316,6 +333,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
316 | gl->gl_name = name; | 333 | gl->gl_name = name; |
317 | atomic_set(&gl->gl_ref, 1); | 334 | atomic_set(&gl->gl_ref, 1); |
318 | gl->gl_state = LM_ST_UNLOCKED; | 335 | gl->gl_state = LM_ST_UNLOCKED; |
336 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | ||
319 | gl->gl_hash = hash; | 337 | gl->gl_hash = hash; |
320 | gl->gl_owner_pid = 0; | 338 | gl->gl_owner_pid = 0; |
321 | gl->gl_ip = 0; | 339 | gl->gl_ip = 0; |
@@ -324,10 +342,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
324 | gl->gl_req_bh = NULL; | 342 | gl->gl_req_bh = NULL; |
325 | gl->gl_vn = 0; | 343 | gl->gl_vn = 0; |
326 | gl->gl_stamp = jiffies; | 344 | gl->gl_stamp = jiffies; |
345 | gl->gl_tchange = jiffies; | ||
327 | gl->gl_object = NULL; | 346 | gl->gl_object = NULL; |
328 | gl->gl_sbd = sdp; | 347 | gl->gl_sbd = sdp; |
329 | gl->gl_aspace = NULL; | 348 | gl->gl_aspace = NULL; |
330 | lops_init_le(&gl->gl_le, &gfs2_glock_lops); | 349 | lops_init_le(&gl->gl_le, &gfs2_glock_lops); |
350 | INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); | ||
331 | 351 | ||
332 | /* If this glock protects actual on-disk data or metadata blocks, | 352 | /* If this glock protects actual on-disk data or metadata blocks, |
333 | create a VFS inode to manage the pages/buffers holding them. */ | 353 | create a VFS inode to manage the pages/buffers holding them. */ |
@@ -441,6 +461,8 @@ static void wait_on_holder(struct gfs2_holder *gh) | |||
441 | 461 | ||
442 | static void gfs2_demote_wake(struct gfs2_glock *gl) | 462 | static void gfs2_demote_wake(struct gfs2_glock *gl) |
443 | { | 463 | { |
464 | BUG_ON(!spin_is_locked(&gl->gl_spin)); | ||
465 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | ||
444 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | 466 | clear_bit(GLF_DEMOTE, &gl->gl_flags); |
445 | smp_mb__after_clear_bit(); | 467 | smp_mb__after_clear_bit(); |
446 | wake_up_bit(&gl->gl_flags, GLF_DEMOTE); | 468 | wake_up_bit(&gl->gl_flags, GLF_DEMOTE); |
@@ -682,10 +704,14 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) | |||
682 | * practise: LM_ST_SHARED and LM_ST_UNLOCKED | 704 | * practise: LM_ST_SHARED and LM_ST_UNLOCKED |
683 | */ | 705 | */ |
684 | 706 | ||
685 | static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote) | 707 | static void handle_callback(struct gfs2_glock *gl, unsigned int state, |
708 | int remote, unsigned long delay) | ||
686 | { | 709 | { |
710 | int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; | ||
711 | |||
687 | spin_lock(&gl->gl_spin); | 712 | spin_lock(&gl->gl_spin); |
688 | if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { | 713 | set_bit(bit, &gl->gl_flags); |
714 | if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { | ||
689 | gl->gl_demote_state = state; | 715 | gl->gl_demote_state = state; |
690 | gl->gl_demote_time = jiffies; | 716 | gl->gl_demote_time = jiffies; |
691 | if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && | 717 | if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && |
@@ -727,6 +753,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) | |||
727 | } | 753 | } |
728 | 754 | ||
729 | gl->gl_state = new_state; | 755 | gl->gl_state = new_state; |
756 | gl->gl_tchange = jiffies; | ||
730 | } | 757 | } |
731 | 758 | ||
732 | /** | 759 | /** |
@@ -813,7 +840,6 @@ out: | |||
813 | gl->gl_req_gh = NULL; | 840 | gl->gl_req_gh = NULL; |
814 | gl->gl_req_bh = NULL; | 841 | gl->gl_req_bh = NULL; |
815 | clear_bit(GLF_LOCK, &gl->gl_flags); | 842 | clear_bit(GLF_LOCK, &gl->gl_flags); |
816 | run_queue(gl); | ||
817 | spin_unlock(&gl->gl_spin); | 843 | spin_unlock(&gl->gl_spin); |
818 | } | 844 | } |
819 | 845 | ||
@@ -885,7 +911,6 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
885 | gfs2_assert_warn(sdp, !ret); | 911 | gfs2_assert_warn(sdp, !ret); |
886 | 912 | ||
887 | state_change(gl, LM_ST_UNLOCKED); | 913 | state_change(gl, LM_ST_UNLOCKED); |
888 | gfs2_demote_wake(gl); | ||
889 | 914 | ||
890 | if (glops->go_inval) | 915 | if (glops->go_inval) |
891 | glops->go_inval(gl, DIO_METADATA); | 916 | glops->go_inval(gl, DIO_METADATA); |
@@ -898,10 +923,10 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
898 | } | 923 | } |
899 | 924 | ||
900 | spin_lock(&gl->gl_spin); | 925 | spin_lock(&gl->gl_spin); |
926 | gfs2_demote_wake(gl); | ||
901 | gl->gl_req_gh = NULL; | 927 | gl->gl_req_gh = NULL; |
902 | gl->gl_req_bh = NULL; | 928 | gl->gl_req_bh = NULL; |
903 | clear_bit(GLF_LOCK, &gl->gl_flags); | 929 | clear_bit(GLF_LOCK, &gl->gl_flags); |
904 | run_queue(gl); | ||
905 | spin_unlock(&gl->gl_spin); | 930 | spin_unlock(&gl->gl_spin); |
906 | 931 | ||
907 | gfs2_glock_put(gl); | 932 | gfs2_glock_put(gl); |
@@ -1209,9 +1234,10 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1209 | { | 1234 | { |
1210 | struct gfs2_glock *gl = gh->gh_gl; | 1235 | struct gfs2_glock *gl = gh->gh_gl; |
1211 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 1236 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
1237 | unsigned delay = 0; | ||
1212 | 1238 | ||
1213 | if (gh->gh_flags & GL_NOCACHE) | 1239 | if (gh->gh_flags & GL_NOCACHE) |
1214 | handle_callback(gl, LM_ST_UNLOCKED, 0); | 1240 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
1215 | 1241 | ||
1216 | gfs2_glmutex_lock(gl); | 1242 | gfs2_glmutex_lock(gl); |
1217 | 1243 | ||
@@ -1229,8 +1255,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1229 | } | 1255 | } |
1230 | 1256 | ||
1231 | clear_bit(GLF_LOCK, &gl->gl_flags); | 1257 | clear_bit(GLF_LOCK, &gl->gl_flags); |
1232 | run_queue(gl); | ||
1233 | spin_unlock(&gl->gl_spin); | 1258 | spin_unlock(&gl->gl_spin); |
1259 | |||
1260 | gfs2_glock_hold(gl); | ||
1261 | if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && | ||
1262 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
1263 | delay = gl->gl_ops->go_min_hold_time; | ||
1264 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) | ||
1265 | gfs2_glock_put(gl); | ||
1234 | } | 1266 | } |
1235 | 1267 | ||
1236 | void gfs2_glock_dq_wait(struct gfs2_holder *gh) | 1268 | void gfs2_glock_dq_wait(struct gfs2_holder *gh) |
@@ -1457,18 +1489,21 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
1457 | unsigned int state) | 1489 | unsigned int state) |
1458 | { | 1490 | { |
1459 | struct gfs2_glock *gl; | 1491 | struct gfs2_glock *gl; |
1492 | unsigned long delay = 0; | ||
1493 | unsigned long holdtime; | ||
1494 | unsigned long now = jiffies; | ||
1460 | 1495 | ||
1461 | gl = gfs2_glock_find(sdp, name); | 1496 | gl = gfs2_glock_find(sdp, name); |
1462 | if (!gl) | 1497 | if (!gl) |
1463 | return; | 1498 | return; |
1464 | 1499 | ||
1465 | handle_callback(gl, state, 1); | 1500 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
1466 | 1501 | if (time_before(now, holdtime)) | |
1467 | spin_lock(&gl->gl_spin); | 1502 | delay = holdtime - now; |
1468 | run_queue(gl); | ||
1469 | spin_unlock(&gl->gl_spin); | ||
1470 | 1503 | ||
1471 | gfs2_glock_put(gl); | 1504 | handle_callback(gl, state, 1, delay); |
1505 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) | ||
1506 | gfs2_glock_put(gl); | ||
1472 | } | 1507 | } |
1473 | 1508 | ||
1474 | /** | 1509 | /** |
@@ -1509,7 +1544,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
1509 | return; | 1544 | return; |
1510 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) | 1545 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) |
1511 | gl->gl_req_bh(gl, async->lc_ret); | 1546 | gl->gl_req_bh(gl, async->lc_ret); |
1512 | gfs2_glock_put(gl); | 1547 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
1548 | gfs2_glock_put(gl); | ||
1513 | up_read(&gfs2_umount_flush_sem); | 1549 | up_read(&gfs2_umount_flush_sem); |
1514 | return; | 1550 | return; |
1515 | } | 1551 | } |
@@ -1602,7 +1638,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) | |||
1602 | if (gfs2_glmutex_trylock(gl)) { | 1638 | if (gfs2_glmutex_trylock(gl)) { |
1603 | if (list_empty(&gl->gl_holders) && | 1639 | if (list_empty(&gl->gl_holders) && |
1604 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1640 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) |
1605 | handle_callback(gl, LM_ST_UNLOCKED, 0); | 1641 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
1606 | gfs2_glmutex_unlock(gl); | 1642 | gfs2_glmutex_unlock(gl); |
1607 | } | 1643 | } |
1608 | 1644 | ||
@@ -1702,7 +1738,7 @@ static void clear_glock(struct gfs2_glock *gl) | |||
1702 | if (gfs2_glmutex_trylock(gl)) { | 1738 | if (gfs2_glmutex_trylock(gl)) { |
1703 | if (list_empty(&gl->gl_holders) && | 1739 | if (list_empty(&gl->gl_holders) && |
1704 | gl->gl_state != LM_ST_UNLOCKED) | 1740 | gl->gl_state != LM_ST_UNLOCKED) |
1705 | handle_callback(gl, LM_ST_UNLOCKED, 0); | 1741 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
1706 | gfs2_glmutex_unlock(gl); | 1742 | gfs2_glmutex_unlock(gl); |
1707 | } | 1743 | } |
1708 | } | 1744 | } |
@@ -2009,11 +2045,18 @@ int __init gfs2_glock_init(void) | |||
2009 | if (IS_ERR(scand_process)) | 2045 | if (IS_ERR(scand_process)) |
2010 | return PTR_ERR(scand_process); | 2046 | return PTR_ERR(scand_process); |
2011 | 2047 | ||
2048 | glock_workqueue = create_workqueue("glock_workqueue"); | ||
2049 | if (IS_ERR(glock_workqueue)) { | ||
2050 | kthread_stop(scand_process); | ||
2051 | return PTR_ERR(glock_workqueue); | ||
2052 | } | ||
2053 | |||
2012 | return 0; | 2054 | return 0; |
2013 | } | 2055 | } |
2014 | 2056 | ||
2015 | void gfs2_glock_exit(void) | 2057 | void gfs2_glock_exit(void) |
2016 | { | 2058 | { |
2059 | destroy_workqueue(glock_workqueue); | ||
2017 | kthread_stop(scand_process); | 2060 | kthread_stop(scand_process); |
2018 | } | 2061 | } |
2019 | 2062 | ||
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 88342e0b4bc7..7ef6b23bb38a 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -454,6 +454,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { | |||
454 | .go_lock = inode_go_lock, | 454 | .go_lock = inode_go_lock, |
455 | .go_unlock = inode_go_unlock, | 455 | .go_unlock = inode_go_unlock, |
456 | .go_type = LM_TYPE_INODE, | 456 | .go_type = LM_TYPE_INODE, |
457 | .go_min_hold_time = HZ / 10, | ||
457 | }; | 458 | }; |
458 | 459 | ||
459 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 460 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
@@ -464,6 +465,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { | |||
464 | .go_lock = rgrp_go_lock, | 465 | .go_lock = rgrp_go_lock, |
465 | .go_unlock = rgrp_go_unlock, | 466 | .go_unlock = rgrp_go_unlock, |
466 | .go_type = LM_TYPE_RGRP, | 467 | .go_type = LM_TYPE_RGRP, |
468 | .go_min_hold_time = HZ / 10, | ||
467 | }; | 469 | }; |
468 | 470 | ||
469 | const struct gfs2_glock_operations gfs2_trans_glops = { | 471 | const struct gfs2_glock_operations gfs2_trans_glops = { |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 1390b30daf19..23b611aa70d2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -11,6 +11,7 @@ | |||
11 | #define __INCORE_DOT_H__ | 11 | #define __INCORE_DOT_H__ |
12 | 12 | ||
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/workqueue.h> | ||
14 | 15 | ||
15 | #define DIO_WAIT 0x00000010 | 16 | #define DIO_WAIT 0x00000010 |
16 | #define DIO_METADATA 0x00000020 | 17 | #define DIO_METADATA 0x00000020 |
@@ -130,6 +131,7 @@ struct gfs2_glock_operations { | |||
130 | int (*go_lock) (struct gfs2_holder *gh); | 131 | int (*go_lock) (struct gfs2_holder *gh); |
131 | void (*go_unlock) (struct gfs2_holder *gh); | 132 | void (*go_unlock) (struct gfs2_holder *gh); |
132 | const int go_type; | 133 | const int go_type; |
134 | const unsigned long go_min_hold_time; | ||
133 | }; | 135 | }; |
134 | 136 | ||
135 | enum { | 137 | enum { |
@@ -161,6 +163,7 @@ enum { | |||
161 | GLF_LOCK = 1, | 163 | GLF_LOCK = 1, |
162 | GLF_STICKY = 2, | 164 | GLF_STICKY = 2, |
163 | GLF_DEMOTE = 3, | 165 | GLF_DEMOTE = 3, |
166 | GLF_PENDING_DEMOTE = 4, | ||
164 | GLF_DIRTY = 5, | 167 | GLF_DIRTY = 5, |
165 | }; | 168 | }; |
166 | 169 | ||
@@ -193,6 +196,7 @@ struct gfs2_glock { | |||
193 | 196 | ||
194 | u64 gl_vn; | 197 | u64 gl_vn; |
195 | unsigned long gl_stamp; | 198 | unsigned long gl_stamp; |
199 | unsigned long gl_tchange; | ||
196 | void *gl_object; | 200 | void *gl_object; |
197 | 201 | ||
198 | struct list_head gl_reclaim; | 202 | struct list_head gl_reclaim; |
@@ -203,6 +207,7 @@ struct gfs2_glock { | |||
203 | struct gfs2_log_element gl_le; | 207 | struct gfs2_log_element gl_le; |
204 | struct list_head gl_ail_list; | 208 | struct list_head gl_ail_list; |
205 | atomic_t gl_ail_count; | 209 | atomic_t gl_ail_count; |
210 | struct delayed_work gl_work; | ||
206 | }; | 211 | }; |
207 | 212 | ||
208 | struct gfs2_alloc { | 213 | struct gfs2_alloc { |