aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-12 12:14:51 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-12 12:14:51 -0400
commitf26e51f67ae6a75ffc57b96cf5fe096f75e778cb (patch)
tree1e848187885426430cc93bffaadc539312ce636d
parent1462222b76a09a24b240563a51d5f9fbea8bd3e1 (diff)
parentc36258b5925e6cf6bf72904635100593573bfcff (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (51 commits) [DLM] block dlm_recv in recovery transition [DLM] don't overwrite castparam if it's NULL [GFS2] Get superblock a different way [GFS2] Don't try to remove buffers that don't exist [GFS2] Alternate gfs2_iget to avoid looking up inodes being freed [GFS2] Data corruption fix [GFS2] Clean up journaled data writing [GFS2] GFS2: chmod hung - fix race in thread creation [DLM] Make dlm_sendd cond_resched more [GFS2] Move inode deletion out of blocking_cb [GFS2] flocks from same process trip kernel BUG at fs/gfs2/glock.c:1118! [GFS2] Clean up gfs2_trans_add_revoke() [GFS2] Use slab operations for all gfs2_bufdata allocations [GFS2] Replace revoke structure with bufdata structure [GFS2] Fix ordering of dirty/journal for ordered buffer unstuffing [GFS2] Clean up ordered write code [GFS2] Move pin/unpin into lops.c, clean up locking [GFS2] Don't mark jdata dirty in gfs2_unstuffer_page() [GFS2] Introduce gfs2_remove_from_ail [GFS2] Correct lock ordering in unlink ...
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/lock.c142
-rw-r--r--fs/dlm/lock.h3
-rw-r--r--fs/dlm/lockspace.c1
-rw-r--r--fs/dlm/lowcomms.c23
-rw-r--r--fs/dlm/member.c41
-rw-r--r--fs/dlm/midcomms.c17
-rw-r--r--fs/dlm/rcom.c36
-rw-r--r--fs/dlm/rcom.h5
-rw-r--r--fs/dlm/recoverd.c11
-rw-r--r--fs/dlm/requestqueue.c58
-rw-r--r--fs/dlm/requestqueue.h4
-rw-r--r--fs/gfs2/bmap.c35
-rw-r--r--fs/gfs2/daemon.c24
-rw-r--r--fs/gfs2/daemon.h1
-rw-r--r--fs/gfs2/dir.c3
-rw-r--r--fs/gfs2/eaops.c8
-rw-r--r--fs/gfs2/eaops.h4
-rw-r--r--fs/gfs2/glock.c293
-rw-r--r--fs/gfs2/glock.h5
-rw-r--r--fs/gfs2/glops.c24
-rw-r--r--fs/gfs2/incore.h31
-rw-r--r--fs/gfs2/inode.c78
-rw-r--r--fs/gfs2/inode.h3
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h1
-rw-r--r--fs/gfs2/locking/dlm/plock.c11
-rw-r--r--fs/gfs2/locking/dlm/thread.c20
-rw-r--r--fs/gfs2/locking/nolock/main.c1
-rw-r--r--fs/gfs2/log.c230
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c470
-rw-r--r--fs/gfs2/main.c3
-rw-r--r--fs/gfs2/meta_io.c136
-rw-r--r--fs/gfs2/meta_io.h6
-rw-r--r--fs/gfs2/mount.c5
-rw-r--r--fs/gfs2/ops_address.c146
-rw-r--r--fs/gfs2/ops_export.c2
-rw-r--r--fs/gfs2/ops_file.c13
-rw-r--r--fs/gfs2/ops_fstype.c40
-rw-r--r--fs/gfs2/ops_inode.c38
-rw-r--r--fs/gfs2/ops_super.c14
-rw-r--r--fs/gfs2/quota.c13
-rw-r--r--fs/gfs2/recovery.c2
-rw-r--r--fs/gfs2/rgrp.c39
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/gfs2/sys.c2
-rw-r--r--fs/gfs2/trans.c22
-rw-r--r--fs/gfs2/trans.h2
-rw-r--r--include/linux/gfs2_ondisk.h30
49 files changed, 1139 insertions, 961 deletions
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 74901e981e10..d2fc2384c3be 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -491,6 +491,7 @@ struct dlm_ls {
491 uint64_t ls_recover_seq; 491 uint64_t ls_recover_seq;
492 struct dlm_recover *ls_recover_args; 492 struct dlm_recover *ls_recover_args;
493 struct rw_semaphore ls_in_recovery; /* block local requests */ 493 struct rw_semaphore ls_in_recovery; /* block local requests */
494 struct rw_semaphore ls_recv_active; /* block dlm_recv */
494 struct list_head ls_requestqueue;/* queue remote requests */ 495 struct list_head ls_requestqueue;/* queue remote requests */
495 struct mutex ls_requestqueue_mutex; 496 struct mutex ls_requestqueue_mutex;
496 char *ls_recover_buf; 497 char *ls_recover_buf;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 2082daf083d8..3915b8e14146 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3638,55 +3638,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
3638 dlm_put_lkb(lkb); 3638 dlm_put_lkb(lkb);
3639} 3639}
3640 3640
3641int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) 3641static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
3642{ 3642{
3643 struct dlm_message *ms = (struct dlm_message *) hd;
3644 struct dlm_ls *ls;
3645 int error = 0;
3646
3647 if (!recovery)
3648 dlm_message_in(ms);
3649
3650 ls = dlm_find_lockspace_global(hd->h_lockspace);
3651 if (!ls) {
3652 log_print("drop message %d from %d for unknown lockspace %d",
3653 ms->m_type, nodeid, hd->h_lockspace);
3654 return -EINVAL;
3655 }
3656
3657 /* recovery may have just ended leaving a bunch of backed-up requests
3658 in the requestqueue; wait while dlm_recoverd clears them */
3659
3660 if (!recovery)
3661 dlm_wait_requestqueue(ls);
3662
3663 /* recovery may have just started while there were a bunch of
3664 in-flight requests -- save them in requestqueue to be processed
3665 after recovery. we can't let dlm_recvd block on the recovery
3666 lock. if dlm_recoverd is calling this function to clear the
3667 requestqueue, it needs to be interrupted (-EINTR) if another
3668 recovery operation is starting. */
3669
3670 while (1) {
3671 if (dlm_locking_stopped(ls)) {
3672 if (recovery) {
3673 error = -EINTR;
3674 goto out;
3675 }
3676 error = dlm_add_requestqueue(ls, nodeid, hd);
3677 if (error == -EAGAIN)
3678 continue;
3679 else {
3680 error = -EINTR;
3681 goto out;
3682 }
3683 }
3684
3685 if (dlm_lock_recovery_try(ls))
3686 break;
3687 schedule();
3688 }
3689
3690 switch (ms->m_type) { 3643 switch (ms->m_type) {
3691 3644
3692 /* messages sent to a master node */ 3645 /* messages sent to a master node */
@@ -3761,17 +3714,90 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3761 log_error(ls, "unknown message type %d", ms->m_type); 3714 log_error(ls, "unknown message type %d", ms->m_type);
3762 } 3715 }
3763 3716
3764 dlm_unlock_recovery(ls);
3765 out:
3766 dlm_put_lockspace(ls);
3767 dlm_astd_wake(); 3717 dlm_astd_wake();
3768 return error;
3769} 3718}
3770 3719
3720/* If the lockspace is in recovery mode (locking stopped), then normal
3721 messages are saved on the requestqueue for processing after recovery is
3722 done. When not in recovery mode, we wait for dlm_recoverd to drain saved
3723 messages off the requestqueue before we process new ones. This occurs right
3724 after recovery completes when we transition from saving all messages on
3725 requestqueue, to processing all the saved messages, to processing new
3726 messages as they arrive. */
3771 3727
3772/* 3728static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
3773 * Recovery related 3729 int nodeid)
3774 */ 3730{
3731 if (dlm_locking_stopped(ls)) {
3732 dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms);
3733 } else {
3734 dlm_wait_requestqueue(ls);
3735 _receive_message(ls, ms);
3736 }
3737}
3738
3739/* This is called by dlm_recoverd to process messages that were saved on
3740 the requestqueue. */
3741
3742void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms)
3743{
3744 _receive_message(ls, ms);
3745}
3746
3747/* This is called by the midcomms layer when something is received for
3748 the lockspace. It could be either a MSG (normal message sent as part of
3749 standard locking activity) or an RCOM (recovery message sent as part of
3750 lockspace recovery). */
3751
3752void dlm_receive_buffer(struct dlm_header *hd, int nodeid)
3753{
3754 struct dlm_message *ms = (struct dlm_message *) hd;
3755 struct dlm_rcom *rc = (struct dlm_rcom *) hd;
3756 struct dlm_ls *ls;
3757 int type = 0;
3758
3759 switch (hd->h_cmd) {
3760 case DLM_MSG:
3761 dlm_message_in(ms);
3762 type = ms->m_type;
3763 break;
3764 case DLM_RCOM:
3765 dlm_rcom_in(rc);
3766 type = rc->rc_type;
3767 break;
3768 default:
3769 log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid);
3770 return;
3771 }
3772
3773 if (hd->h_nodeid != nodeid) {
3774 log_print("invalid h_nodeid %d from %d lockspace %x",
3775 hd->h_nodeid, nodeid, hd->h_lockspace);
3776 return;
3777 }
3778
3779 ls = dlm_find_lockspace_global(hd->h_lockspace);
3780 if (!ls) {
3781 log_print("invalid h_lockspace %x from %d cmd %d type %d",
3782 hd->h_lockspace, nodeid, hd->h_cmd, type);
3783
3784 if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
3785 dlm_send_ls_not_ready(nodeid, rc);
3786 return;
3787 }
3788
3789 /* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
3790 be inactive (in this ls) before transitioning to recovery mode */
3791
3792 down_read(&ls->ls_recv_active);
3793 if (hd->h_cmd == DLM_MSG)
3794 dlm_receive_message(ls, ms, nodeid);
3795 else
3796 dlm_receive_rcom(ls, rc, nodeid);
3797 up_read(&ls->ls_recv_active);
3798
3799 dlm_put_lockspace(ls);
3800}
3775 3801
3776static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) 3802static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
3777{ 3803{
@@ -4429,7 +4455,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4429 4455
4430 if (lvb_in && ua->lksb.sb_lvbptr) 4456 if (lvb_in && ua->lksb.sb_lvbptr)
4431 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); 4457 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
4432 ua->castparam = ua_tmp->castparam; 4458 if (ua_tmp->castparam)
4459 ua->castparam = ua_tmp->castparam;
4433 ua->user_lksb = ua_tmp->user_lksb; 4460 ua->user_lksb = ua_tmp->user_lksb;
4434 4461
4435 error = set_unlock_args(flags, ua, &args); 4462 error = set_unlock_args(flags, ua, &args);
@@ -4474,7 +4501,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4474 goto out; 4501 goto out;
4475 4502
4476 ua = (struct dlm_user_args *)lkb->lkb_astparam; 4503 ua = (struct dlm_user_args *)lkb->lkb_astparam;
4477 ua->castparam = ua_tmp->castparam; 4504 if (ua_tmp->castparam)
4505 ua->castparam = ua_tmp->castparam;
4478 ua->user_lksb = ua_tmp->user_lksb; 4506 ua->user_lksb = ua_tmp->user_lksb;
4479 4507
4480 error = set_unlock_args(flags, ua, &args); 4508 error = set_unlock_args(flags, ua, &args);
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 1720313c22df..ada04680a1e5 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -16,7 +16,8 @@
16void dlm_print_rsb(struct dlm_rsb *r); 16void dlm_print_rsb(struct dlm_rsb *r);
17void dlm_dump_rsb(struct dlm_rsb *r); 17void dlm_dump_rsb(struct dlm_rsb *r);
18void dlm_print_lkb(struct dlm_lkb *lkb); 18void dlm_print_lkb(struct dlm_lkb *lkb);
19int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery); 19void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms);
20void dlm_receive_buffer(struct dlm_header *hd, int nodeid);
20int dlm_modes_compat(int mode1, int mode2); 21int dlm_modes_compat(int mode1, int mode2);
21int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, 22int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
22 unsigned int flags, struct dlm_rsb **r_ret); 23 unsigned int flags, struct dlm_rsb **r_ret);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 1dc72105ab12..628eaa669e68 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -519,6 +519,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
519 ls->ls_recover_seq = 0; 519 ls->ls_recover_seq = 0;
520 ls->ls_recover_args = NULL; 520 ls->ls_recover_args = NULL;
521 init_rwsem(&ls->ls_in_recovery); 521 init_rwsem(&ls->ls_in_recovery);
522 init_rwsem(&ls->ls_recv_active);
522 INIT_LIST_HEAD(&ls->ls_requestqueue); 523 INIT_LIST_HEAD(&ls->ls_requestqueue);
523 mutex_init(&ls->ls_requestqueue_mutex); 524 mutex_init(&ls->ls_requestqueue_mutex);
524 mutex_init(&ls->ls_clear_proc_locks); 525 mutex_init(&ls->ls_clear_proc_locks);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9e9d2e82f40f..58bf3f5cdbe2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -334,18 +334,8 @@ static void close_connection(struct connection *con, bool and_other)
334 con->rx_page = NULL; 334 con->rx_page = NULL;
335 } 335 }
336 336
337 /* If we are an 'othercon' then NULL the pointer to us 337 con->retries = 0;
338 from the parent and tidy ourself up */ 338 mutex_unlock(&con->sock_mutex);
339 if (test_bit(CF_IS_OTHERCON, &con->flags)) {
340 struct connection *parent = __nodeid2con(con->nodeid, 0);
341 parent->othercon = NULL;
342 kmem_cache_free(con_cache, con);
343 }
344 else {
345 /* Parent connections get reused */
346 con->retries = 0;
347 mutex_unlock(&con->sock_mutex);
348 }
349} 339}
350 340
351/* We only send shutdown messages to nodes that are not part of the cluster */ 341/* We only send shutdown messages to nodes that are not part of the cluster */
@@ -731,6 +721,8 @@ static int tcp_accept_from_sock(struct connection *con)
731 INIT_WORK(&othercon->swork, process_send_sockets); 721 INIT_WORK(&othercon->swork, process_send_sockets);
732 INIT_WORK(&othercon->rwork, process_recv_sockets); 722 INIT_WORK(&othercon->rwork, process_recv_sockets);
733 set_bit(CF_IS_OTHERCON, &othercon->flags); 723 set_bit(CF_IS_OTHERCON, &othercon->flags);
724 }
725 if (!othercon->sock) {
734 newcon->othercon = othercon; 726 newcon->othercon = othercon;
735 othercon->sock = newsock; 727 othercon->sock = newsock;
736 newsock->sk->sk_user_data = othercon; 728 newsock->sk->sk_user_data = othercon;
@@ -1272,14 +1264,15 @@ static void send_to_sock(struct connection *con)
1272 if (len) { 1264 if (len) {
1273 ret = sendpage(con->sock, e->page, offset, len, 1265 ret = sendpage(con->sock, e->page, offset, len,
1274 msg_flags); 1266 msg_flags);
1275 if (ret == -EAGAIN || ret == 0) 1267 if (ret == -EAGAIN || ret == 0) {
1268 cond_resched();
1276 goto out; 1269 goto out;
1270 }
1277 if (ret <= 0) 1271 if (ret <= 0)
1278 goto send_error; 1272 goto send_error;
1279 } else { 1273 }
1280 /* Don't starve people filling buffers */ 1274 /* Don't starve people filling buffers */
1281 cond_resched(); 1275 cond_resched();
1282 }
1283 1276
1284 spin_lock(&con->writequeue_lock); 1277 spin_lock(&con->writequeue_lock);
1285 e->offset += ret; 1278 e->offset += ret;
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index d09977528f69..e9cdcab306e2 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -18,10 +18,6 @@
18#include "rcom.h" 18#include "rcom.h"
19#include "config.h" 19#include "config.h"
20 20
21/*
22 * Following called by dlm_recoverd thread
23 */
24
25static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) 21static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
26{ 22{
27 struct dlm_member *memb = NULL; 23 struct dlm_member *memb = NULL;
@@ -250,18 +246,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
250 return error; 246 return error;
251} 247}
252 248
253/* 249/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
254 * Following called from lockspace.c 250 dlm_ls_start() is called on any of them to start the new recovery. */
255 */
256 251
257int dlm_ls_stop(struct dlm_ls *ls) 252int dlm_ls_stop(struct dlm_ls *ls)
258{ 253{
259 int new; 254 int new;
260 255
261 /* 256 /*
262 * A stop cancels any recovery that's in progress (see RECOVERY_STOP, 257 * Prevent dlm_recv from being in the middle of something when we do
263 * dlm_recovery_stopped()) and prevents any new locks from being 258 * the stop. This includes ensuring dlm_recv isn't processing a
264 * processed (see RUNNING, dlm_locking_stopped()). 259 * recovery message (rcom), while dlm_recoverd is aborting and
260 * resetting things from an in-progress recovery. i.e. we want
261 * dlm_recoverd to abort its recovery without worrying about dlm_recv
262 * processing an rcom at the same time. Stopping dlm_recv also makes
263 * it easy for dlm_receive_message() to check locking stopped and add a
264 * message to the requestqueue without races.
265 */
266
267 down_write(&ls->ls_recv_active);
268
269 /*
270 * Abort any recovery that's in progress (see RECOVERY_STOP,
271 * dlm_recovery_stopped()) and tell any other threads running in the
272 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
265 */ 273 */
266 274
267 spin_lock(&ls->ls_recover_lock); 275 spin_lock(&ls->ls_recover_lock);
@@ -271,8 +279,14 @@ int dlm_ls_stop(struct dlm_ls *ls)
271 spin_unlock(&ls->ls_recover_lock); 279 spin_unlock(&ls->ls_recover_lock);
272 280
273 /* 281 /*
282 * Let dlm_recv run again, now any normal messages will be saved on the
283 * requestqueue for later.
284 */
285
286 up_write(&ls->ls_recv_active);
287
288 /*
274 * This in_recovery lock does two things: 289 * This in_recovery lock does two things:
275 *
276 * 1) Keeps this function from returning until all threads are out 290 * 1) Keeps this function from returning until all threads are out
277 * of locking routines and locking is truely stopped. 291 * of locking routines and locking is truely stopped.
278 * 2) Keeps any new requests from being processed until it's unlocked 292 * 2) Keeps any new requests from being processed until it's unlocked
@@ -284,9 +298,8 @@ int dlm_ls_stop(struct dlm_ls *ls)
284 298
285 /* 299 /*
286 * The recoverd suspend/resume makes sure that dlm_recoverd (if 300 * The recoverd suspend/resume makes sure that dlm_recoverd (if
287 * running) has noticed the clearing of RUNNING above and quit 301 * running) has noticed RECOVERY_STOP above and quit processing the
288 * processing the previous recovery. This will be true for all nodes 302 * previous recovery.
289 * before any nodes start the new recovery.
290 */ 303 */
291 304
292 dlm_recoverd_suspend(ls); 305 dlm_recoverd_suspend(ls);
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index a5126e0c68a6..f8c69dda16a0 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,6 @@
27#include "dlm_internal.h" 27#include "dlm_internal.h"
28#include "lowcomms.h" 28#include "lowcomms.h"
29#include "config.h" 29#include "config.h"
30#include "rcom.h"
31#include "lock.h" 30#include "lock.h"
32#include "midcomms.h" 31#include "midcomms.h"
33 32
@@ -117,19 +116,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
117 offset &= (limit - 1); 116 offset &= (limit - 1);
118 len -= msglen; 117 len -= msglen;
119 118
120 switch (msg->h_cmd) { 119 dlm_receive_buffer(msg, nodeid);
121 case DLM_MSG:
122 dlm_receive_message(msg, nodeid, 0);
123 break;
124
125 case DLM_RCOM:
126 dlm_receive_rcom(msg, nodeid);
127 break;
128
129 default:
130 log_print("unknown msg type %x from %u: %u %u %u %u",
131 msg->h_cmd, nodeid, msglen, len, offset, ret);
132 }
133 } 120 }
134 121
135 if (msg != (struct dlm_header *) __tmp) 122 if (msg != (struct dlm_header *) __tmp)
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 188b91c027e4..ae2fd97fa4ad 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
386 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
387} 387}
388 388
389static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) 389/* If the lockspace doesn't exist then still send a status message
390 back; it's possible that it just doesn't have its global_id yet. */
391
392int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
390{ 393{
391 struct dlm_rcom *rc; 394 struct dlm_rcom *rc;
392 struct rcom_config *rf; 395 struct rcom_config *rf;
@@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
446 return rv; 449 return rv;
447} 450}
448 451
449/* Called by dlm_recvd; corresponds to dlm_receive_message() but special 452/* Called by dlm_recv; corresponds to dlm_receive_message() but special
450 recovery-only comms are sent through here. */ 453 recovery-only comms are sent through here. */
451 454
452void dlm_receive_rcom(struct dlm_header *hd, int nodeid) 455void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
453{ 456{
454 struct dlm_rcom *rc = (struct dlm_rcom *) hd;
455 struct dlm_ls *ls;
456
457 dlm_rcom_in(rc);
458
459 /* If the lockspace doesn't exist then still send a status message
460 back; it's possible that it just doesn't have its global_id yet. */
461
462 ls = dlm_find_lockspace_global(hd->h_lockspace);
463 if (!ls) {
464 log_print("lockspace %x from %d type %x not found",
465 hd->h_lockspace, nodeid, rc->rc_type);
466 if (rc->rc_type == DLM_RCOM_STATUS)
467 send_ls_not_ready(nodeid, rc);
468 return;
469 }
470
471 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { 457 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
472 log_debug(ls, "ignoring recovery message %x from %d", 458 log_debug(ls, "ignoring recovery message %x from %d",
473 rc->rc_type, nodeid); 459 rc->rc_type, nodeid);
@@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
477 if (is_old_reply(ls, rc)) 463 if (is_old_reply(ls, rc))
478 goto out; 464 goto out;
479 465
480 if (nodeid != rc->rc_header.h_nodeid) {
481 log_error(ls, "bad rcom nodeid %d from %d",
482 rc->rc_header.h_nodeid, nodeid);
483 goto out;
484 }
485
486 switch (rc->rc_type) { 466 switch (rc->rc_type) {
487 case DLM_RCOM_STATUS: 467 case DLM_RCOM_STATUS:
488 receive_rcom_status(ls, rc); 468 receive_rcom_status(ls, rc);
@@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
520 DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); 500 DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
521 } 501 }
522 out: 502 out:
523 dlm_put_lockspace(ls); 503 return;
524} 504}
525 505
diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h
index d7984321ff41..b09abd29ba38 100644
--- a/fs/dlm/rcom.h
+++ b/fs/dlm/rcom.h
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -18,7 +18,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid);
18int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); 18int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
19int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); 19int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
20int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); 20int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
21void dlm_receive_rcom(struct dlm_header *hd, int nodeid); 21void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid);
22int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in);
22 23
23#endif 24#endif
24 25
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 66575997861c..4b89e20eebe7 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -24,19 +24,28 @@
24 24
25 25
26/* If the start for which we're re-enabling locking (seq) has been superseded 26/* If the start for which we're re-enabling locking (seq) has been superseded
27 by a newer stop (ls_recover_seq), we need to leave locking disabled. */ 27 by a newer stop (ls_recover_seq), we need to leave locking disabled.
28
29 We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
30 locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
31 enables locking and clears the requestqueue between a and b. */
28 32
29static int enable_locking(struct dlm_ls *ls, uint64_t seq) 33static int enable_locking(struct dlm_ls *ls, uint64_t seq)
30{ 34{
31 int error = -EINTR; 35 int error = -EINTR;
32 36
37 down_write(&ls->ls_recv_active);
38
33 spin_lock(&ls->ls_recover_lock); 39 spin_lock(&ls->ls_recover_lock);
34 if (ls->ls_recover_seq == seq) { 40 if (ls->ls_recover_seq == seq) {
35 set_bit(LSFL_RUNNING, &ls->ls_flags); 41 set_bit(LSFL_RUNNING, &ls->ls_flags);
42 /* unblocks processes waiting to enter the dlm */
36 up_write(&ls->ls_in_recovery); 43 up_write(&ls->ls_in_recovery);
37 error = 0; 44 error = 0;
38 } 45 }
39 spin_unlock(&ls->ls_recover_lock); 46 spin_unlock(&ls->ls_recover_lock);
47
48 up_write(&ls->ls_recv_active);
40 return error; 49 return error;
41} 50}
42 51
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 65008d79c96d..0de04f17ccea 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -20,7 +20,7 @@
20struct rq_entry { 20struct rq_entry {
21 struct list_head list; 21 struct list_head list;
22 int nodeid; 22 int nodeid;
23 char request[1]; 23 char request[0];
24}; 24};
25 25
26/* 26/*
@@ -30,42 +30,39 @@ struct rq_entry {
30 * lockspace is enabled on some while still suspended on others. 30 * lockspace is enabled on some while still suspended on others.
31 */ 31 */
32 32
33int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) 33void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
34{ 34{
35 struct rq_entry *e; 35 struct rq_entry *e;
36 int length = hd->h_length; 36 int length = hd->h_length;
37 int rv = 0;
38 37
39 e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); 38 e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
40 if (!e) { 39 if (!e) {
41 log_print("dlm_add_requestqueue: out of memory\n"); 40 log_print("dlm_add_requestqueue: out of memory len %d", length);
42 return 0; 41 return;
43 } 42 }
44 43
45 e->nodeid = nodeid; 44 e->nodeid = nodeid;
46 memcpy(e->request, hd, length); 45 memcpy(e->request, hd, length);
47 46
48 /* We need to check dlm_locking_stopped() after taking the mutex to
49 avoid a race where dlm_recoverd enables locking and runs
50 process_requestqueue between our earlier dlm_locking_stopped check
51 and this addition to the requestqueue. */
52
53 mutex_lock(&ls->ls_requestqueue_mutex); 47 mutex_lock(&ls->ls_requestqueue_mutex);
54 if (dlm_locking_stopped(ls)) 48 list_add_tail(&e->list, &ls->ls_requestqueue);
55 list_add_tail(&e->list, &ls->ls_requestqueue);
56 else {
57 log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
58 kfree(e);
59 rv = -EAGAIN;
60 }
61 mutex_unlock(&ls->ls_requestqueue_mutex); 49 mutex_unlock(&ls->ls_requestqueue_mutex);
62 return rv;
63} 50}
64 51
52/*
53 * Called by dlm_recoverd to process normal messages saved while recovery was
54 * happening. Normal locking has been enabled before this is called. dlm_recv
55 * upon receiving a message, will wait for all saved messages to be drained
56 * here before processing the message it got. If a new dlm_ls_stop() arrives
57 * while we're processing these saved messages, it may block trying to suspend
58 * dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue. In that
59 * case, we don't abort since locking_stopped is still 0. If dlm_recv is not
60 * waiting for us, then this processing may be aborted due to locking_stopped.
61 */
62
65int dlm_process_requestqueue(struct dlm_ls *ls) 63int dlm_process_requestqueue(struct dlm_ls *ls)
66{ 64{
67 struct rq_entry *e; 65 struct rq_entry *e;
68 struct dlm_header *hd;
69 int error = 0; 66 int error = 0;
70 67
71 mutex_lock(&ls->ls_requestqueue_mutex); 68 mutex_lock(&ls->ls_requestqueue_mutex);
@@ -79,14 +76,7 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
79 e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); 76 e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list);
80 mutex_unlock(&ls->ls_requestqueue_mutex); 77 mutex_unlock(&ls->ls_requestqueue_mutex);
81 78
82 hd = (struct dlm_header *) e->request; 79 dlm_receive_message_saved(ls, (struct dlm_message *)e->request);
83 error = dlm_receive_message(hd, e->nodeid, 1);
84
85 if (error == -EINTR) {
86 /* entry is left on requestqueue */
87 log_debug(ls, "process_requestqueue abort eintr");
88 break;
89 }
90 80
91 mutex_lock(&ls->ls_requestqueue_mutex); 81 mutex_lock(&ls->ls_requestqueue_mutex);
92 list_del(&e->list); 82 list_del(&e->list);
@@ -106,10 +96,12 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
106 96
107/* 97/*
108 * After recovery is done, locking is resumed and dlm_recoverd takes all the 98 * After recovery is done, locking is resumed and dlm_recoverd takes all the
109 * saved requests and processes them as they would have been by dlm_recvd. At 99 * saved requests and processes them as they would have been by dlm_recv. At
110 * the same time, dlm_recvd will start receiving new requests from remote 100 * the same time, dlm_recv will start receiving new requests from remote nodes.
111 * nodes. We want to delay dlm_recvd processing new requests until 101 * We want to delay dlm_recv processing new requests until dlm_recoverd has
112 * dlm_recoverd has finished processing the old saved requests. 102 * finished processing the old saved requests. We don't check for locking
103 * stopped here because dlm_ls_stop won't stop locking until it's suspended us
104 * (dlm_recv).
113 */ 105 */
114 106
115void dlm_wait_requestqueue(struct dlm_ls *ls) 107void dlm_wait_requestqueue(struct dlm_ls *ls)
@@ -118,8 +110,6 @@ void dlm_wait_requestqueue(struct dlm_ls *ls)
118 mutex_lock(&ls->ls_requestqueue_mutex); 110 mutex_lock(&ls->ls_requestqueue_mutex);
119 if (list_empty(&ls->ls_requestqueue)) 111 if (list_empty(&ls->ls_requestqueue))
120 break; 112 break;
121 if (dlm_locking_stopped(ls))
122 break;
123 mutex_unlock(&ls->ls_requestqueue_mutex); 113 mutex_unlock(&ls->ls_requestqueue_mutex);
124 schedule(); 114 schedule();
125 } 115 }
diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h
index 6a53ea03335d..aba34fc05ee4 100644
--- a/fs/dlm/requestqueue.h
+++ b/fs/dlm/requestqueue.h
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -13,7 +13,7 @@
13#ifndef __REQUESTQUEUE_DOT_H__ 13#ifndef __REQUESTQUEUE_DOT_H__
14#define __REQUESTQUEUE_DOT_H__ 14#define __REQUESTQUEUE_DOT_H__
15 15
16int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); 16void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
17int dlm_process_requestqueue(struct dlm_ls *ls); 17int dlm_process_requestqueue(struct dlm_ls *ls);
18void dlm_wait_requestqueue(struct dlm_ls *ls); 18void dlm_wait_requestqueue(struct dlm_ls *ls);
19void dlm_purge_requestqueue(struct dlm_ls *ls); 19void dlm_purge_requestqueue(struct dlm_ls *ls);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index cd805a66880d..93fa427bb5f5 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -93,9 +93,10 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
93 map_bh(bh, inode->i_sb, block); 93 map_bh(bh, inode->i_sb, block);
94 94
95 set_buffer_uptodate(bh); 95 set_buffer_uptodate(bh);
96 if (!gfs2_is_jdata(ip))
97 mark_buffer_dirty(bh);
96 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) 98 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
97 gfs2_trans_add_bh(ip->i_gl, bh, 0); 99 gfs2_trans_add_bh(ip->i_gl, bh, 0);
98 mark_buffer_dirty(bh);
99 100
100 if (release) { 101 if (release) {
101 unlock_page(page); 102 unlock_page(page);
@@ -1085,6 +1086,33 @@ static int do_shrink(struct gfs2_inode *ip, u64 size)
1085 return error; 1086 return error;
1086} 1087}
1087 1088
1089static int do_touch(struct gfs2_inode *ip, u64 size)
1090{
1091 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1092 struct buffer_head *dibh;
1093 int error;
1094
1095 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1096 if (error)
1097 return error;
1098
1099 down_write(&ip->i_rw_mutex);
1100
1101 error = gfs2_meta_inode_buffer(ip, &dibh);
1102 if (error)
1103 goto do_touch_out;
1104
1105 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1106 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1107 gfs2_dinode_out(ip, dibh->b_data);
1108 brelse(dibh);
1109
1110do_touch_out:
1111 up_write(&ip->i_rw_mutex);
1112 gfs2_trans_end(sdp);
1113 return error;
1114}
1115
1088/** 1116/**
1089 * gfs2_truncatei - make a file a given size 1117 * gfs2_truncatei - make a file a given size
1090 * @ip: the inode 1118 * @ip: the inode
@@ -1105,8 +1133,11 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
1105 1133
1106 if (size > ip->i_di.di_size) 1134 if (size > ip->i_di.di_size)
1107 error = do_grow(ip, size); 1135 error = do_grow(ip, size);
1108 else 1136 else if (size < ip->i_di.di_size)
1109 error = do_shrink(ip, size); 1137 error = do_shrink(ip, size);
1138 else
1139 /* update time stamps */
1140 error = do_touch(ip, size);
1110 1141
1111 return error; 1142 return error;
1112} 1143}
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 3548d9f31e0d..3731ab0771d5 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -35,30 +35,6 @@
35 The kthread functions used to start these daemons block and flush signals. */ 35 The kthread functions used to start these daemons block and flush signals. */
36 36
37/** 37/**
38 * gfs2_scand - Look for cached glocks and inodes to toss from memory
39 * @sdp: Pointer to GFS2 superblock
40 *
41 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
42 * See gfs2_glockd()
43 */
44
45int gfs2_scand(void *data)
46{
47 struct gfs2_sbd *sdp = data;
48 unsigned long t;
49
50 while (!kthread_should_stop()) {
51 gfs2_scand_internal(sdp);
52 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
53 if (freezing(current))
54 refrigerator();
55 schedule_timeout_interruptible(t);
56 }
57
58 return 0;
59}
60
61/**
62 * gfs2_glockd - Reclaim unused glock structures 38 * gfs2_glockd - Reclaim unused glock structures
63 * @sdp: Pointer to GFS2 superblock 39 * @sdp: Pointer to GFS2 superblock
64 * 40 *
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
index 801007120fb2..0de9b3557955 100644
--- a/fs/gfs2/daemon.h
+++ b/fs/gfs2/daemon.h
@@ -10,7 +10,6 @@
10#ifndef __DAEMON_DOT_H__ 10#ifndef __DAEMON_DOT_H__
11#define __DAEMON_DOT_H__ 11#define __DAEMON_DOT_H__
12 12
13int gfs2_scand(void *data);
14int gfs2_glockd(void *data); 13int gfs2_glockd(void *data);
15int gfs2_recoverd(void *data); 14int gfs2_recoverd(void *data);
16int gfs2_logd(void *data); 15int gfs2_logd(void *data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 2beb2f401aa2..9949bb746a52 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1043,6 +1043,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1043 1043
1044 error = gfs2_meta_inode_buffer(dip, &dibh); 1044 error = gfs2_meta_inode_buffer(dip, &dibh);
1045 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1045 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1046 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1046 dip->i_di.di_blocks++; 1047 dip->i_di.di_blocks++;
1047 gfs2_set_inode_blocks(&dip->i_inode); 1048 gfs2_set_inode_blocks(&dip->i_inode);
1048 gfs2_dinode_out(dip, dibh->b_data); 1049 gfs2_dinode_out(dip, dibh->b_data);
@@ -1501,7 +1502,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1501 inode = gfs2_inode_lookup(dir->i_sb, 1502 inode = gfs2_inode_lookup(dir->i_sb,
1502 be16_to_cpu(dent->de_type), 1503 be16_to_cpu(dent->de_type),
1503 be64_to_cpu(dent->de_inum.no_addr), 1504 be64_to_cpu(dent->de_inum.no_addr),
1504 be64_to_cpu(dent->de_inum.no_formal_ino)); 1505 be64_to_cpu(dent->de_inum.no_formal_ino), 0);
1505 brelse(bh); 1506 brelse(bh);
1506 return inode; 1507 return inode;
1507 } 1508 }
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index 1ab3e9d73886..aa8dbf303f6d 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -200,28 +200,28 @@ static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
200 return gfs2_ea_remove_i(ip, er); 200 return gfs2_ea_remove_i(ip, er);
201} 201}
202 202
203static struct gfs2_eattr_operations gfs2_user_eaops = { 203static const struct gfs2_eattr_operations gfs2_user_eaops = {
204 .eo_get = user_eo_get, 204 .eo_get = user_eo_get,
205 .eo_set = user_eo_set, 205 .eo_set = user_eo_set,
206 .eo_remove = user_eo_remove, 206 .eo_remove = user_eo_remove,
207 .eo_name = "user", 207 .eo_name = "user",
208}; 208};
209 209
210struct gfs2_eattr_operations gfs2_system_eaops = { 210const struct gfs2_eattr_operations gfs2_system_eaops = {
211 .eo_get = system_eo_get, 211 .eo_get = system_eo_get,
212 .eo_set = system_eo_set, 212 .eo_set = system_eo_set,
213 .eo_remove = system_eo_remove, 213 .eo_remove = system_eo_remove,
214 .eo_name = "system", 214 .eo_name = "system",
215}; 215};
216 216
217static struct gfs2_eattr_operations gfs2_security_eaops = { 217static const struct gfs2_eattr_operations gfs2_security_eaops = {
218 .eo_get = security_eo_get, 218 .eo_get = security_eo_get,
219 .eo_set = security_eo_set, 219 .eo_set = security_eo_set,
220 .eo_remove = security_eo_remove, 220 .eo_remove = security_eo_remove,
221 .eo_name = "security", 221 .eo_name = "security",
222}; 222};
223 223
224struct gfs2_eattr_operations *gfs2_ea_ops[] = { 224const struct gfs2_eattr_operations *gfs2_ea_ops[] = {
225 NULL, 225 NULL,
226 &gfs2_user_eaops, 226 &gfs2_user_eaops,
227 &gfs2_system_eaops, 227 &gfs2_system_eaops,
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
index 508b4f7a2449..da2f7fbbb40d 100644
--- a/fs/gfs2/eaops.h
+++ b/fs/gfs2/eaops.h
@@ -22,9 +22,9 @@ struct gfs2_eattr_operations {
22 22
23unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name); 23unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
24 24
25extern struct gfs2_eattr_operations gfs2_system_eaops; 25extern const struct gfs2_eattr_operations gfs2_system_eaops;
26 26
27extern struct gfs2_eattr_operations *gfs2_ea_ops[]; 27extern const struct gfs2_eattr_operations *gfs2_ea_ops[];
28 28
29#endif /* __EAOPS_DOT_H__ */ 29#endif /* __EAOPS_DOT_H__ */
30 30
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3f0974e1afef..a37efe4aae6f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -25,8 +25,10 @@
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <linux/seq_file.h> 26#include <linux/seq_file.h>
27#include <linux/debugfs.h> 27#include <linux/debugfs.h>
28#include <linux/module.h> 28#include <linux/kthread.h>
29#include <linux/kallsyms.h> 29#include <linux/freezer.h>
30#include <linux/workqueue.h>
31#include <linux/jiffies.h>
30 32
31#include "gfs2.h" 33#include "gfs2.h"
32#include "incore.h" 34#include "incore.h"
@@ -48,7 +50,6 @@ struct glock_iter {
48 int hash; /* hash bucket index */ 50 int hash; /* hash bucket index */
49 struct gfs2_sbd *sdp; /* incore superblock */ 51 struct gfs2_sbd *sdp; /* incore superblock */
50 struct gfs2_glock *gl; /* current glock struct */ 52 struct gfs2_glock *gl; /* current glock struct */
51 struct hlist_head *hb_list; /* current hash bucket ptr */
52 struct seq_file *seq; /* sequence file for debugfs */ 53 struct seq_file *seq; /* sequence file for debugfs */
53 char string[512]; /* scratch space */ 54 char string[512]; /* scratch space */
54}; 55};
@@ -59,8 +60,13 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
59static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); 60static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
60static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); 61static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
61static void gfs2_glock_drop_th(struct gfs2_glock *gl); 62static void gfs2_glock_drop_th(struct gfs2_glock *gl);
63static void run_queue(struct gfs2_glock *gl);
64
62static DECLARE_RWSEM(gfs2_umount_flush_sem); 65static DECLARE_RWSEM(gfs2_umount_flush_sem);
63static struct dentry *gfs2_root; 66static struct dentry *gfs2_root;
67static struct task_struct *scand_process;
68static unsigned int scand_secs = 5;
69static struct workqueue_struct *glock_workqueue;
64 70
65#define GFS2_GL_HASH_SHIFT 15 71#define GFS2_GL_HASH_SHIFT 15
66#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 72#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -276,6 +282,18 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
276 return gl; 282 return gl;
277} 283}
278 284
285static void glock_work_func(struct work_struct *work)
286{
287 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
288
289 spin_lock(&gl->gl_spin);
290 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
291 set_bit(GLF_DEMOTE, &gl->gl_flags);
292 run_queue(gl);
293 spin_unlock(&gl->gl_spin);
294 gfs2_glock_put(gl);
295}
296
279/** 297/**
280 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 298 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
281 * @sdp: The GFS2 superblock 299 * @sdp: The GFS2 superblock
@@ -315,6 +333,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
315 gl->gl_name = name; 333 gl->gl_name = name;
316 atomic_set(&gl->gl_ref, 1); 334 atomic_set(&gl->gl_ref, 1);
317 gl->gl_state = LM_ST_UNLOCKED; 335 gl->gl_state = LM_ST_UNLOCKED;
336 gl->gl_demote_state = LM_ST_EXCLUSIVE;
318 gl->gl_hash = hash; 337 gl->gl_hash = hash;
319 gl->gl_owner_pid = 0; 338 gl->gl_owner_pid = 0;
320 gl->gl_ip = 0; 339 gl->gl_ip = 0;
@@ -323,10 +342,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
323 gl->gl_req_bh = NULL; 342 gl->gl_req_bh = NULL;
324 gl->gl_vn = 0; 343 gl->gl_vn = 0;
325 gl->gl_stamp = jiffies; 344 gl->gl_stamp = jiffies;
345 gl->gl_tchange = jiffies;
326 gl->gl_object = NULL; 346 gl->gl_object = NULL;
327 gl->gl_sbd = sdp; 347 gl->gl_sbd = sdp;
328 gl->gl_aspace = NULL; 348 gl->gl_aspace = NULL;
329 lops_init_le(&gl->gl_le, &gfs2_glock_lops); 349 lops_init_le(&gl->gl_le, &gfs2_glock_lops);
350 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
330 351
331 /* If this glock protects actual on-disk data or metadata blocks, 352 /* If this glock protects actual on-disk data or metadata blocks,
332 create a VFS inode to manage the pages/buffers holding them. */ 353 create a VFS inode to manage the pages/buffers holding them. */
@@ -440,6 +461,8 @@ static void wait_on_holder(struct gfs2_holder *gh)
440 461
441static void gfs2_demote_wake(struct gfs2_glock *gl) 462static void gfs2_demote_wake(struct gfs2_glock *gl)
442{ 463{
464 BUG_ON(!spin_is_locked(&gl->gl_spin));
465 gl->gl_demote_state = LM_ST_EXCLUSIVE;
443 clear_bit(GLF_DEMOTE, &gl->gl_flags); 466 clear_bit(GLF_DEMOTE, &gl->gl_flags);
444 smp_mb__after_clear_bit(); 467 smp_mb__after_clear_bit();
445 wake_up_bit(&gl->gl_flags, GLF_DEMOTE); 468 wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
@@ -545,12 +568,14 @@ static int rq_demote(struct gfs2_glock *gl)
545 return 0; 568 return 0;
546 } 569 }
547 set_bit(GLF_LOCK, &gl->gl_flags); 570 set_bit(GLF_LOCK, &gl->gl_flags);
548 spin_unlock(&gl->gl_spin);
549 if (gl->gl_demote_state == LM_ST_UNLOCKED || 571 if (gl->gl_demote_state == LM_ST_UNLOCKED ||
550 gl->gl_state != LM_ST_EXCLUSIVE) 572 gl->gl_state != LM_ST_EXCLUSIVE) {
573 spin_unlock(&gl->gl_spin);
551 gfs2_glock_drop_th(gl); 574 gfs2_glock_drop_th(gl);
552 else 575 } else {
576 spin_unlock(&gl->gl_spin);
553 gfs2_glock_xmote_th(gl, NULL); 577 gfs2_glock_xmote_th(gl, NULL);
578 }
554 spin_lock(&gl->gl_spin); 579 spin_lock(&gl->gl_spin);
555 580
556 return 0; 581 return 0;
@@ -679,24 +704,25 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
679 * practise: LM_ST_SHARED and LM_ST_UNLOCKED 704 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
680 */ 705 */
681 706
682static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote) 707static void handle_callback(struct gfs2_glock *gl, unsigned int state,
708 int remote, unsigned long delay)
683{ 709{
710 int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
711
684 spin_lock(&gl->gl_spin); 712 spin_lock(&gl->gl_spin);
685 if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { 713 set_bit(bit, &gl->gl_flags);
714 if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
686 gl->gl_demote_state = state; 715 gl->gl_demote_state = state;
687 gl->gl_demote_time = jiffies; 716 gl->gl_demote_time = jiffies;
688 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && 717 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
689 gl->gl_object) { 718 gl->gl_object) {
690 struct inode *inode = igrab(gl->gl_object); 719 gfs2_glock_schedule_for_reclaim(gl);
691 spin_unlock(&gl->gl_spin); 720 spin_unlock(&gl->gl_spin);
692 if (inode) {
693 d_prune_aliases(inode);
694 iput(inode);
695 }
696 return; 721 return;
697 } 722 }
698 } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { 723 } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
699 gl->gl_demote_state = state; 724 gl->gl_demote_state != state) {
725 gl->gl_demote_state = LM_ST_UNLOCKED;
700 } 726 }
701 spin_unlock(&gl->gl_spin); 727 spin_unlock(&gl->gl_spin);
702} 728}
@@ -723,6 +749,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
723 } 749 }
724 750
725 gl->gl_state = new_state; 751 gl->gl_state = new_state;
752 gl->gl_tchange = jiffies;
726} 753}
727 754
728/** 755/**
@@ -760,10 +787,20 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
760 787
761 if (!gh) { 788 if (!gh) {
762 gl->gl_stamp = jiffies; 789 gl->gl_stamp = jiffies;
763 if (ret & LM_OUT_CANCELED) 790 if (ret & LM_OUT_CANCELED) {
764 op_done = 0; 791 op_done = 0;
765 else 792 } else {
793 spin_lock(&gl->gl_spin);
794 if (gl->gl_state != gl->gl_demote_state) {
795 gl->gl_req_bh = NULL;
796 spin_unlock(&gl->gl_spin);
797 gfs2_glock_drop_th(gl);
798 gfs2_glock_put(gl);
799 return;
800 }
766 gfs2_demote_wake(gl); 801 gfs2_demote_wake(gl);
802 spin_unlock(&gl->gl_spin);
803 }
767 } else { 804 } else {
768 spin_lock(&gl->gl_spin); 805 spin_lock(&gl->gl_spin);
769 list_del_init(&gh->gh_list); 806 list_del_init(&gh->gh_list);
@@ -799,7 +836,6 @@ out:
799 gl->gl_req_gh = NULL; 836 gl->gl_req_gh = NULL;
800 gl->gl_req_bh = NULL; 837 gl->gl_req_bh = NULL;
801 clear_bit(GLF_LOCK, &gl->gl_flags); 838 clear_bit(GLF_LOCK, &gl->gl_flags);
802 run_queue(gl);
803 spin_unlock(&gl->gl_spin); 839 spin_unlock(&gl->gl_spin);
804 } 840 }
805 841
@@ -817,7 +853,7 @@ out:
817 * 853 *
818 */ 854 */
819 855
820void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) 856static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
821{ 857{
822 struct gfs2_sbd *sdp = gl->gl_sbd; 858 struct gfs2_sbd *sdp = gl->gl_sbd;
823 int flags = gh ? gh->gh_flags : 0; 859 int flags = gh ? gh->gh_flags : 0;
@@ -871,7 +907,6 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
871 gfs2_assert_warn(sdp, !ret); 907 gfs2_assert_warn(sdp, !ret);
872 908
873 state_change(gl, LM_ST_UNLOCKED); 909 state_change(gl, LM_ST_UNLOCKED);
874 gfs2_demote_wake(gl);
875 910
876 if (glops->go_inval) 911 if (glops->go_inval)
877 glops->go_inval(gl, DIO_METADATA); 912 glops->go_inval(gl, DIO_METADATA);
@@ -884,10 +919,10 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
884 } 919 }
885 920
886 spin_lock(&gl->gl_spin); 921 spin_lock(&gl->gl_spin);
922 gfs2_demote_wake(gl);
887 gl->gl_req_gh = NULL; 923 gl->gl_req_gh = NULL;
888 gl->gl_req_bh = NULL; 924 gl->gl_req_bh = NULL;
889 clear_bit(GLF_LOCK, &gl->gl_flags); 925 clear_bit(GLF_LOCK, &gl->gl_flags);
890 run_queue(gl);
891 spin_unlock(&gl->gl_spin); 926 spin_unlock(&gl->gl_spin);
892 927
893 gfs2_glock_put(gl); 928 gfs2_glock_put(gl);
@@ -1067,24 +1102,31 @@ static void add_to_queue(struct gfs2_holder *gh)
1067 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 1102 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1068 BUG(); 1103 BUG();
1069 1104
1070 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid); 1105 if (!(gh->gh_flags & GL_FLOCK)) {
1071 if (existing) { 1106 existing = find_holder_by_owner(&gl->gl_holders,
1072 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); 1107 gh->gh_owner_pid);
1073 printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); 1108 if (existing) {
1074 printk(KERN_INFO "lock type : %d lock state : %d\n", 1109 print_symbol(KERN_WARNING "original: %s\n",
1075 existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); 1110 existing->gh_ip);
1076 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1111 printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
1077 printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); 1112 printk(KERN_INFO "lock type : %d lock state : %d\n",
1078 printk(KERN_INFO "lock type : %d lock state : %d\n", 1113 existing->gh_gl->gl_name.ln_type,
1079 gl->gl_name.ln_type, gl->gl_state); 1114 existing->gh_gl->gl_state);
1080 BUG(); 1115 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1081 } 1116 printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
1082 1117 printk(KERN_INFO "lock type : %d lock state : %d\n",
1083 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid); 1118 gl->gl_name.ln_type, gl->gl_state);
1084 if (existing) { 1119 BUG();
1085 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); 1120 }
1086 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1121
1087 BUG(); 1122 existing = find_holder_by_owner(&gl->gl_waiters3,
1123 gh->gh_owner_pid);
1124 if (existing) {
1125 print_symbol(KERN_WARNING "original: %s\n",
1126 existing->gh_ip);
1127 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1128 BUG();
1129 }
1088 } 1130 }
1089 1131
1090 if (gh->gh_flags & LM_FLAG_PRIORITY) 1132 if (gh->gh_flags & LM_FLAG_PRIORITY)
@@ -1195,9 +1237,10 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1195{ 1237{
1196 struct gfs2_glock *gl = gh->gh_gl; 1238 struct gfs2_glock *gl = gh->gh_gl;
1197 const struct gfs2_glock_operations *glops = gl->gl_ops; 1239 const struct gfs2_glock_operations *glops = gl->gl_ops;
1240 unsigned delay = 0;
1198 1241
1199 if (gh->gh_flags & GL_NOCACHE) 1242 if (gh->gh_flags & GL_NOCACHE)
1200 handle_callback(gl, LM_ST_UNLOCKED, 0); 1243 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1201 1244
1202 gfs2_glmutex_lock(gl); 1245 gfs2_glmutex_lock(gl);
1203 1246
@@ -1215,8 +1258,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1215 } 1258 }
1216 1259
1217 clear_bit(GLF_LOCK, &gl->gl_flags); 1260 clear_bit(GLF_LOCK, &gl->gl_flags);
1218 run_queue(gl);
1219 spin_unlock(&gl->gl_spin); 1261 spin_unlock(&gl->gl_spin);
1262
1263 gfs2_glock_hold(gl);
1264 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1265 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1266 delay = gl->gl_ops->go_min_hold_time;
1267 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1268 gfs2_glock_put(gl);
1220} 1269}
1221 1270
1222void gfs2_glock_dq_wait(struct gfs2_holder *gh) 1271void gfs2_glock_dq_wait(struct gfs2_holder *gh)
@@ -1443,18 +1492,21 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1443 unsigned int state) 1492 unsigned int state)
1444{ 1493{
1445 struct gfs2_glock *gl; 1494 struct gfs2_glock *gl;
1495 unsigned long delay = 0;
1496 unsigned long holdtime;
1497 unsigned long now = jiffies;
1446 1498
1447 gl = gfs2_glock_find(sdp, name); 1499 gl = gfs2_glock_find(sdp, name);
1448 if (!gl) 1500 if (!gl)
1449 return; 1501 return;
1450 1502
1451 handle_callback(gl, state, 1); 1503 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
1452 1504 if (time_before(now, holdtime))
1453 spin_lock(&gl->gl_spin); 1505 delay = holdtime - now;
1454 run_queue(gl);
1455 spin_unlock(&gl->gl_spin);
1456 1506
1457 gfs2_glock_put(gl); 1507 handle_callback(gl, state, 1, delay);
1508 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1509 gfs2_glock_put(gl);
1458} 1510}
1459 1511
1460/** 1512/**
@@ -1495,7 +1547,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1495 return; 1547 return;
1496 if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) 1548 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1497 gl->gl_req_bh(gl, async->lc_ret); 1549 gl->gl_req_bh(gl, async->lc_ret);
1498 gfs2_glock_put(gl); 1550 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1551 gfs2_glock_put(gl);
1499 up_read(&gfs2_umount_flush_sem); 1552 up_read(&gfs2_umount_flush_sem);
1500 return; 1553 return;
1501 } 1554 }
@@ -1588,7 +1641,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1588 if (gfs2_glmutex_trylock(gl)) { 1641 if (gfs2_glmutex_trylock(gl)) {
1589 if (list_empty(&gl->gl_holders) && 1642 if (list_empty(&gl->gl_holders) &&
1590 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1643 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1591 handle_callback(gl, LM_ST_UNLOCKED, 0); 1644 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1592 gfs2_glmutex_unlock(gl); 1645 gfs2_glmutex_unlock(gl);
1593 } 1646 }
1594 1647
@@ -1617,7 +1670,7 @@ static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
1617 goto out; 1670 goto out;
1618 gl = list_entry(head->first, struct gfs2_glock, gl_list); 1671 gl = list_entry(head->first, struct gfs2_glock, gl_list);
1619 while(1) { 1672 while(1) {
1620 if (gl->gl_sbd == sdp) { 1673 if (!sdp || gl->gl_sbd == sdp) {
1621 gfs2_glock_hold(gl); 1674 gfs2_glock_hold(gl);
1622 read_unlock(gl_lock_addr(hash)); 1675 read_unlock(gl_lock_addr(hash));
1623 if (prev) 1676 if (prev)
@@ -1635,6 +1688,7 @@ out:
1635 read_unlock(gl_lock_addr(hash)); 1688 read_unlock(gl_lock_addr(hash));
1636 if (prev) 1689 if (prev)
1637 gfs2_glock_put(prev); 1690 gfs2_glock_put(prev);
1691 cond_resched();
1638 return has_entries; 1692 return has_entries;
1639} 1693}
1640 1694
@@ -1663,20 +1717,6 @@ out_schedule:
1663} 1717}
1664 1718
1665/** 1719/**
1666 * gfs2_scand_internal - Look for glocks and inodes to toss from memory
1667 * @sdp: the filesystem
1668 *
1669 */
1670
1671void gfs2_scand_internal(struct gfs2_sbd *sdp)
1672{
1673 unsigned int x;
1674
1675 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1676 examine_bucket(scan_glock, sdp, x);
1677}
1678
1679/**
1680 * clear_glock - look at a glock and see if we can free it from glock cache 1720 * clear_glock - look at a glock and see if we can free it from glock cache
1681 * @gl: the glock to look at 1721 * @gl: the glock to look at
1682 * 1722 *
@@ -1701,7 +1741,7 @@ static void clear_glock(struct gfs2_glock *gl)
1701 if (gfs2_glmutex_trylock(gl)) { 1741 if (gfs2_glmutex_trylock(gl)) {
1702 if (list_empty(&gl->gl_holders) && 1742 if (list_empty(&gl->gl_holders) &&
1703 gl->gl_state != LM_ST_UNLOCKED) 1743 gl->gl_state != LM_ST_UNLOCKED)
1704 handle_callback(gl, LM_ST_UNLOCKED, 0); 1744 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1705 gfs2_glmutex_unlock(gl); 1745 gfs2_glmutex_unlock(gl);
1706 } 1746 }
1707} 1747}
@@ -1843,7 +1883,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
1843 1883
1844 spin_lock(&gl->gl_spin); 1884 spin_lock(&gl->gl_spin);
1845 1885
1846 print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, 1886 print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type,
1847 (unsigned long long)gl->gl_name.ln_number); 1887 (unsigned long long)gl->gl_name.ln_number);
1848 print_dbg(gi, " gl_flags ="); 1888 print_dbg(gi, " gl_flags =");
1849 for (x = 0; x < 32; x++) { 1889 for (x = 0; x < 32; x++) {
@@ -1963,6 +2003,35 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
1963 return error; 2003 return error;
1964} 2004}
1965 2005
2006/**
2007 * gfs2_scand - Look for cached glocks and inodes to toss from memory
2008 * @sdp: Pointer to GFS2 superblock
2009 *
2010 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
2011 * See gfs2_glockd()
2012 */
2013
2014static int gfs2_scand(void *data)
2015{
2016 unsigned x;
2017 unsigned delay;
2018
2019 while (!kthread_should_stop()) {
2020 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
2021 examine_bucket(scan_glock, NULL, x);
2022 if (freezing(current))
2023 refrigerator();
2024 delay = scand_secs;
2025 if (delay < 1)
2026 delay = 1;
2027 schedule_timeout_interruptible(delay * HZ);
2028 }
2029
2030 return 0;
2031}
2032
2033
2034
1966int __init gfs2_glock_init(void) 2035int __init gfs2_glock_init(void)
1967{ 2036{
1968 unsigned i; 2037 unsigned i;
@@ -1974,52 +2043,69 @@ int __init gfs2_glock_init(void)
1974 rwlock_init(&gl_hash_locks[i]); 2043 rwlock_init(&gl_hash_locks[i]);
1975 } 2044 }
1976#endif 2045#endif
2046
2047 scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand");
2048 if (IS_ERR(scand_process))
2049 return PTR_ERR(scand_process);
2050
2051 glock_workqueue = create_workqueue("glock_workqueue");
2052 if (IS_ERR(glock_workqueue)) {
2053 kthread_stop(scand_process);
2054 return PTR_ERR(glock_workqueue);
2055 }
2056
1977 return 0; 2057 return 0;
1978} 2058}
1979 2059
2060void gfs2_glock_exit(void)
2061{
2062 destroy_workqueue(glock_workqueue);
2063 kthread_stop(scand_process);
2064}
2065
2066module_param(scand_secs, uint, S_IRUGO|S_IWUSR);
2067MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs");
2068
1980static int gfs2_glock_iter_next(struct glock_iter *gi) 2069static int gfs2_glock_iter_next(struct glock_iter *gi)
1981{ 2070{
2071 struct gfs2_glock *gl;
2072
2073restart:
1982 read_lock(gl_lock_addr(gi->hash)); 2074 read_lock(gl_lock_addr(gi->hash));
1983 while (1) { 2075 gl = gi->gl;
1984 if (!gi->hb_list) { /* If we don't have a hash bucket yet */ 2076 if (gl) {
1985 gi->hb_list = &gl_hash_table[gi->hash].hb_list; 2077 gi->gl = hlist_entry(gl->gl_list.next,
1986 if (hlist_empty(gi->hb_list)) { 2078 struct gfs2_glock, gl_list);
1987 read_unlock(gl_lock_addr(gi->hash));
1988 gi->hash++;
1989 read_lock(gl_lock_addr(gi->hash));
1990 gi->hb_list = NULL;
1991 if (gi->hash >= GFS2_GL_HASH_SIZE) {
1992 read_unlock(gl_lock_addr(gi->hash));
1993 return 1;
1994 }
1995 else
1996 continue;
1997 }
1998 if (!hlist_empty(gi->hb_list)) {
1999 gi->gl = list_entry(gi->hb_list->first,
2000 struct gfs2_glock,
2001 gl_list);
2002 }
2003 } else {
2004 if (gi->gl->gl_list.next == NULL) {
2005 read_unlock(gl_lock_addr(gi->hash));
2006 gi->hash++;
2007 read_lock(gl_lock_addr(gi->hash));
2008 gi->hb_list = NULL;
2009 continue;
2010 }
2011 gi->gl = list_entry(gi->gl->gl_list.next,
2012 struct gfs2_glock, gl_list);
2013 }
2014 if (gi->gl) 2079 if (gi->gl)
2015 break; 2080 gfs2_glock_hold(gi->gl);
2016 } 2081 }
2017 read_unlock(gl_lock_addr(gi->hash)); 2082 read_unlock(gl_lock_addr(gi->hash));
2083 if (gl)
2084 gfs2_glock_put(gl);
2085 if (gl && gi->gl == NULL)
2086 gi->hash++;
2087 while(gi->gl == NULL) {
2088 if (gi->hash >= GFS2_GL_HASH_SIZE)
2089 return 1;
2090 read_lock(gl_lock_addr(gi->hash));
2091 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
2092 struct gfs2_glock, gl_list);
2093 if (gi->gl)
2094 gfs2_glock_hold(gi->gl);
2095 read_unlock(gl_lock_addr(gi->hash));
2096 gi->hash++;
2097 }
2098
2099 if (gi->sdp != gi->gl->gl_sbd)
2100 goto restart;
2101
2018 return 0; 2102 return 0;
2019} 2103}
2020 2104
2021static void gfs2_glock_iter_free(struct glock_iter *gi) 2105static void gfs2_glock_iter_free(struct glock_iter *gi)
2022{ 2106{
2107 if (gi->gl)
2108 gfs2_glock_put(gi->gl);
2023 kfree(gi); 2109 kfree(gi);
2024} 2110}
2025 2111
@@ -2033,9 +2119,8 @@ static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp)
2033 2119
2034 gi->sdp = sdp; 2120 gi->sdp = sdp;
2035 gi->hash = 0; 2121 gi->hash = 0;
2036 gi->gl = NULL;
2037 gi->hb_list = NULL;
2038 gi->seq = NULL; 2122 gi->seq = NULL;
2123 gi->gl = NULL;
2039 memset(gi->string, 0, sizeof(gi->string)); 2124 memset(gi->string, 0, sizeof(gi->string));
2040 2125
2041 if (gfs2_glock_iter_next(gi)) { 2126 if (gfs2_glock_iter_next(gi)) {
@@ -2055,7 +2140,7 @@ static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos)
2055 if (!gi) 2140 if (!gi)
2056 return NULL; 2141 return NULL;
2057 2142
2058 while (n--) { 2143 while(n--) {
2059 if (gfs2_glock_iter_next(gi)) { 2144 if (gfs2_glock_iter_next(gi)) {
2060 gfs2_glock_iter_free(gi); 2145 gfs2_glock_iter_free(gi);
2061 return NULL; 2146 return NULL;
@@ -2082,7 +2167,9 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
2082 2167
2083static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) 2168static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr)
2084{ 2169{
2085 /* nothing for now */ 2170 struct glock_iter *gi = iter_ptr;
2171 if (gi)
2172 gfs2_glock_iter_free(gi);
2086} 2173}
2087 2174
2088static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) 2175static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
@@ -2095,7 +2182,7 @@ static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
2095 return 0; 2182 return 0;
2096} 2183}
2097 2184
2098static struct seq_operations gfs2_glock_seq_ops = { 2185static const struct seq_operations gfs2_glock_seq_ops = {
2099 .start = gfs2_glock_seq_start, 2186 .start = gfs2_glock_seq_start,
2100 .next = gfs2_glock_seq_next, 2187 .next = gfs2_glock_seq_next,
2101 .stop = gfs2_glock_seq_stop, 2188 .stop = gfs2_glock_seq_stop,
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 7721ca3fff9e..b16f604eea9f 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -26,6 +26,7 @@
26#define GL_SKIP 0x00000100 26#define GL_SKIP 0x00000100
27#define GL_ATIME 0x00000200 27#define GL_ATIME 0x00000200
28#define GL_NOCACHE 0x00000400 28#define GL_NOCACHE 0x00000400
29#define GL_FLOCK 0x00000800
29#define GL_NOCANCEL 0x00001000 30#define GL_NOCANCEL 0x00001000
30 31
31#define GLR_TRYFAILED 13 32#define GLR_TRYFAILED 13
@@ -132,11 +133,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
132 133
133void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 134void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
134void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 135void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
135
136void gfs2_scand_internal(struct gfs2_sbd *sdp);
137void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); 136void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
138 137
139int __init gfs2_glock_init(void); 138int __init gfs2_glock_init(void);
139void gfs2_glock_exit(void);
140
140int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); 141int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
141void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); 142void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
142int gfs2_register_debugfs(void); 143int gfs2_register_debugfs(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 777ca46010e8..4670dcb2a877 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -41,7 +41,6 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
41 struct list_head *head = &gl->gl_ail_list; 41 struct list_head *head = &gl->gl_ail_list;
42 struct gfs2_bufdata *bd; 42 struct gfs2_bufdata *bd;
43 struct buffer_head *bh; 43 struct buffer_head *bh;
44 u64 blkno;
45 int error; 44 int error;
46 45
47 blocks = atomic_read(&gl->gl_ail_count); 46 blocks = atomic_read(&gl->gl_ail_count);
@@ -57,19 +56,12 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
57 bd = list_entry(head->next, struct gfs2_bufdata, 56 bd = list_entry(head->next, struct gfs2_bufdata,
58 bd_ail_gl_list); 57 bd_ail_gl_list);
59 bh = bd->bd_bh; 58 bh = bd->bd_bh;
60 blkno = bh->b_blocknr; 59 gfs2_remove_from_ail(NULL, bd);
60 bd->bd_bh = NULL;
61 bh->b_private = NULL;
62 bd->bd_blkno = bh->b_blocknr;
61 gfs2_assert_withdraw(sdp, !buffer_busy(bh)); 63 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
62 64 gfs2_trans_add_revoke(sdp, bd);
63 bd->bd_ail = NULL;
64 list_del(&bd->bd_ail_st_list);
65 list_del(&bd->bd_ail_gl_list);
66 atomic_dec(&gl->gl_ail_count);
67 brelse(bh);
68 gfs2_log_unlock(sdp);
69
70 gfs2_trans_add_revoke(sdp, blkno);
71
72 gfs2_log_lock(sdp);
73 } 65 }
74 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 66 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
75 gfs2_log_unlock(sdp); 67 gfs2_log_unlock(sdp);
@@ -156,9 +148,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
156 ip = NULL; 148 ip = NULL;
157 149
158 if (test_bit(GLF_DIRTY, &gl->gl_flags)) { 150 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
159 if (ip) 151 if (ip && !gfs2_is_jdata(ip))
160 filemap_fdatawrite(ip->i_inode.i_mapping); 152 filemap_fdatawrite(ip->i_inode.i_mapping);
161 gfs2_log_flush(gl->gl_sbd, gl); 153 gfs2_log_flush(gl->gl_sbd, gl);
154 if (ip && gfs2_is_jdata(ip))
155 filemap_fdatawrite(ip->i_inode.i_mapping);
162 gfs2_meta_sync(gl); 156 gfs2_meta_sync(gl);
163 if (ip) { 157 if (ip) {
164 struct address_space *mapping = ip->i_inode.i_mapping; 158 struct address_space *mapping = ip->i_inode.i_mapping;
@@ -452,6 +446,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
452 .go_lock = inode_go_lock, 446 .go_lock = inode_go_lock,
453 .go_unlock = inode_go_unlock, 447 .go_unlock = inode_go_unlock,
454 .go_type = LM_TYPE_INODE, 448 .go_type = LM_TYPE_INODE,
449 .go_min_hold_time = HZ / 10,
455}; 450};
456 451
457const struct gfs2_glock_operations gfs2_rgrp_glops = { 452const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -462,6 +457,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
462 .go_lock = rgrp_go_lock, 457 .go_lock = rgrp_go_lock,
463 .go_unlock = rgrp_go_unlock, 458 .go_unlock = rgrp_go_unlock,
464 .go_type = LM_TYPE_RGRP, 459 .go_type = LM_TYPE_RGRP,
460 .go_min_hold_time = HZ / 10,
465}; 461};
466 462
467const struct gfs2_glock_operations gfs2_trans_glops = { 463const struct gfs2_glock_operations gfs2_trans_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 170ba93829c0..eaddfb5a8e6f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -11,6 +11,7 @@
11#define __INCORE_DOT_H__ 11#define __INCORE_DOT_H__
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/workqueue.h>
14 15
15#define DIO_WAIT 0x00000010 16#define DIO_WAIT 0x00000010
16#define DIO_METADATA 0x00000020 17#define DIO_METADATA 0x00000020
@@ -113,7 +114,13 @@ struct gfs2_bufdata {
113 struct buffer_head *bd_bh; 114 struct buffer_head *bd_bh;
114 struct gfs2_glock *bd_gl; 115 struct gfs2_glock *bd_gl;
115 116
116 struct list_head bd_list_tr; 117 union {
118 struct list_head list_tr;
119 u64 blkno;
120 } u;
121#define bd_list_tr u.list_tr
122#define bd_blkno u.blkno
123
117 struct gfs2_log_element bd_le; 124 struct gfs2_log_element bd_le;
118 125
119 struct gfs2_ail *bd_ail; 126 struct gfs2_ail *bd_ail;
@@ -130,6 +137,7 @@ struct gfs2_glock_operations {
130 int (*go_lock) (struct gfs2_holder *gh); 137 int (*go_lock) (struct gfs2_holder *gh);
131 void (*go_unlock) (struct gfs2_holder *gh); 138 void (*go_unlock) (struct gfs2_holder *gh);
132 const int go_type; 139 const int go_type;
140 const unsigned long go_min_hold_time;
133}; 141};
134 142
135enum { 143enum {
@@ -161,6 +169,7 @@ enum {
161 GLF_LOCK = 1, 169 GLF_LOCK = 1,
162 GLF_STICKY = 2, 170 GLF_STICKY = 2,
163 GLF_DEMOTE = 3, 171 GLF_DEMOTE = 3,
172 GLF_PENDING_DEMOTE = 4,
164 GLF_DIRTY = 5, 173 GLF_DIRTY = 5,
165}; 174};
166 175
@@ -193,6 +202,7 @@ struct gfs2_glock {
193 202
194 u64 gl_vn; 203 u64 gl_vn;
195 unsigned long gl_stamp; 204 unsigned long gl_stamp;
205 unsigned long gl_tchange;
196 void *gl_object; 206 void *gl_object;
197 207
198 struct list_head gl_reclaim; 208 struct list_head gl_reclaim;
@@ -203,6 +213,7 @@ struct gfs2_glock {
203 struct gfs2_log_element gl_le; 213 struct gfs2_log_element gl_le;
204 struct list_head gl_ail_list; 214 struct list_head gl_ail_list;
205 atomic_t gl_ail_count; 215 atomic_t gl_ail_count;
216 struct delayed_work gl_work;
206}; 217};
207 218
208struct gfs2_alloc { 219struct gfs2_alloc {
@@ -293,11 +304,6 @@ struct gfs2_file {
293 struct gfs2_holder f_fl_gh; 304 struct gfs2_holder f_fl_gh;
294}; 305};
295 306
296struct gfs2_revoke {
297 struct gfs2_log_element rv_le;
298 u64 rv_blkno;
299};
300
301struct gfs2_revoke_replay { 307struct gfs2_revoke_replay {
302 struct list_head rr_list; 308 struct list_head rr_list;
303 u64 rr_blkno; 309 u64 rr_blkno;
@@ -335,12 +341,6 @@ struct gfs2_quota_data {
335 unsigned long qd_last_touched; 341 unsigned long qd_last_touched;
336}; 342};
337 343
338struct gfs2_log_buf {
339 struct list_head lb_list;
340 struct buffer_head *lb_bh;
341 struct buffer_head *lb_real;
342};
343
344struct gfs2_trans { 344struct gfs2_trans {
345 unsigned long tr_ip; 345 unsigned long tr_ip;
346 346
@@ -429,7 +429,6 @@ struct gfs2_tune {
429 unsigned int gt_log_flush_secs; 429 unsigned int gt_log_flush_secs;
430 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ 430 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
431 431
432 unsigned int gt_scand_secs;
433 unsigned int gt_recoverd_secs; 432 unsigned int gt_recoverd_secs;
434 unsigned int gt_logd_secs; 433 unsigned int gt_logd_secs;
435 unsigned int gt_quotad_secs; 434 unsigned int gt_quotad_secs;
@@ -574,7 +573,6 @@ struct gfs2_sbd {
574 573
575 /* Daemon stuff */ 574 /* Daemon stuff */
576 575
577 struct task_struct *sd_scand_process;
578 struct task_struct *sd_recoverd_process; 576 struct task_struct *sd_recoverd_process;
579 struct task_struct *sd_logd_process; 577 struct task_struct *sd_logd_process;
580 struct task_struct *sd_quotad_process; 578 struct task_struct *sd_quotad_process;
@@ -609,13 +607,13 @@ struct gfs2_sbd {
609 unsigned int sd_log_num_revoke; 607 unsigned int sd_log_num_revoke;
610 unsigned int sd_log_num_rg; 608 unsigned int sd_log_num_rg;
611 unsigned int sd_log_num_databuf; 609 unsigned int sd_log_num_databuf;
612 unsigned int sd_log_num_jdata;
613 610
614 struct list_head sd_log_le_gl; 611 struct list_head sd_log_le_gl;
615 struct list_head sd_log_le_buf; 612 struct list_head sd_log_le_buf;
616 struct list_head sd_log_le_revoke; 613 struct list_head sd_log_le_revoke;
617 struct list_head sd_log_le_rg; 614 struct list_head sd_log_le_rg;
618 struct list_head sd_log_le_databuf; 615 struct list_head sd_log_le_databuf;
616 struct list_head sd_log_le_ordered;
619 617
620 unsigned int sd_log_blks_free; 618 unsigned int sd_log_blks_free;
621 struct mutex sd_log_reserve_mutex; 619 struct mutex sd_log_reserve_mutex;
@@ -627,7 +625,8 @@ struct gfs2_sbd {
627 625
628 unsigned long sd_log_flush_time; 626 unsigned long sd_log_flush_time;
629 struct rw_semaphore sd_log_flush_lock; 627 struct rw_semaphore sd_log_flush_lock;
630 struct list_head sd_log_flush_list; 628 atomic_t sd_log_in_flight;
629 wait_queue_head_t sd_log_flush_wait;
631 630
632 unsigned int sd_log_flush_head; 631 unsigned int sd_log_flush_head;
633 u64 sd_log_flush_wrapped; 632 u64 sd_log_flush_wrapped;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 34f7bcdea1e9..5f6dc32946cd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
77 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 77 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
78} 78}
79 79
80struct gfs2_skip_data {
81 u64 no_addr;
82 int skipped;
83};
84
85static int iget_skip_test(struct inode *inode, void *opaque)
86{
87 struct gfs2_inode *ip = GFS2_I(inode);
88 struct gfs2_skip_data *data = opaque;
89
90 if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){
91 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
92 data->skipped = 1;
93 return 0;
94 }
95 return 1;
96 }
97 return 0;
98}
99
100static int iget_skip_set(struct inode *inode, void *opaque)
101{
102 struct gfs2_inode *ip = GFS2_I(inode);
103 struct gfs2_skip_data *data = opaque;
104
105 if (data->skipped)
106 return 1;
107 inode->i_ino = (unsigned long)(data->no_addr);
108 ip->i_no_addr = data->no_addr;
109 return 0;
110}
111
112static struct inode *gfs2_iget_skip(struct super_block *sb,
113 u64 no_addr)
114{
115 struct gfs2_skip_data data;
116 unsigned long hash = (unsigned long)no_addr;
117
118 data.no_addr = no_addr;
119 data.skipped = 0;
120 return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
121}
122
80/** 123/**
81 * GFS2 lookup code fills in vfs inode contents based on info obtained 124 * GFS2 lookup code fills in vfs inode contents based on info obtained
82 * from directory entry inside gfs2_inode_lookup(). This has caused issues 125 * from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode)
112 * @sb: The super block 155 * @sb: The super block
113 * @no_addr: The inode number 156 * @no_addr: The inode number
114 * @type: The type of the inode 157 * @type: The type of the inode
158 * @skip_freeing: set this not return an inode if it is currently being freed.
115 * 159 *
116 * Returns: A VFS inode, or an error 160 * Returns: A VFS inode, or an error
117 */ 161 */
@@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode)
119struct inode *gfs2_inode_lookup(struct super_block *sb, 163struct inode *gfs2_inode_lookup(struct super_block *sb,
120 unsigned int type, 164 unsigned int type,
121 u64 no_addr, 165 u64 no_addr,
122 u64 no_formal_ino) 166 u64 no_formal_ino, int skip_freeing)
123{ 167{
124 struct inode *inode = gfs2_iget(sb, no_addr); 168 struct inode *inode;
125 struct gfs2_inode *ip = GFS2_I(inode); 169 struct gfs2_inode *ip;
126 struct gfs2_glock *io_gl; 170 struct gfs2_glock *io_gl;
127 int error; 171 int error;
128 172
173 if (skip_freeing)
174 inode = gfs2_iget_skip(sb, no_addr);
175 else
176 inode = gfs2_iget(sb, no_addr);
177 ip = GFS2_I(inode);
178
129 if (!inode) 179 if (!inode)
130 return ERR_PTR(-ENOBUFS); 180 return ERR_PTR(-ENOBUFS);
131 181
@@ -244,6 +294,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
244 return 0; 294 return 0;
245} 295}
246 296
297static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh)
298{
299 ip->i_cache[0] = bh;
300}
301
247/** 302/**
248 * gfs2_inode_refresh - Refresh the incore copy of the dinode 303 * gfs2_inode_refresh - Refresh the incore copy of the dinode
249 * @ip: The GFS2 inode 304 * @ip: The GFS2 inode
@@ -688,7 +743,7 @@ out:
688static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 743static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
689 const struct gfs2_inum_host *inum, unsigned int mode, 744 const struct gfs2_inum_host *inum, unsigned int mode,
690 unsigned int uid, unsigned int gid, 745 unsigned int uid, unsigned int gid,
691 const u64 *generation, dev_t dev) 746 const u64 *generation, dev_t dev, struct buffer_head **bhp)
692{ 747{
693 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 748 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
694 struct gfs2_dinode *di; 749 struct gfs2_dinode *di;
@@ -743,13 +798,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
743 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 798 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
744 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 799 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
745 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 800 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
801
802 set_buffer_uptodate(dibh);
746 803
747 brelse(dibh); 804 *bhp = dibh;
748} 805}
749 806
750static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 807static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
751 unsigned int mode, const struct gfs2_inum_host *inum, 808 unsigned int mode, const struct gfs2_inum_host *inum,
752 const u64 *generation, dev_t dev) 809 const u64 *generation, dev_t dev, struct buffer_head **bhp)
753{ 810{
754 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 811 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
755 unsigned int uid, gid; 812 unsigned int uid, gid;
@@ -770,7 +827,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
770 if (error) 827 if (error)
771 goto out_quota; 828 goto out_quota;
772 829
773 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev); 830 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp);
774 gfs2_quota_change(dip, +1, uid, gid); 831 gfs2_quota_change(dip, +1, uid, gid);
775 gfs2_trans_end(sdp); 832 gfs2_trans_end(sdp);
776 833
@@ -909,6 +966,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
909 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 966 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
910 int error; 967 int error;
911 u64 generation; 968 u64 generation;
969 struct buffer_head *bh=NULL;
912 970
913 if (!name->len || name->len > GFS2_FNAMESIZE) 971 if (!name->len || name->len > GFS2_FNAMESIZE)
914 return ERR_PTR(-ENAMETOOLONG); 972 return ERR_PTR(-ENAMETOOLONG);
@@ -935,16 +993,18 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
935 if (error) 993 if (error)
936 goto fail_gunlock; 994 goto fail_gunlock;
937 995
938 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev); 996 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh);
939 if (error) 997 if (error)
940 goto fail_gunlock2; 998 goto fail_gunlock2;
941 999
942 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 1000 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
943 inum.no_addr, 1001 inum.no_addr,
944 inum.no_formal_ino); 1002 inum.no_formal_ino, 0);
945 if (IS_ERR(inode)) 1003 if (IS_ERR(inode))
946 goto fail_gunlock2; 1004 goto fail_gunlock2;
947 1005
1006 gfs2_inode_bh(GFS2_I(inode), bh);
1007
948 error = gfs2_inode_refresh(GFS2_I(inode)); 1008 error = gfs2_inode_refresh(GFS2_I(inode));
949 if (error) 1009 if (error)
950 goto fail_gunlock2; 1010 goto fail_gunlock2;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 4517ac82c01c..351ac87ab384 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
49void gfs2_inode_attr_in(struct gfs2_inode *ip); 49void gfs2_inode_attr_in(struct gfs2_inode *ip);
50void gfs2_set_iop(struct inode *inode); 50void gfs2_set_iop(struct inode *inode);
51struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 51struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
52 u64 no_addr, u64 no_formal_ino); 52 u64 no_addr, u64 no_formal_ino,
53 int skip_freeing);
53struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 54struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
54 55
55int gfs2_inode_refresh(struct gfs2_inode *ip); 56int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index 24d70f73b651..9e8265d28377 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -13,7 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/module.h>
17#include <linux/types.h> 16#include <linux/types.h>
18#include <linux/string.h> 17#include <linux/string.h>
19#include <linux/list.h> 18#include <linux/list.h>
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index fba1f1d87e4f..1f7b038530b4 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -346,15 +346,16 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
346 346
347static unsigned int dev_poll(struct file *file, poll_table *wait) 347static unsigned int dev_poll(struct file *file, poll_table *wait)
348{ 348{
349 unsigned int mask = 0;
350
349 poll_wait(file, &send_wq, wait); 351 poll_wait(file, &send_wq, wait);
350 352
351 spin_lock(&ops_lock); 353 spin_lock(&ops_lock);
352 if (!list_empty(&send_list)) { 354 if (!list_empty(&send_list))
353 spin_unlock(&ops_lock); 355 mask = POLLIN | POLLRDNORM;
354 return POLLIN | POLLRDNORM;
355 }
356 spin_unlock(&ops_lock); 356 spin_unlock(&ops_lock);
357 return 0; 357
358 return mask;
358} 359}
359 360
360static const struct file_operations dev_fops = { 361static const struct file_operations dev_fops = {
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 1aca51e45092..bd938f06481d 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -268,20 +268,16 @@ static inline int check_drop(struct gdlm_ls *ls)
268 return 0; 268 return 0;
269} 269}
270 270
271static int gdlm_thread(void *data) 271static int gdlm_thread(void *data, int blist)
272{ 272{
273 struct gdlm_ls *ls = (struct gdlm_ls *) data; 273 struct gdlm_ls *ls = (struct gdlm_ls *) data;
274 struct gdlm_lock *lp = NULL; 274 struct gdlm_lock *lp = NULL;
275 int blist = 0;
276 uint8_t complete, blocking, submit, drop; 275 uint8_t complete, blocking, submit, drop;
277 DECLARE_WAITQUEUE(wait, current); 276 DECLARE_WAITQUEUE(wait, current);
278 277
279 /* Only thread1 is allowed to do blocking callbacks since gfs 278 /* Only thread1 is allowed to do blocking callbacks since gfs
280 may wait for a completion callback within a blocking cb. */ 279 may wait for a completion callback within a blocking cb. */
281 280
282 if (current == ls->thread1)
283 blist = 1;
284
285 while (!kthread_should_stop()) { 281 while (!kthread_should_stop()) {
286 set_current_state(TASK_INTERRUPTIBLE); 282 set_current_state(TASK_INTERRUPTIBLE);
287 add_wait_queue(&ls->thread_wait, &wait); 283 add_wait_queue(&ls->thread_wait, &wait);
@@ -333,12 +329,22 @@ static int gdlm_thread(void *data)
333 return 0; 329 return 0;
334} 330}
335 331
332static int gdlm_thread1(void *data)
333{
334 return gdlm_thread(data, 1);
335}
336
337static int gdlm_thread2(void *data)
338{
339 return gdlm_thread(data, 0);
340}
341
336int gdlm_init_threads(struct gdlm_ls *ls) 342int gdlm_init_threads(struct gdlm_ls *ls)
337{ 343{
338 struct task_struct *p; 344 struct task_struct *p;
339 int error; 345 int error;
340 346
341 p = kthread_run(gdlm_thread, ls, "lock_dlm1"); 347 p = kthread_run(gdlm_thread1, ls, "lock_dlm1");
342 error = IS_ERR(p); 348 error = IS_ERR(p);
343 if (error) { 349 if (error) {
344 log_error("can't start lock_dlm1 thread %d", error); 350 log_error("can't start lock_dlm1 thread %d", error);
@@ -346,7 +352,7 @@ int gdlm_init_threads(struct gdlm_ls *ls)
346 } 352 }
347 ls->thread1 = p; 353 ls->thread1 = p;
348 354
349 p = kthread_run(gdlm_thread, ls, "lock_dlm2"); 355 p = kthread_run(gdlm_thread2, ls, "lock_dlm2");
350 error = IS_ERR(p); 356 error = IS_ERR(p);
351 if (error) { 357 if (error) {
352 log_error("can't start lock_dlm2 thread %d", error); 358 log_error("can't start lock_dlm2 thread %d", error);
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
index 0d149c8c493a..d3b8ce6fbbe3 100644
--- a/fs/gfs2/locking/nolock/main.c
+++ b/fs/gfs2/locking/nolock/main.c
@@ -9,7 +9,6 @@
9 9
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/module.h>
13#include <linux/init.h> 12#include <linux/init.h>
14#include <linux/types.h> 13#include <linux/types.h>
15#include <linux/fs.h> 14#include <linux/fs.h>
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f49a12e24086..7df702473252 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -60,6 +60,26 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
60} 60}
61 61
62/** 62/**
63 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
64 * @mapping: The associated mapping (maybe NULL)
65 * @bd: The gfs2_bufdata to remove
66 *
67 * The log lock _must_ be held when calling this function
68 *
69 */
70
71void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd)
72{
73 bd->bd_ail = NULL;
74 list_del_init(&bd->bd_ail_st_list);
75 list_del_init(&bd->bd_ail_gl_list);
76 atomic_dec(&bd->bd_gl->gl_ail_count);
77 if (mapping)
78 gfs2_meta_cache_flush(GFS2_I(mapping->host));
79 brelse(bd->bd_bh);
80}
81
82/**
63 * gfs2_ail1_start_one - Start I/O on a part of the AIL 83 * gfs2_ail1_start_one - Start I/O on a part of the AIL
64 * @sdp: the filesystem 84 * @sdp: the filesystem
65 * @tr: the part of the AIL 85 * @tr: the part of the AIL
@@ -83,17 +103,9 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
83 103
84 gfs2_assert(sdp, bd->bd_ail == ai); 104 gfs2_assert(sdp, bd->bd_ail == ai);
85 105
86 if (!bh){
87 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
88 continue;
89 }
90
91 if (!buffer_busy(bh)) { 106 if (!buffer_busy(bh)) {
92 if (!buffer_uptodate(bh)) { 107 if (!buffer_uptodate(bh))
93 gfs2_log_unlock(sdp);
94 gfs2_io_error_bh(sdp, bh); 108 gfs2_io_error_bh(sdp, bh);
95 gfs2_log_lock(sdp);
96 }
97 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 109 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
98 continue; 110 continue;
99 } 111 }
@@ -103,9 +115,16 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
103 115
104 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); 116 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
105 117
118 get_bh(bh);
106 gfs2_log_unlock(sdp); 119 gfs2_log_unlock(sdp);
107 wait_on_buffer(bh); 120 lock_buffer(bh);
108 ll_rw_block(WRITE, 1, &bh); 121 if (test_clear_buffer_dirty(bh)) {
122 bh->b_end_io = end_buffer_write_sync;
123 submit_bh(WRITE, bh);
124 } else {
125 unlock_buffer(bh);
126 brelse(bh);
127 }
109 gfs2_log_lock(sdp); 128 gfs2_log_lock(sdp);
110 129
111 retry = 1; 130 retry = 1;
@@ -130,11 +149,6 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
130 bd_ail_st_list) { 149 bd_ail_st_list) {
131 bh = bd->bd_bh; 150 bh = bd->bd_bh;
132 151
133 if (!bh){
134 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
135 continue;
136 }
137
138 gfs2_assert(sdp, bd->bd_ail == ai); 152 gfs2_assert(sdp, bd->bd_ail == ai);
139 153
140 if (buffer_busy(bh)) { 154 if (buffer_busy(bh)) {
@@ -155,13 +169,14 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
155 169
156static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) 170static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
157{ 171{
158 struct list_head *head = &sdp->sd_ail1_list; 172 struct list_head *head;
159 u64 sync_gen; 173 u64 sync_gen;
160 struct list_head *first; 174 struct list_head *first;
161 struct gfs2_ail *first_ai, *ai, *tmp; 175 struct gfs2_ail *first_ai, *ai, *tmp;
162 int done = 0; 176 int done = 0;
163 177
164 gfs2_log_lock(sdp); 178 gfs2_log_lock(sdp);
179 head = &sdp->sd_ail1_list;
165 if (list_empty(head)) { 180 if (list_empty(head)) {
166 gfs2_log_unlock(sdp); 181 gfs2_log_unlock(sdp);
167 return; 182 return;
@@ -233,11 +248,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
233 bd = list_entry(head->prev, struct gfs2_bufdata, 248 bd = list_entry(head->prev, struct gfs2_bufdata,
234 bd_ail_st_list); 249 bd_ail_st_list);
235 gfs2_assert(sdp, bd->bd_ail == ai); 250 gfs2_assert(sdp, bd->bd_ail == ai);
236 bd->bd_ail = NULL; 251 gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd);
237 list_del(&bd->bd_ail_st_list);
238 list_del(&bd->bd_ail_gl_list);
239 atomic_dec(&bd->bd_gl->gl_ail_count);
240 brelse(bd->bd_bh);
241 } 252 }
242} 253}
243 254
@@ -439,10 +450,10 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
439 return tail; 450 return tail;
440} 451}
441 452
442static inline void log_incr_head(struct gfs2_sbd *sdp) 453void gfs2_log_incr_head(struct gfs2_sbd *sdp)
443{ 454{
444 if (sdp->sd_log_flush_head == sdp->sd_log_tail) 455 if (sdp->sd_log_flush_head == sdp->sd_log_tail)
445 gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head); 456 BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head);
446 457
447 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { 458 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
448 sdp->sd_log_flush_head = 0; 459 sdp->sd_log_flush_head = 0;
@@ -451,6 +462,23 @@ static inline void log_incr_head(struct gfs2_sbd *sdp)
451} 462}
452 463
453/** 464/**
465 * gfs2_log_write_endio - End of I/O for a log buffer
466 * @bh: The buffer head
467 * @uptodate: I/O Status
468 *
469 */
470
471static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
472{
473 struct gfs2_sbd *sdp = bh->b_private;
474 bh->b_private = NULL;
475
476 end_buffer_write_sync(bh, uptodate);
477 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
478 wake_up(&sdp->sd_log_flush_wait);
479}
480
481/**
454 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data 482 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
455 * @sdp: The GFS2 superblock 483 * @sdp: The GFS2 superblock
456 * 484 *
@@ -460,25 +488,43 @@ static inline void log_incr_head(struct gfs2_sbd *sdp)
460struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) 488struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
461{ 489{
462 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); 490 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
463 struct gfs2_log_buf *lb;
464 struct buffer_head *bh; 491 struct buffer_head *bh;
465 492
466 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); 493 bh = sb_getblk(sdp->sd_vfs, blkno);
467 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
468
469 bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
470 lock_buffer(bh); 494 lock_buffer(bh);
471 memset(bh->b_data, 0, bh->b_size); 495 memset(bh->b_data, 0, bh->b_size);
472 set_buffer_uptodate(bh); 496 set_buffer_uptodate(bh);
473 clear_buffer_dirty(bh); 497 clear_buffer_dirty(bh);
474 unlock_buffer(bh); 498 gfs2_log_incr_head(sdp);
475 499 atomic_inc(&sdp->sd_log_in_flight);
476 log_incr_head(sdp); 500 bh->b_private = sdp;
501 bh->b_end_io = gfs2_log_write_endio;
477 502
478 return bh; 503 return bh;
479} 504}
480 505
481/** 506/**
507 * gfs2_fake_write_endio -
508 * @bh: The buffer head
509 * @uptodate: The I/O Status
510 *
511 */
512
513static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
514{
515 struct buffer_head *real_bh = bh->b_private;
516 struct gfs2_bufdata *bd = real_bh->b_private;
517 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
518
519 end_buffer_write_sync(bh, uptodate);
520 free_buffer_head(bh);
521 unlock_buffer(real_bh);
522 brelse(real_bh);
523 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
524 wake_up(&sdp->sd_log_flush_wait);
525}
526
527/**
482 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log 528 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
483 * @sdp: the filesystem 529 * @sdp: the filesystem
484 * @data: the data the buffer_head should point to 530 * @data: the data the buffer_head should point to
@@ -490,22 +536,20 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
490 struct buffer_head *real) 536 struct buffer_head *real)
491{ 537{
492 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); 538 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
493 struct gfs2_log_buf *lb;
494 struct buffer_head *bh; 539 struct buffer_head *bh;
495 540
496 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); 541 bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
497 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
498 lb->lb_real = real;
499
500 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
501 atomic_set(&bh->b_count, 1); 542 atomic_set(&bh->b_count, 1);
502 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); 543 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
503 set_bh_page(bh, real->b_page, bh_offset(real)); 544 set_bh_page(bh, real->b_page, bh_offset(real));
504 bh->b_blocknr = blkno; 545 bh->b_blocknr = blkno;
505 bh->b_size = sdp->sd_sb.sb_bsize; 546 bh->b_size = sdp->sd_sb.sb_bsize;
506 bh->b_bdev = sdp->sd_vfs->s_bdev; 547 bh->b_bdev = sdp->sd_vfs->s_bdev;
548 bh->b_private = real;
549 bh->b_end_io = gfs2_fake_write_endio;
507 550
508 log_incr_head(sdp); 551 gfs2_log_incr_head(sdp);
552 atomic_inc(&sdp->sd_log_in_flight);
509 553
510 return bh; 554 return bh;
511} 555}
@@ -572,45 +616,75 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
572 gfs2_assert_withdraw(sdp, !pull); 616 gfs2_assert_withdraw(sdp, !pull);
573 617
574 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); 618 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
575 log_incr_head(sdp); 619 gfs2_log_incr_head(sdp);
576} 620}
577 621
578static void log_flush_commit(struct gfs2_sbd *sdp) 622static void log_flush_commit(struct gfs2_sbd *sdp)
579{ 623{
580 struct list_head *head = &sdp->sd_log_flush_list; 624 DEFINE_WAIT(wait);
581 struct gfs2_log_buf *lb; 625
582 struct buffer_head *bh; 626 if (atomic_read(&sdp->sd_log_in_flight)) {
583 int flushcount = 0; 627 do {
628 prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
629 TASK_UNINTERRUPTIBLE);
630 if (atomic_read(&sdp->sd_log_in_flight))
631 io_schedule();
632 } while(atomic_read(&sdp->sd_log_in_flight));
633 finish_wait(&sdp->sd_log_flush_wait, &wait);
634 }
584 635
585 while (!list_empty(head)) { 636 log_write_header(sdp, 0, 0);
586 lb = list_entry(head->next, struct gfs2_log_buf, lb_list); 637}
587 list_del(&lb->lb_list);
588 bh = lb->lb_bh;
589 638
590 wait_on_buffer(bh); 639static void gfs2_ordered_write(struct gfs2_sbd *sdp)
591 if (!buffer_uptodate(bh)) 640{
592 gfs2_io_error_bh(sdp, bh); 641 struct gfs2_bufdata *bd;
593 if (lb->lb_real) { 642 struct buffer_head *bh;
594 while (atomic_read(&bh->b_count) != 1) /* Grrrr... */ 643 LIST_HEAD(written);
595 schedule(); 644
596 free_buffer_head(bh); 645 gfs2_log_lock(sdp);
597 } else 646 while (!list_empty(&sdp->sd_log_le_ordered)) {
647 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
648 list_move(&bd->bd_le.le_list, &written);
649 bh = bd->bd_bh;
650 if (!buffer_dirty(bh))
651 continue;
652 get_bh(bh);
653 gfs2_log_unlock(sdp);
654 lock_buffer(bh);
655 if (test_clear_buffer_dirty(bh)) {
656 bh->b_end_io = end_buffer_write_sync;
657 submit_bh(WRITE, bh);
658 } else {
659 unlock_buffer(bh);
598 brelse(bh); 660 brelse(bh);
599 kfree(lb); 661 }
600 flushcount++; 662 gfs2_log_lock(sdp);
601 } 663 }
664 list_splice(&written, &sdp->sd_log_le_ordered);
665 gfs2_log_unlock(sdp);
666}
602 667
603 /* If nothing was journaled, the header is unplanned and unwanted. */ 668static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
604 if (flushcount) { 669{
605 log_write_header(sdp, 0, 0); 670 struct gfs2_bufdata *bd;
606 } else { 671 struct buffer_head *bh;
607 unsigned int tail;
608 tail = current_tail(sdp);
609 672
610 gfs2_ail1_empty(sdp, 0); 673 gfs2_log_lock(sdp);
611 if (sdp->sd_log_tail != tail) 674 while (!list_empty(&sdp->sd_log_le_ordered)) {
612 log_pull_tail(sdp, tail); 675 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list);
676 bh = bd->bd_bh;
677 if (buffer_locked(bh)) {
678 get_bh(bh);
679 gfs2_log_unlock(sdp);
680 wait_on_buffer(bh);
681 brelse(bh);
682 gfs2_log_lock(sdp);
683 continue;
684 }
685 list_del_init(&bd->bd_le.le_list);
613 } 686 }
687 gfs2_log_unlock(sdp);
614} 688}
615 689
616/** 690/**
@@ -640,10 +714,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
640 INIT_LIST_HEAD(&ai->ai_ail1_list); 714 INIT_LIST_HEAD(&ai->ai_ail1_list);
641 INIT_LIST_HEAD(&ai->ai_ail2_list); 715 INIT_LIST_HEAD(&ai->ai_ail2_list);
642 716
643 gfs2_assert_withdraw(sdp, 717 if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) {
644 sdp->sd_log_num_buf + sdp->sd_log_num_jdata == 718 printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf,
645 sdp->sd_log_commited_buf + 719 sdp->sd_log_commited_buf);
646 sdp->sd_log_commited_databuf); 720 gfs2_assert_withdraw(sdp, 0);
721 }
722 if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) {
723 printk(KERN_INFO "GFS2: log databuf %u %u\n",
724 sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf);
725 gfs2_assert_withdraw(sdp, 0);
726 }
647 gfs2_assert_withdraw(sdp, 727 gfs2_assert_withdraw(sdp,
648 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); 728 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
649 729
@@ -651,8 +731,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
651 sdp->sd_log_flush_wrapped = 0; 731 sdp->sd_log_flush_wrapped = 0;
652 ai->ai_first = sdp->sd_log_flush_head; 732 ai->ai_first = sdp->sd_log_flush_head;
653 733
734 gfs2_ordered_write(sdp);
654 lops_before_commit(sdp); 735 lops_before_commit(sdp);
655 if (!list_empty(&sdp->sd_log_flush_list)) 736 gfs2_ordered_wait(sdp);
737
738 if (sdp->sd_log_head != sdp->sd_log_flush_head)
656 log_flush_commit(sdp); 739 log_flush_commit(sdp);
657 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 740 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
658 gfs2_log_lock(sdp); 741 gfs2_log_lock(sdp);
@@ -744,7 +827,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
744 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); 827 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
745 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); 828 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
746 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); 829 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
747 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
748 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 830 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
749 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); 831 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
750 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); 832 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 8e7aa0f29109..dae282400627 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -52,12 +52,14 @@ int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
52 52
53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
54void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); 54void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
55void gfs2_log_incr_head(struct gfs2_sbd *sdp);
55 56
56struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); 57struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
57struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, 58struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
58 struct buffer_head *real); 59 struct buffer_head *real);
59void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 60void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
60void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 61void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd);
61 63
62void gfs2_log_shutdown(struct gfs2_sbd *sdp); 64void gfs2_log_shutdown(struct gfs2_sbd *sdp);
63void gfs2_meta_syncfs(struct gfs2_sbd *sdp); 65void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 3b395c41b2f3..6c27cea761c6 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -27,7 +27,104 @@
27#include "trans.h" 27#include "trans.h"
28#include "util.h" 28#include "util.h"
29 29
30static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 30/**
31 * gfs2_pin - Pin a buffer in memory
32 * @sdp: The superblock
33 * @bh: The buffer to be pinned
34 *
35 * The log lock must be held when calling this function
36 */
37static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
38{
39 struct gfs2_bufdata *bd;
40
41 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
42
43 clear_buffer_dirty(bh);
44 if (test_set_buffer_pinned(bh))
45 gfs2_assert_withdraw(sdp, 0);
46 if (!buffer_uptodate(bh))
47 gfs2_io_error_bh(sdp, bh);
48 bd = bh->b_private;
49 /* If this buffer is in the AIL and it has already been written
50 * to in-place disk block, remove it from the AIL.
51 */
52 if (bd->bd_ail)
53 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
54 get_bh(bh);
55}
56
57/**
58 * gfs2_unpin - Unpin a buffer
59 * @sdp: the filesystem the buffer belongs to
60 * @bh: The buffer to unpin
61 * @ai:
62 *
63 */
64
65static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
66 struct gfs2_ail *ai)
67{
68 struct gfs2_bufdata *bd = bh->b_private;
69
70 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
71
72 if (!buffer_pinned(bh))
73 gfs2_assert_withdraw(sdp, 0);
74
75 lock_buffer(bh);
76 mark_buffer_dirty(bh);
77 clear_buffer_pinned(bh);
78
79 gfs2_log_lock(sdp);
80 if (bd->bd_ail) {
81 list_del(&bd->bd_ail_st_list);
82 brelse(bh);
83 } else {
84 struct gfs2_glock *gl = bd->bd_gl;
85 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
86 atomic_inc(&gl->gl_ail_count);
87 }
88 bd->bd_ail = ai;
89 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
90 gfs2_log_unlock(sdp);
91 unlock_buffer(bh);
92}
93
94
95static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
96{
97 return (struct gfs2_log_descriptor *)bh->b_data;
98}
99
100static inline __be64 *bh_log_ptr(struct buffer_head *bh)
101{
102 struct gfs2_log_descriptor *ld = bh_log_desc(bh);
103 return (__force __be64 *)(ld + 1);
104}
105
106static inline __be64 *bh_ptr_end(struct buffer_head *bh)
107{
108 return (__force __be64 *)(bh->b_data + bh->b_size);
109}
110
111
112static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
113{
114 struct buffer_head *bh = gfs2_log_get_buf(sdp);
115 struct gfs2_log_descriptor *ld = bh_log_desc(bh);
116 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
117 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
118 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
119 ld->ld_type = cpu_to_be32(ld_type);
120 ld->ld_length = 0;
121 ld->ld_data1 = 0;
122 ld->ld_data2 = 0;
123 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
124 return bh;
125}
126
127static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
31{ 128{
32 struct gfs2_glock *gl; 129 struct gfs2_glock *gl;
33 struct gfs2_trans *tr = current->journal_info; 130 struct gfs2_trans *tr = current->journal_info;
@@ -38,15 +135,19 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
38 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) 135 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
39 return; 136 return;
40 137
41 gfs2_log_lock(sdp); 138 if (!list_empty(&le->le_list))
42 if (!list_empty(&le->le_list)){
43 gfs2_log_unlock(sdp);
44 return; 139 return;
45 } 140
46 gfs2_glock_hold(gl); 141 gfs2_glock_hold(gl);
47 set_bit(GLF_DIRTY, &gl->gl_flags); 142 set_bit(GLF_DIRTY, &gl->gl_flags);
48 sdp->sd_log_num_gl++; 143 sdp->sd_log_num_gl++;
49 list_add(&le->le_list, &sdp->sd_log_le_gl); 144 list_add(&le->le_list, &sdp->sd_log_le_gl);
145}
146
147static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
148{
149 gfs2_log_lock(sdp);
150 __glock_lo_add(sdp, le);
50 gfs2_log_unlock(sdp); 151 gfs2_log_unlock(sdp);
51} 152}
52 153
@@ -71,30 +172,25 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
71 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 172 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
72 struct gfs2_trans *tr; 173 struct gfs2_trans *tr;
73 174
175 lock_buffer(bd->bd_bh);
74 gfs2_log_lock(sdp); 176 gfs2_log_lock(sdp);
75 if (!list_empty(&bd->bd_list_tr)) { 177 if (!list_empty(&bd->bd_list_tr))
76 gfs2_log_unlock(sdp); 178 goto out;
77 return;
78 }
79 tr = current->journal_info; 179 tr = current->journal_info;
80 tr->tr_touched = 1; 180 tr->tr_touched = 1;
81 tr->tr_num_buf++; 181 tr->tr_num_buf++;
82 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 182 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
83 gfs2_log_unlock(sdp);
84
85 if (!list_empty(&le->le_list)) 183 if (!list_empty(&le->le_list))
86 return; 184 goto out;
87 185 __glock_lo_add(sdp, &bd->bd_gl->gl_le);
88 gfs2_trans_add_gl(bd->bd_gl);
89
90 gfs2_meta_check(sdp, bd->bd_bh); 186 gfs2_meta_check(sdp, bd->bd_bh);
91 gfs2_pin(sdp, bd->bd_bh); 187 gfs2_pin(sdp, bd->bd_bh);
92 gfs2_log_lock(sdp);
93 sdp->sd_log_num_buf++; 188 sdp->sd_log_num_buf++;
94 list_add(&le->le_list, &sdp->sd_log_le_buf); 189 list_add(&le->le_list, &sdp->sd_log_le_buf);
95 gfs2_log_unlock(sdp);
96
97 tr->tr_num_buf_new++; 190 tr->tr_num_buf_new++;
191out:
192 gfs2_log_unlock(sdp);
193 unlock_buffer(bd->bd_bh);
98} 194}
99 195
100static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 196static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
@@ -117,8 +213,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
117 struct buffer_head *bh; 213 struct buffer_head *bh;
118 struct gfs2_log_descriptor *ld; 214 struct gfs2_log_descriptor *ld;
119 struct gfs2_bufdata *bd1 = NULL, *bd2; 215 struct gfs2_bufdata *bd1 = NULL, *bd2;
120 unsigned int total = sdp->sd_log_num_buf; 216 unsigned int total;
121 unsigned int offset = BUF_OFFSET;
122 unsigned int limit; 217 unsigned int limit;
123 unsigned int num; 218 unsigned int num;
124 unsigned n; 219 unsigned n;
@@ -127,22 +222,20 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
127 limit = buf_limit(sdp); 222 limit = buf_limit(sdp);
128 /* for 4k blocks, limit = 503 */ 223 /* for 4k blocks, limit = 503 */
129 224
225 gfs2_log_lock(sdp);
226 total = sdp->sd_log_num_buf;
130 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); 227 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
131 while(total) { 228 while(total) {
132 num = total; 229 num = total;
133 if (total > limit) 230 if (total > limit)
134 num = limit; 231 num = limit;
135 bh = gfs2_log_get_buf(sdp); 232 gfs2_log_unlock(sdp);
136 ld = (struct gfs2_log_descriptor *)bh->b_data; 233 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
137 ptr = (__be64 *)(bh->b_data + offset); 234 gfs2_log_lock(sdp);
138 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 235 ld = bh_log_desc(bh);
139 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); 236 ptr = bh_log_ptr(bh);
140 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
141 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
142 ld->ld_length = cpu_to_be32(num + 1); 237 ld->ld_length = cpu_to_be32(num + 1);
143 ld->ld_data1 = cpu_to_be32(num); 238 ld->ld_data1 = cpu_to_be32(num);
144 ld->ld_data2 = cpu_to_be32(0);
145 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
146 239
147 n = 0; 240 n = 0;
148 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, 241 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
@@ -152,21 +245,27 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
152 break; 245 break;
153 } 246 }
154 247
155 set_buffer_dirty(bh); 248 gfs2_log_unlock(sdp);
156 ll_rw_block(WRITE, 1, &bh); 249 submit_bh(WRITE, bh);
250 gfs2_log_lock(sdp);
157 251
158 n = 0; 252 n = 0;
159 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, 253 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
160 bd_le.le_list) { 254 bd_le.le_list) {
255 get_bh(bd2->bd_bh);
256 gfs2_log_unlock(sdp);
257 lock_buffer(bd2->bd_bh);
161 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 258 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
162 set_buffer_dirty(bh); 259 submit_bh(WRITE, bh);
163 ll_rw_block(WRITE, 1, &bh); 260 gfs2_log_lock(sdp);
164 if (++n >= num) 261 if (++n >= num)
165 break; 262 break;
166 } 263 }
167 264
265 BUG_ON(total < num);
168 total -= num; 266 total -= num;
169 } 267 }
268 gfs2_log_unlock(sdp);
170} 269}
171 270
172static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 271static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
@@ -270,11 +369,8 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
270 tr = current->journal_info; 369 tr = current->journal_info;
271 tr->tr_touched = 1; 370 tr->tr_touched = 1;
272 tr->tr_num_revoke++; 371 tr->tr_num_revoke++;
273
274 gfs2_log_lock(sdp);
275 sdp->sd_log_num_revoke++; 372 sdp->sd_log_num_revoke++;
276 list_add(&le->le_list, &sdp->sd_log_le_revoke); 373 list_add(&le->le_list, &sdp->sd_log_le_revoke);
277 gfs2_log_unlock(sdp);
278} 374}
279 375
280static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 376static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
@@ -284,32 +380,25 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
284 struct buffer_head *bh; 380 struct buffer_head *bh;
285 unsigned int offset; 381 unsigned int offset;
286 struct list_head *head = &sdp->sd_log_le_revoke; 382 struct list_head *head = &sdp->sd_log_le_revoke;
287 struct gfs2_revoke *rv; 383 struct gfs2_bufdata *bd;
288 384
289 if (!sdp->sd_log_num_revoke) 385 if (!sdp->sd_log_num_revoke)
290 return; 386 return;
291 387
292 bh = gfs2_log_get_buf(sdp); 388 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
293 ld = (struct gfs2_log_descriptor *)bh->b_data; 389 ld = bh_log_desc(bh);
294 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
295 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
296 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
297 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
298 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, 390 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
299 sizeof(u64))); 391 sizeof(u64)));
300 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 392 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
301 ld->ld_data2 = cpu_to_be32(0);
302 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
303 offset = sizeof(struct gfs2_log_descriptor); 393 offset = sizeof(struct gfs2_log_descriptor);
304 394
305 while (!list_empty(head)) { 395 while (!list_empty(head)) {
306 rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list); 396 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
307 list_del_init(&rv->rv_le.le_list); 397 list_del_init(&bd->bd_le.le_list);
308 sdp->sd_log_num_revoke--; 398 sdp->sd_log_num_revoke--;
309 399
310 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 400 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
311 set_buffer_dirty(bh); 401 submit_bh(WRITE, bh);
312 ll_rw_block(WRITE, 1, &bh);
313 402
314 bh = gfs2_log_get_buf(sdp); 403 bh = gfs2_log_get_buf(sdp);
315 mh = (struct gfs2_meta_header *)bh->b_data; 404 mh = (struct gfs2_meta_header *)bh->b_data;
@@ -319,15 +408,14 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
319 offset = sizeof(struct gfs2_meta_header); 408 offset = sizeof(struct gfs2_meta_header);
320 } 409 }
321 410
322 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno); 411 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
323 kfree(rv); 412 kmem_cache_free(gfs2_bufdata_cachep, bd);
324 413
325 offset += sizeof(u64); 414 offset += sizeof(u64);
326 } 415 }
327 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 416 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
328 417
329 set_buffer_dirty(bh); 418 submit_bh(WRITE, bh);
330 ll_rw_block(WRITE, 1, &bh);
331} 419}
332 420
333static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 421static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -466,222 +554,136 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
466 struct address_space *mapping = bd->bd_bh->b_page->mapping; 554 struct address_space *mapping = bd->bd_bh->b_page->mapping;
467 struct gfs2_inode *ip = GFS2_I(mapping->host); 555 struct gfs2_inode *ip = GFS2_I(mapping->host);
468 556
557 lock_buffer(bd->bd_bh);
469 gfs2_log_lock(sdp); 558 gfs2_log_lock(sdp);
470 if (!list_empty(&bd->bd_list_tr)) { 559 if (!list_empty(&bd->bd_list_tr))
471 gfs2_log_unlock(sdp); 560 goto out;
472 return;
473 }
474 tr->tr_touched = 1; 561 tr->tr_touched = 1;
475 if (gfs2_is_jdata(ip)) { 562 if (gfs2_is_jdata(ip)) {
476 tr->tr_num_buf++; 563 tr->tr_num_buf++;
477 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 564 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
478 } 565 }
479 gfs2_log_unlock(sdp);
480 if (!list_empty(&le->le_list)) 566 if (!list_empty(&le->le_list))
481 return; 567 goto out;
482 568
483 gfs2_trans_add_gl(bd->bd_gl); 569 __glock_lo_add(sdp, &bd->bd_gl->gl_le);
484 if (gfs2_is_jdata(ip)) { 570 if (gfs2_is_jdata(ip)) {
485 sdp->sd_log_num_jdata++;
486 gfs2_pin(sdp, bd->bd_bh); 571 gfs2_pin(sdp, bd->bd_bh);
487 tr->tr_num_databuf_new++; 572 tr->tr_num_databuf_new++;
573 sdp->sd_log_num_databuf++;
574 list_add(&le->le_list, &sdp->sd_log_le_databuf);
575 } else {
576 list_add(&le->le_list, &sdp->sd_log_le_ordered);
488 } 577 }
489 gfs2_log_lock(sdp); 578out:
490 sdp->sd_log_num_databuf++;
491 list_add(&le->le_list, &sdp->sd_log_le_databuf);
492 gfs2_log_unlock(sdp); 579 gfs2_log_unlock(sdp);
580 unlock_buffer(bd->bd_bh);
493} 581}
494 582
495static int gfs2_check_magic(struct buffer_head *bh) 583static void gfs2_check_magic(struct buffer_head *bh)
496{ 584{
497 struct page *page = bh->b_page;
498 void *kaddr; 585 void *kaddr;
499 __be32 *ptr; 586 __be32 *ptr;
500 int rv = 0;
501 587
502 kaddr = kmap_atomic(page, KM_USER0); 588 clear_buffer_escaped(bh);
589 kaddr = kmap_atomic(bh->b_page, KM_USER0);
503 ptr = kaddr + bh_offset(bh); 590 ptr = kaddr + bh_offset(bh);
504 if (*ptr == cpu_to_be32(GFS2_MAGIC)) 591 if (*ptr == cpu_to_be32(GFS2_MAGIC))
505 rv = 1; 592 set_buffer_escaped(bh);
506 kunmap_atomic(kaddr, KM_USER0); 593 kunmap_atomic(kaddr, KM_USER0);
507
508 return rv;
509} 594}
510 595
511/** 596static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
512 * databuf_lo_before_commit - Scan the data buffers, writing as we go 597 struct list_head *list, struct list_head *done,
513 * 598 unsigned int n)
514 * Here we scan through the lists of buffers and make the assumption
515 * that any buffer thats been pinned is being journaled, and that
516 * any unpinned buffer is an ordered write data buffer and therefore
517 * will be written back rather than journaled.
518 */
519static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
520{ 599{
521 LIST_HEAD(started); 600 struct buffer_head *bh1;
522 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
523 struct buffer_head *bh = NULL,*bh1 = NULL;
524 struct gfs2_log_descriptor *ld; 601 struct gfs2_log_descriptor *ld;
525 unsigned int limit; 602 struct gfs2_bufdata *bd;
526 unsigned int total_dbuf; 603 __be64 *ptr;
527 unsigned int total_jdata = sdp->sd_log_num_jdata; 604
528 unsigned int num, n; 605 if (!bh)
529 __be64 *ptr = NULL; 606 return;
530 607
531 limit = databuf_limit(sdp); 608 ld = bh_log_desc(bh);
609 ld->ld_length = cpu_to_be32(n + 1);
610 ld->ld_data1 = cpu_to_be32(n);
532 611
533 /* 612 ptr = bh_log_ptr(bh);
534 * Start writing ordered buffers, write journaled buffers 613
535 * into the log along with a header 614 get_bh(bh);
536 */ 615 submit_bh(WRITE, bh);
537 gfs2_log_lock(sdp); 616 gfs2_log_lock(sdp);
538 total_dbuf = sdp->sd_log_num_databuf; 617 while(!list_empty(list)) {
539 bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, 618 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
540 bd_le.le_list); 619 list_move_tail(&bd->bd_le.le_list, done);
541 while(total_dbuf) { 620 get_bh(bd->bd_bh);
542 num = total_jdata; 621 while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
543 if (num > limit) 622 gfs2_log_incr_head(sdp);
544 num = limit; 623 ptr += 2;
545 n = 0;
546 list_for_each_entry_safe_continue(bd1, bdt,
547 &sdp->sd_log_le_databuf,
548 bd_le.le_list) {
549 /* store off the buffer head in a local ptr since
550 * gfs2_bufdata might change when we drop the log lock
551 */
552 bh1 = bd1->bd_bh;
553
554 /* An ordered write buffer */
555 if (bh1 && !buffer_pinned(bh1)) {
556 list_move(&bd1->bd_le.le_list, &started);
557 if (bd1 == bd2) {
558 bd2 = NULL;
559 bd2 = list_prepare_entry(bd2,
560 &sdp->sd_log_le_databuf,
561 bd_le.le_list);
562 }
563 total_dbuf--;
564 if (bh1) {
565 if (buffer_dirty(bh1)) {
566 get_bh(bh1);
567
568 gfs2_log_unlock(sdp);
569
570 ll_rw_block(SWRITE, 1, &bh1);
571 brelse(bh1);
572
573 gfs2_log_lock(sdp);
574 }
575 continue;
576 }
577 continue;
578 } else if (bh1) { /* A journaled buffer */
579 int magic;
580 gfs2_log_unlock(sdp);
581 if (!bh) {
582 bh = gfs2_log_get_buf(sdp);
583 ld = (struct gfs2_log_descriptor *)
584 bh->b_data;
585 ptr = (__be64 *)(bh->b_data +
586 DATABUF_OFFSET);
587 ld->ld_header.mh_magic =
588 cpu_to_be32(GFS2_MAGIC);
589 ld->ld_header.mh_type =
590 cpu_to_be32(GFS2_METATYPE_LD);
591 ld->ld_header.mh_format =
592 cpu_to_be32(GFS2_FORMAT_LD);
593 ld->ld_type =
594 cpu_to_be32(GFS2_LOG_DESC_JDATA);
595 ld->ld_length = cpu_to_be32(num + 1);
596 ld->ld_data1 = cpu_to_be32(num);
597 ld->ld_data2 = cpu_to_be32(0);
598 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
599 }
600 magic = gfs2_check_magic(bh1);
601 *ptr++ = cpu_to_be64(bh1->b_blocknr);
602 *ptr++ = cpu_to_be64((__u64)magic);
603 clear_buffer_escaped(bh1);
604 if (unlikely(magic != 0))
605 set_buffer_escaped(bh1);
606 gfs2_log_lock(sdp);
607 if (++n >= num)
608 break;
609 } else if (!bh1) {
610 total_dbuf--;
611 sdp->sd_log_num_databuf--;
612 list_del_init(&bd1->bd_le.le_list);
613 if (bd1 == bd2) {
614 bd2 = NULL;
615 bd2 = list_prepare_entry(bd2,
616 &sdp->sd_log_le_databuf,
617 bd_le.le_list);
618 }
619 kmem_cache_free(gfs2_bufdata_cachep, bd1);
620 }
621 } 624 }
622 gfs2_log_unlock(sdp); 625 gfs2_log_unlock(sdp);
623 if (bh) { 626 lock_buffer(bd->bd_bh);
624 set_buffer_mapped(bh); 627 if (buffer_escaped(bd->bd_bh)) {
625 set_buffer_dirty(bh); 628 void *kaddr;
626 ll_rw_block(WRITE, 1, &bh); 629 bh1 = gfs2_log_get_buf(sdp);
627 bh = NULL; 630 kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0);
631 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
632 bh1->b_size);
633 kunmap_atomic(kaddr, KM_USER0);
634 *(__be32 *)bh1->b_data = 0;
635 clear_buffer_escaped(bd->bd_bh);
636 unlock_buffer(bd->bd_bh);
637 brelse(bd->bd_bh);
638 } else {
639 bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
628 } 640 }
629 n = 0; 641 submit_bh(WRITE, bh1);
630 gfs2_log_lock(sdp); 642 gfs2_log_lock(sdp);
631 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, 643 ptr += 2;
632 bd_le.le_list) {
633 if (!bd2->bd_bh)
634 continue;
635 /* copy buffer if it needs escaping */
636 gfs2_log_unlock(sdp);
637 if (unlikely(buffer_escaped(bd2->bd_bh))) {
638 void *kaddr;
639 struct page *page = bd2->bd_bh->b_page;
640 bh = gfs2_log_get_buf(sdp);
641 kaddr = kmap_atomic(page, KM_USER0);
642 memcpy(bh->b_data,
643 kaddr + bh_offset(bd2->bd_bh),
644 sdp->sd_sb.sb_bsize);
645 kunmap_atomic(kaddr, KM_USER0);
646 *(__be32 *)bh->b_data = 0;
647 } else {
648 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
649 }
650 set_buffer_dirty(bh);
651 ll_rw_block(WRITE, 1, &bh);
652 gfs2_log_lock(sdp);
653 if (++n >= num)
654 break;
655 }
656 bh = NULL;
657 BUG_ON(total_dbuf < num);
658 total_dbuf -= num;
659 total_jdata -= num;
660 } 644 }
661 gfs2_log_unlock(sdp); 645 gfs2_log_unlock(sdp);
646 brelse(bh);
647}
662 648
663 /* Wait on all ordered buffers */ 649/**
664 while (!list_empty(&started)) { 650 * databuf_lo_before_commit - Scan the data buffers, writing as we go
665 gfs2_log_lock(sdp); 651 *
666 bd1 = list_entry(started.next, struct gfs2_bufdata, 652 */
667 bd_le.le_list);
668 list_del_init(&bd1->bd_le.le_list);
669 sdp->sd_log_num_databuf--;
670 bh = bd1->bd_bh;
671 if (bh) {
672 bh->b_private = NULL;
673 get_bh(bh);
674 gfs2_log_unlock(sdp);
675 wait_on_buffer(bh);
676 brelse(bh);
677 } else
678 gfs2_log_unlock(sdp);
679 653
680 kmem_cache_free(gfs2_bufdata_cachep, bd1); 654static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
681 } 655{
656 struct gfs2_bufdata *bd = NULL;
657 struct buffer_head *bh = NULL;
658 unsigned int n = 0;
659 __be64 *ptr = NULL, *end = NULL;
660 LIST_HEAD(processed);
661 LIST_HEAD(in_progress);
682 662
683 /* We've removed all the ordered write bufs here, so only jdata left */ 663 gfs2_log_lock(sdp);
684 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); 664 while (!list_empty(&sdp->sd_log_le_databuf)) {
665 if (ptr == end) {
666 gfs2_log_unlock(sdp);
667 gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
668 n = 0;
669 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
670 ptr = bh_log_ptr(bh);
671 end = bh_ptr_end(bh) - 1;
672 gfs2_log_lock(sdp);
673 continue;
674 }
675 bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
676 list_move_tail(&bd->bd_le.le_list, &in_progress);
677 gfs2_check_magic(bd->bd_bh);
678 *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
679 *ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0);
680 n++;
681 }
682 gfs2_log_unlock(sdp);
683 gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
684 gfs2_log_lock(sdp);
685 list_splice(&processed, &sdp->sd_log_le_databuf);
686 gfs2_log_unlock(sdp);
685} 687}
686 688
687static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, 689static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
@@ -765,11 +767,9 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
765 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 767 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
766 list_del_init(&bd->bd_le.le_list); 768 list_del_init(&bd->bd_le.le_list);
767 sdp->sd_log_num_databuf--; 769 sdp->sd_log_num_databuf--;
768 sdp->sd_log_num_jdata--;
769 gfs2_unpin(sdp, bd->bd_bh, ai); 770 gfs2_unpin(sdp, bd->bd_bh, ai);
770 } 771 }
771 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); 772 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
772 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
773} 773}
774 774
775 775
@@ -817,10 +817,10 @@ const struct gfs2_log_operations gfs2_databuf_lops = {
817 817
818const struct gfs2_log_operations *gfs2_log_ops[] = { 818const struct gfs2_log_operations *gfs2_log_ops[] = {
819 &gfs2_glock_lops, 819 &gfs2_glock_lops,
820 &gfs2_databuf_lops,
820 &gfs2_buf_lops, 821 &gfs2_buf_lops,
821 &gfs2_revoke_lops,
822 &gfs2_rg_lops, 822 &gfs2_rg_lops,
823 &gfs2_databuf_lops, 823 &gfs2_revoke_lops,
824 NULL, 824 NULL,
825}; 825};
826 826
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index d5d4e68b8807..79c91fd8381b 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -107,6 +107,8 @@ static int __init init_gfs2_fs(void)
107fail_unregister: 107fail_unregister:
108 unregister_filesystem(&gfs2_fs_type); 108 unregister_filesystem(&gfs2_fs_type);
109fail: 109fail:
110 gfs2_glock_exit();
111
110 if (gfs2_bufdata_cachep) 112 if (gfs2_bufdata_cachep)
111 kmem_cache_destroy(gfs2_bufdata_cachep); 113 kmem_cache_destroy(gfs2_bufdata_cachep);
112 114
@@ -127,6 +129,7 @@ fail:
127 129
128static void __exit exit_gfs2_fs(void) 130static void __exit exit_gfs2_fs(void)
129{ 131{
132 gfs2_glock_exit();
130 gfs2_unregister_debugfs(); 133 gfs2_unregister_debugfs();
131 unregister_filesystem(&gfs2_fs_type); 134 unregister_filesystem(&gfs2_fs_type);
132 unregister_filesystem(&gfs2meta_fs_type); 135 unregister_filesystem(&gfs2meta_fs_type);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 8da343b34ae7..4da423985e4f 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -297,74 +297,35 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
297 unlock_page(bh->b_page); 297 unlock_page(bh->b_page);
298} 298}
299 299
300/** 300void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
301 * gfs2_pin - Pin a buffer in memory
302 * @sdp: the filesystem the buffer belongs to
303 * @bh: The buffer to be pinned
304 *
305 */
306
307void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
308{ 301{
302 struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host);
309 struct gfs2_bufdata *bd = bh->b_private; 303 struct gfs2_bufdata *bd = bh->b_private;
310 304 if (test_clear_buffer_pinned(bh)) {
311 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 305 list_del_init(&bd->bd_le.le_list);
312 306 if (meta) {
313 if (test_set_buffer_pinned(bh)) 307 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
314 gfs2_assert_withdraw(sdp, 0); 308 sdp->sd_log_num_buf--;
315 309 tr->tr_num_buf_rm++;
316 wait_on_buffer(bh); 310 } else {
317 311 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf);
318 /* If this buffer is in the AIL and it has already been written 312 sdp->sd_log_num_databuf--;
319 to in-place disk block, remove it from the AIL. */ 313 tr->tr_num_databuf_rm++;
320 314 }
321 gfs2_log_lock(sdp); 315 tr->tr_touched = 1;
322 if (bd->bd_ail && !buffer_in_io(bh))
323 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
324 gfs2_log_unlock(sdp);
325
326 clear_buffer_dirty(bh);
327 wait_on_buffer(bh);
328
329 if (!buffer_uptodate(bh))
330 gfs2_io_error_bh(sdp, bh);
331
332 get_bh(bh);
333}
334
335/**
336 * gfs2_unpin - Unpin a buffer
337 * @sdp: the filesystem the buffer belongs to
338 * @bh: The buffer to unpin
339 * @ai:
340 *
341 */
342
343void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
344 struct gfs2_ail *ai)
345{
346 struct gfs2_bufdata *bd = bh->b_private;
347
348 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
349
350 if (!buffer_pinned(bh))
351 gfs2_assert_withdraw(sdp, 0);
352
353 mark_buffer_dirty(bh);
354 clear_buffer_pinned(bh);
355
356 gfs2_log_lock(sdp);
357 if (bd->bd_ail) {
358 list_del(&bd->bd_ail_st_list);
359 brelse(bh); 316 brelse(bh);
360 } else {
361 struct gfs2_glock *gl = bd->bd_gl;
362 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
363 atomic_inc(&gl->gl_ail_count);
364 } 317 }
365 bd->bd_ail = ai; 318 if (bd) {
366 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 319 if (bd->bd_ail) {
367 gfs2_log_unlock(sdp); 320 gfs2_remove_from_ail(NULL, bd);
321 bh->b_private = NULL;
322 bd->bd_bh = NULL;
323 bd->bd_blkno = bh->b_blocknr;
324 gfs2_trans_add_revoke(sdp, bd);
325 }
326 }
327 clear_buffer_dirty(bh);
328 clear_buffer_uptodate(bh);
368} 329}
369 330
370/** 331/**
@@ -383,44 +344,11 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
383 while (blen) { 344 while (blen) {
384 bh = getbuf(ip->i_gl, bstart, NO_CREATE); 345 bh = getbuf(ip->i_gl, bstart, NO_CREATE);
385 if (bh) { 346 if (bh) {
386 struct gfs2_bufdata *bd = bh->b_private;
387
388 if (test_clear_buffer_pinned(bh)) {
389 struct gfs2_trans *tr = current->journal_info;
390 struct gfs2_inode *bh_ip =
391 GFS2_I(bh->b_page->mapping->host);
392
393 gfs2_log_lock(sdp);
394 list_del_init(&bd->bd_le.le_list);
395 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
396 sdp->sd_log_num_buf--;
397 gfs2_log_unlock(sdp);
398 if (bh_ip->i_inode.i_private != NULL)
399 tr->tr_num_databuf_rm++;
400 else
401 tr->tr_num_buf_rm++;
402 brelse(bh);
403 }
404 if (bd) {
405 gfs2_log_lock(sdp);
406 if (bd->bd_ail) {
407 u64 blkno = bh->b_blocknr;
408 bd->bd_ail = NULL;
409 list_del(&bd->bd_ail_st_list);
410 list_del(&bd->bd_ail_gl_list);
411 atomic_dec(&bd->bd_gl->gl_ail_count);
412 brelse(bh);
413 gfs2_log_unlock(sdp);
414 gfs2_trans_add_revoke(sdp, blkno);
415 } else
416 gfs2_log_unlock(sdp);
417 }
418
419 lock_buffer(bh); 347 lock_buffer(bh);
420 clear_buffer_dirty(bh); 348 gfs2_log_lock(sdp);
421 clear_buffer_uptodate(bh); 349 gfs2_remove_from_journal(bh, current->journal_info, 1);
350 gfs2_log_unlock(sdp);
422 unlock_buffer(bh); 351 unlock_buffer(bh);
423
424 brelse(bh); 352 brelse(bh);
425 } 353 }
426 354
@@ -446,10 +374,10 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip)
446 374
447 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { 375 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
448 bh_slot = &ip->i_cache[x]; 376 bh_slot = &ip->i_cache[x];
449 if (!*bh_slot) 377 if (*bh_slot) {
450 break; 378 brelse(*bh_slot);
451 brelse(*bh_slot); 379 *bh_slot = NULL;
452 *bh_slot = NULL; 380 }
453 } 381 }
454 382
455 spin_unlock(&ip->i_spin); 383 spin_unlock(&ip->i_spin);
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 527bf19d9690..b7048222ebb4 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -50,9 +50,9 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
50 50
51void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, 51void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
52 int meta); 52 int meta);
53void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); 53
54void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, 54void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
55 struct gfs2_ail *ai); 55 int meta);
56 56
57void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); 57void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
58 58
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 4864659555d4..b941f9f9f958 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -42,6 +42,7 @@ enum {
42 Opt_nosuiddir, 42 Opt_nosuiddir,
43 Opt_data_writeback, 43 Opt_data_writeback,
44 Opt_data_ordered, 44 Opt_data_ordered,
45 Opt_err,
45}; 46};
46 47
47static match_table_t tokens = { 48static match_table_t tokens = {
@@ -64,7 +65,8 @@ static match_table_t tokens = {
64 {Opt_suiddir, "suiddir"}, 65 {Opt_suiddir, "suiddir"},
65 {Opt_nosuiddir, "nosuiddir"}, 66 {Opt_nosuiddir, "nosuiddir"},
66 {Opt_data_writeback, "data=writeback"}, 67 {Opt_data_writeback, "data=writeback"},
67 {Opt_data_ordered, "data=ordered"} 68 {Opt_data_ordered, "data=ordered"},
69 {Opt_err, NULL}
68}; 70};
69 71
70/** 72/**
@@ -237,6 +239,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
237 case Opt_data_ordered: 239 case Opt_data_ordered:
238 args->ar_data = GFS2_DATA_ORDERED; 240 args->ar_data = GFS2_DATA_ORDERED;
239 break; 241 break;
242 case Opt_err:
240 default: 243 default:
241 fs_info(sdp, "unknown option: %s\n", o); 244 fs_info(sdp, "unknown option: %s\n", o);
242 error = -EINVAL; 245 error = -EINVAL;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 42a5f58f6fca..873a511ef2be 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -90,7 +90,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
90 error = gfs2_block_map(inode, lblock, 0, bh_result); 90 error = gfs2_block_map(inode, lblock, 0, bh_result);
91 if (error) 91 if (error)
92 return error; 92 return error;
93 if (bh_result->b_blocknr == 0) 93 if (!buffer_mapped(bh_result))
94 return -EIO; 94 return -EIO;
95 return 0; 95 return 0;
96} 96}
@@ -414,7 +414,8 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
414 if (ind_blocks || data_blocks) 414 if (ind_blocks || data_blocks)
415 rblocks += RES_STATFS + RES_QUOTA; 415 rblocks += RES_STATFS + RES_QUOTA;
416 416
417 error = gfs2_trans_begin(sdp, rblocks, 0); 417 error = gfs2_trans_begin(sdp, rblocks,
418 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
418 if (error) 419 if (error)
419 goto out_trans_fail; 420 goto out_trans_fail;
420 421
@@ -616,58 +617,50 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
616 return dblock; 617 return dblock;
617} 618}
618 619
619static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh) 620static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
620{ 621{
621 struct gfs2_bufdata *bd; 622 struct gfs2_bufdata *bd;
622 623
624 lock_buffer(bh);
623 gfs2_log_lock(sdp); 625 gfs2_log_lock(sdp);
626 clear_buffer_dirty(bh);
624 bd = bh->b_private; 627 bd = bh->b_private;
625 if (bd) { 628 if (bd) {
626 bd->bd_bh = NULL; 629 if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
627 bh->b_private = NULL; 630 list_del_init(&bd->bd_le.le_list);
628 if (!bd->bd_ail && list_empty(&bd->bd_le.le_list)) 631 else
629 kmem_cache_free(gfs2_bufdata_cachep, bd); 632 gfs2_remove_from_journal(bh, current->journal_info, 0);
630 } 633 }
631 gfs2_log_unlock(sdp);
632
633 lock_buffer(bh);
634 clear_buffer_dirty(bh);
635 bh->b_bdev = NULL; 634 bh->b_bdev = NULL;
636 clear_buffer_mapped(bh); 635 clear_buffer_mapped(bh);
637 clear_buffer_req(bh); 636 clear_buffer_req(bh);
638 clear_buffer_new(bh); 637 clear_buffer_new(bh);
639 clear_buffer_delay(bh); 638 gfs2_log_unlock(sdp);
640 unlock_buffer(bh); 639 unlock_buffer(bh);
641} 640}
642 641
643static void gfs2_invalidatepage(struct page *page, unsigned long offset) 642static void gfs2_invalidatepage(struct page *page, unsigned long offset)
644{ 643{
645 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); 644 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
646 struct buffer_head *head, *bh, *next; 645 struct buffer_head *bh, *head;
647 unsigned int curr_off = 0; 646 unsigned long pos = 0;
648 647
649 BUG_ON(!PageLocked(page)); 648 BUG_ON(!PageLocked(page));
650 if (offset == 0) 649 if (offset == 0)
651 ClearPageChecked(page); 650 ClearPageChecked(page);
652 if (!page_has_buffers(page)) 651 if (!page_has_buffers(page))
653 return; 652 goto out;
654 653
655 bh = head = page_buffers(page); 654 bh = head = page_buffers(page);
656 do { 655 do {
657 unsigned int next_off = curr_off + bh->b_size; 656 if (offset <= pos)
658 next = bh->b_this_page; 657 gfs2_discard(sdp, bh);
659 658 pos += bh->b_size;
660 if (offset <= curr_off) 659 bh = bh->b_this_page;
661 discard_buffer(sdp, bh);
662
663 curr_off = next_off;
664 bh = next;
665 } while (bh != head); 660 } while (bh != head);
666 661out:
667 if (!offset) 662 if (offset == 0)
668 try_to_release_page(page, 0); 663 try_to_release_page(page, 0);
669
670 return;
671} 664}
672 665
673/** 666/**
@@ -736,59 +729,6 @@ out:
736} 729}
737 730
738/** 731/**
739 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
740 * @bh: the buffer we're stuck on
741 *
742 */
743
744static void stuck_releasepage(struct buffer_head *bh)
745{
746 struct inode *inode = bh->b_page->mapping->host;
747 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
748 struct gfs2_bufdata *bd = bh->b_private;
749 struct gfs2_glock *gl;
750static unsigned limit = 0;
751
752 if (limit > 3)
753 return;
754 limit++;
755
756 fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
757 fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
758 (unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
759 fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
760 fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");
761
762 if (!bd)
763 return;
764
765 gl = bd->bd_gl;
766
767 fs_warn(sdp, "gl = (%u, %llu)\n",
768 gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);
769
770 fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
771 (list_empty(&bd->bd_list_tr)) ? "no" : "yes",
772 (list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
773
774 if (gl->gl_ops == &gfs2_inode_glops) {
775 struct gfs2_inode *ip = gl->gl_object;
776 unsigned int x;
777
778 if (!ip)
779 return;
780
781 fs_warn(sdp, "ip = %llu %llu\n",
782 (unsigned long long)ip->i_no_formal_ino,
783 (unsigned long long)ip->i_no_addr);
784
785 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
786 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
787 x, (ip->i_cache[x]) ? "!NULL" : "NULL");
788 }
789}
790
791/**
792 * gfs2_releasepage - free the metadata associated with a page 732 * gfs2_releasepage - free the metadata associated with a page
793 * @page: the page that's being released 733 * @page: the page that's being released
794 * @gfp_mask: passed from Linux VFS, ignored by us 734 * @gfp_mask: passed from Linux VFS, ignored by us
@@ -805,41 +745,39 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
805 struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; 745 struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
806 struct buffer_head *bh, *head; 746 struct buffer_head *bh, *head;
807 struct gfs2_bufdata *bd; 747 struct gfs2_bufdata *bd;
808 unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ;
809 748
810 if (!page_has_buffers(page)) 749 if (!page_has_buffers(page))
811 goto out; 750 return 0;
812 751
752 gfs2_log_lock(sdp);
813 head = bh = page_buffers(page); 753 head = bh = page_buffers(page);
814 do { 754 do {
815 while (atomic_read(&bh->b_count)) { 755 if (atomic_read(&bh->b_count))
816 if (!atomic_read(&aspace->i_writecount)) 756 goto cannot_release;
817 return 0; 757 bd = bh->b_private;
818 758 if (bd && bd->bd_ail)
819 if (!(gfp_mask & __GFP_WAIT)) 759 goto cannot_release;
820 return 0;
821
822 if (time_after_eq(jiffies, t)) {
823 stuck_releasepage(bh);
824 /* should we withdraw here? */
825 return 0;
826 }
827
828 yield();
829 }
830
831 gfs2_assert_warn(sdp, !buffer_pinned(bh)); 760 gfs2_assert_warn(sdp, !buffer_pinned(bh));
832 gfs2_assert_warn(sdp, !buffer_dirty(bh)); 761 gfs2_assert_warn(sdp, !buffer_dirty(bh));
762 bh = bh->b_this_page;
763 } while(bh != head);
764 gfs2_log_unlock(sdp);
833 765
766 head = bh = page_buffers(page);
767 do {
834 gfs2_log_lock(sdp); 768 gfs2_log_lock(sdp);
835 bd = bh->b_private; 769 bd = bh->b_private;
836 if (bd) { 770 if (bd) {
837 gfs2_assert_warn(sdp, bd->bd_bh == bh); 771 gfs2_assert_warn(sdp, bd->bd_bh == bh);
838 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); 772 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
839 gfs2_assert_warn(sdp, !bd->bd_ail); 773 if (!list_empty(&bd->bd_le.le_list)) {
840 bd->bd_bh = NULL; 774 if (!buffer_pinned(bh))
841 if (!list_empty(&bd->bd_le.le_list)) 775 list_del_init(&bd->bd_le.le_list);
842 bd = NULL; 776 else
777 bd = NULL;
778 }
779 if (bd)
780 bd->bd_bh = NULL;
843 bh->b_private = NULL; 781 bh->b_private = NULL;
844 } 782 }
845 gfs2_log_unlock(sdp); 783 gfs2_log_unlock(sdp);
@@ -849,8 +787,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
849 bh = bh->b_this_page; 787 bh = bh->b_this_page;
850 } while (bh != head); 788 } while (bh != head);
851 789
852out:
853 return try_to_free_buffers(page); 790 return try_to_free_buffers(page);
791cannot_release:
792 gfs2_log_unlock(sdp);
793 return 0;
854} 794}
855 795
856const struct address_space_operations gfs2_file_aops = { 796const struct address_space_operations gfs2_file_aops = {
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index b8312edee0e4..e2d1347796a9 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
237 237
238 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, 238 inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
239 inum->no_addr, 239 inum->no_addr,
240 0); 240 0, 0);
241 if (!inode) 241 if (!inode)
242 goto fail; 242 goto fail;
243 if (IS_ERR(inode)) { 243 if (IS_ERR(inode)) {
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 94d76ace0b95..46a9e10ff17b 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -571,7 +571,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
571 int error = 0; 571 int error = 0;
572 572
573 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; 573 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
574 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; 574 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE
575 | GL_FLOCK;
575 576
576 mutex_lock(&fp->f_fl_mutex); 577 mutex_lock(&fp->f_fl_mutex);
577 578
@@ -579,21 +580,19 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
579 if (gl) { 580 if (gl) {
580 if (fl_gh->gh_state == state) 581 if (fl_gh->gh_state == state)
581 goto out; 582 goto out;
582 gfs2_glock_hold(gl);
583 flock_lock_file_wait(file, 583 flock_lock_file_wait(file,
584 &(struct file_lock){.fl_type = F_UNLCK}); 584 &(struct file_lock){.fl_type = F_UNLCK});
585 gfs2_glock_dq_uninit(fl_gh); 585 gfs2_glock_dq_wait(fl_gh);
586 gfs2_holder_reinit(state, flags, fl_gh);
586 } else { 587 } else {
587 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), 588 error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
588 ip->i_no_addr, &gfs2_flock_glops, 589 ip->i_no_addr, &gfs2_flock_glops,
589 CREATE, &gl); 590 CREATE, &gl);
590 if (error) 591 if (error)
591 goto out; 592 goto out;
593 gfs2_holder_init(gl, state, flags, fl_gh);
594 gfs2_glock_put(gl);
592 } 595 }
593
594 gfs2_holder_init(gl, state, flags, fl_gh);
595 gfs2_glock_put(gl);
596
597 error = gfs2_glock_nq(fl_gh); 596 error = gfs2_glock_nq(fl_gh);
598 if (error) { 597 if (error) {
599 gfs2_holder_uninit(fl_gh); 598 gfs2_holder_uninit(fl_gh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cf5aa5050548..17de58e83d92 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -28,18 +28,18 @@
28#include "lm.h" 28#include "lm.h"
29#include "mount.h" 29#include "mount.h"
30#include "ops_fstype.h" 30#include "ops_fstype.h"
31#include "ops_dentry.h"
31#include "ops_super.h" 32#include "ops_super.h"
32#include "recovery.h" 33#include "recovery.h"
33#include "rgrp.h" 34#include "rgrp.h"
34#include "super.h" 35#include "super.h"
35#include "sys.h" 36#include "sys.h"
36#include "util.h" 37#include "util.h"
38#include "log.h"
37 39
38#define DO 0 40#define DO 0
39#define UNDO 1 41#define UNDO 1
40 42
41extern struct dentry_operations gfs2_dops;
42
43static struct gfs2_sbd *init_sbd(struct super_block *sb) 43static struct gfs2_sbd *init_sbd(struct super_block *sb)
44{ 44{
45 struct gfs2_sbd *sdp; 45 struct gfs2_sbd *sdp;
@@ -82,13 +82,15 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
82 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 82 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
83 INIT_LIST_HEAD(&sdp->sd_log_le_rg); 83 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
84 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 84 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
85 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
85 86
86 mutex_init(&sdp->sd_log_reserve_mutex); 87 mutex_init(&sdp->sd_log_reserve_mutex);
87 INIT_LIST_HEAD(&sdp->sd_ail1_list); 88 INIT_LIST_HEAD(&sdp->sd_ail1_list);
88 INIT_LIST_HEAD(&sdp->sd_ail2_list); 89 INIT_LIST_HEAD(&sdp->sd_ail2_list);
89 90
90 init_rwsem(&sdp->sd_log_flush_lock); 91 init_rwsem(&sdp->sd_log_flush_lock);
91 INIT_LIST_HEAD(&sdp->sd_log_flush_list); 92 atomic_set(&sdp->sd_log_in_flight, 0);
93 init_waitqueue_head(&sdp->sd_log_flush_wait);
92 94
93 INIT_LIST_HEAD(&sdp->sd_revoke_list); 95 INIT_LIST_HEAD(&sdp->sd_revoke_list);
94 96
@@ -145,7 +147,8 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
145 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); 147 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
146 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); 148 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
147 149
148 while ((table = strchr(sdp->sd_table_name, '/'))) 150 table = sdp->sd_table_name;
151 while ((table = strchr(table, '/')))
149 *table = '_'; 152 *table = '_';
150 153
151out: 154out:
@@ -161,14 +164,6 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
161 if (undo) 164 if (undo)
162 goto fail_trans; 165 goto fail_trans;
163 166
164 p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
165 error = IS_ERR(p);
166 if (error) {
167 fs_err(sdp, "can't start scand thread: %d\n", error);
168 return error;
169 }
170 sdp->sd_scand_process = p;
171
172 for (sdp->sd_glockd_num = 0; 167 for (sdp->sd_glockd_num = 0;
173 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd; 168 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
174 sdp->sd_glockd_num++) { 169 sdp->sd_glockd_num++) {
@@ -229,14 +224,13 @@ fail:
229 while (sdp->sd_glockd_num--) 224 while (sdp->sd_glockd_num--)
230 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); 225 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
231 226
232 kthread_stop(sdp->sd_scand_process);
233 return error; 227 return error;
234} 228}
235 229
236static inline struct inode *gfs2_lookup_root(struct super_block *sb, 230static inline struct inode *gfs2_lookup_root(struct super_block *sb,
237 u64 no_addr) 231 u64 no_addr)
238{ 232{
239 return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); 233 return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
240} 234}
241 235
242static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) 236static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
@@ -301,8 +295,9 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
301 fs_err(sdp, "can't get root dentry\n"); 295 fs_err(sdp, "can't get root dentry\n");
302 error = -ENOMEM; 296 error = -ENOMEM;
303 iput(inode); 297 iput(inode);
304 } 298 } else
305 sb->s_root->d_op = &gfs2_dops; 299 sb->s_root->d_op = &gfs2_dops;
300
306out: 301out:
307 gfs2_glock_dq_uninit(&sb_gh); 302 gfs2_glock_dq_uninit(&sb_gh);
308 return error; 303 return error;
@@ -368,7 +363,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
368 363
369 ip = GFS2_I(sdp->sd_jdesc->jd_inode); 364 ip = GFS2_I(sdp->sd_jdesc->jd_inode);
370 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 365 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
371 LM_FLAG_NOEXP | GL_EXACT, 366 LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
372 &sdp->sd_jinode_gh); 367 &sdp->sd_jinode_gh);
373 if (error) { 368 if (error) {
374 fs_err(sdp, "can't acquire journal inode glock: %d\n", 369 fs_err(sdp, "can't acquire journal inode glock: %d\n",
@@ -818,7 +813,6 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
818 struct nameidata nd; 813 struct nameidata nd;
819 struct file_system_type *fstype; 814 struct file_system_type *fstype;
820 struct super_block *sb = NULL, *s; 815 struct super_block *sb = NULL, *s;
821 struct list_head *l;
822 int error; 816 int error;
823 817
824 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); 818 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
@@ -830,8 +824,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
830 error = vfs_getattr(nd.mnt, nd.dentry, &stat); 824 error = vfs_getattr(nd.mnt, nd.dentry, &stat);
831 825
832 fstype = get_fs_type("gfs2"); 826 fstype = get_fs_type("gfs2");
833 list_for_each(l, &fstype->fs_supers) { 827 list_for_each_entry(s, &fstype->fs_supers, s_instances) {
834 s = list_entry(l, struct super_block, s_instances);
835 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || 828 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
836 (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) { 829 (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) {
837 sb = s; 830 sb = s;
@@ -861,7 +854,7 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
861 error = -ENOENT; 854 error = -ENOENT;
862 goto error; 855 goto error;
863 } 856 }
864 sdp = (struct gfs2_sbd*) sb->s_fs_info; 857 sdp = sb->s_fs_info;
865 if (sdp->sd_vfs_meta) { 858 if (sdp->sd_vfs_meta) {
866 printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); 859 printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
867 error = -EBUSY; 860 error = -EBUSY;
@@ -896,7 +889,10 @@ error:
896 889
897static void gfs2_kill_sb(struct super_block *sb) 890static void gfs2_kill_sb(struct super_block *sb)
898{ 891{
899 gfs2_delete_debugfs_file(sb->s_fs_info); 892 if (sb->s_fs_info) {
893 gfs2_delete_debugfs_file(sb->s_fs_info);
894 gfs2_meta_syncfs(sb->s_fs_info);
895 }
900 kill_block_super(sb); 896 kill_block_super(sb);
901} 897}
902 898
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 911c115b5c6c..291f0c7eaa3b 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
69 mark_inode_dirty(inode); 69 mark_inode_dirty(inode);
70 break; 70 break;
71 } else if (PTR_ERR(inode) != -EEXIST || 71 } else if (PTR_ERR(inode) != -EEXIST ||
72 (nd->intent.open.flags & O_EXCL)) { 72 (nd && (nd->intent.open.flags & O_EXCL))) {
73 gfs2_holder_uninit(ghs); 73 gfs2_holder_uninit(ghs);
74 return PTR_ERR(inode); 74 return PTR_ERR(inode);
75 } 75 }
@@ -278,17 +278,25 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
278 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 278 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
279 279
280 280
281 error = gfs2_glock_nq_m(3, ghs); 281 error = gfs2_glock_nq(ghs); /* parent */
282 if (error) 282 if (error)
283 goto out; 283 goto out_parent;
284
285 error = gfs2_glock_nq(ghs + 1); /* child */
286 if (error)
287 goto out_child;
288
289 error = gfs2_glock_nq(ghs + 2); /* rgrp */
290 if (error)
291 goto out_rgrp;
284 292
285 error = gfs2_unlink_ok(dip, &dentry->d_name, ip); 293 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
286 if (error) 294 if (error)
287 goto out_gunlock; 295 goto out_rgrp;
288 296
289 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); 297 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
290 if (error) 298 if (error)
291 goto out_gunlock; 299 goto out_rgrp;
292 300
293 error = gfs2_dir_del(dip, &dentry->d_name); 301 error = gfs2_dir_del(dip, &dentry->d_name);
294 if (error) 302 if (error)
@@ -298,12 +306,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
298 306
299out_end_trans: 307out_end_trans:
300 gfs2_trans_end(sdp); 308 gfs2_trans_end(sdp);
301out_gunlock: 309 gfs2_glock_dq(ghs + 2);
302 gfs2_glock_dq_m(3, ghs); 310out_rgrp:
303out:
304 gfs2_holder_uninit(ghs);
305 gfs2_holder_uninit(ghs + 1);
306 gfs2_holder_uninit(ghs + 2); 311 gfs2_holder_uninit(ghs + 2);
312 gfs2_glock_dq(ghs + 1);
313out_child:
314 gfs2_holder_uninit(ghs + 1);
315 gfs2_glock_dq(ghs);
316out_parent:
317 gfs2_holder_uninit(ghs);
307 gfs2_glock_dq_uninit(&ri_gh); 318 gfs2_glock_dq_uninit(&ri_gh);
308 return error; 319 return error;
309} 320}
@@ -894,12 +905,17 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
894static int setattr_size(struct inode *inode, struct iattr *attr) 905static int setattr_size(struct inode *inode, struct iattr *attr)
895{ 906{
896 struct gfs2_inode *ip = GFS2_I(inode); 907 struct gfs2_inode *ip = GFS2_I(inode);
908 struct gfs2_sbd *sdp = GFS2_SB(inode);
897 int error; 909 int error;
898 910
899 if (attr->ia_size != ip->i_di.di_size) { 911 if (attr->ia_size != ip->i_di.di_size) {
900 error = vmtruncate(inode, attr->ia_size); 912 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
901 if (error) 913 if (error)
902 return error; 914 return error;
915 error = vmtruncate(inode, attr->ia_size);
916 gfs2_trans_end(sdp);
917 if (error)
918 return error;
903 } 919 }
904 920
905 error = gfs2_truncatei(ip, attr->ia_size); 921 error = gfs2_truncatei(ip, attr->ia_size);
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 603d940f1159..950f31460e8b 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -92,7 +92,6 @@ static void gfs2_put_super(struct super_block *sb)
92 kthread_stop(sdp->sd_recoverd_process); 92 kthread_stop(sdp->sd_recoverd_process);
93 while (sdp->sd_glockd_num--) 93 while (sdp->sd_glockd_num--)
94 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); 94 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
95 kthread_stop(sdp->sd_scand_process);
96 95
97 if (!(sb->s_flags & MS_RDONLY)) { 96 if (!(sb->s_flags & MS_RDONLY)) {
98 error = gfs2_make_fs_ro(sdp); 97 error = gfs2_make_fs_ro(sdp);
@@ -456,12 +455,15 @@ static void gfs2_delete_inode(struct inode *inode)
456 } 455 }
457 456
458 error = gfs2_dinode_dealloc(ip); 457 error = gfs2_dinode_dealloc(ip);
459 /* 458 if (error)
460 * Must do this before unlock to avoid trying to write back 459 goto out_unlock;
461 * potentially dirty data now that inode no longer exists 460
462 * on disk. 461 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
463 */ 462 if (error)
463 goto out_unlock;
464 /* Needs to be done before glock release & also in a transaction */
464 truncate_inode_pages(&inode->i_data, 0); 465 truncate_inode_pages(&inode->i_data, 0);
466 gfs2_trans_end(sdp);
465 467
466out_unlock: 468out_unlock:
467 gfs2_glock_dq(&ip->i_iopen_gh); 469 gfs2_glock_dq(&ip->i_iopen_gh);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6e546ee8f3d4..addb51e0f135 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -70,6 +70,7 @@ struct gfs2_quota_host {
70 u64 qu_limit; 70 u64 qu_limit;
71 u64 qu_warn; 71 u64 qu_warn;
72 s64 qu_value; 72 s64 qu_value;
73 u32 qu_ll_next;
73}; 74};
74 75
75struct gfs2_quota_change_host { 76struct gfs2_quota_change_host {
@@ -580,6 +581,7 @@ static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
580 qu->qu_limit = be64_to_cpu(str->qu_limit); 581 qu->qu_limit = be64_to_cpu(str->qu_limit);
581 qu->qu_warn = be64_to_cpu(str->qu_warn); 582 qu->qu_warn = be64_to_cpu(str->qu_warn);
582 qu->qu_value = be64_to_cpu(str->qu_value); 583 qu->qu_value = be64_to_cpu(str->qu_value);
584 qu->qu_ll_next = be32_to_cpu(str->qu_ll_next);
583} 585}
584 586
585static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) 587static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
@@ -589,6 +591,7 @@ static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
589 str->qu_limit = cpu_to_be64(qu->qu_limit); 591 str->qu_limit = cpu_to_be64(qu->qu_limit);
590 str->qu_warn = cpu_to_be64(qu->qu_warn); 592 str->qu_warn = cpu_to_be64(qu->qu_warn);
591 str->qu_value = cpu_to_be64(qu->qu_value); 593 str->qu_value = cpu_to_be64(qu->qu_value);
594 str->qu_ll_next = cpu_to_be32(qu->qu_ll_next);
592 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); 595 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
593} 596}
594 597
@@ -614,6 +617,16 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
614 s64 value; 617 s64 value;
615 int err = -EIO; 618 int err = -EIO;
616 619
620 if (gfs2_is_stuffed(ip)) {
621 struct gfs2_alloc *al = NULL;
622 al = gfs2_alloc_get(ip);
623 /* just request 1 blk */
624 al->al_requested = 1;
625 gfs2_inplace_reserve(ip);
626 gfs2_unstuff_dinode(ip, NULL);
627 gfs2_inplace_release(ip);
628 gfs2_alloc_put(ip);
629 }
617 page = grab_cache_page(mapping, index); 630 page = grab_cache_page(mapping, index);
618 if (!page) 631 if (!page)
619 return -ENOMEM; 632 return -ENOMEM;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 5ada38c99a2c..beb6c7ac0086 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -469,7 +469,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
469 }; 469 };
470 470
471 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 471 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
472 LM_FLAG_NOEXP, &ji_gh); 472 LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
473 if (error) 473 if (error)
474 goto fail_gunlock_j; 474 goto fail_gunlock_j;
475 } else { 475 } else {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ce48c4594ec8..708c287e1d0e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -31,6 +31,7 @@
31#include "inode.h" 31#include "inode.h"
32 32
33#define BFITNOENT ((u32)~0) 33#define BFITNOENT ((u32)~0)
34#define NO_BLOCK ((u64)~0)
34 35
35/* 36/*
36 * These routines are used by the resource group routines (rgrp.c) 37 * These routines are used by the resource group routines (rgrp.c)
@@ -116,8 +117,7 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
116 * @buffer: the buffer that holds the bitmaps 117 * @buffer: the buffer that holds the bitmaps
117 * @buflen: the length (in bytes) of the buffer 118 * @buflen: the length (in bytes) of the buffer
118 * @goal: start search at this block's bit-pair (within @buffer) 119 * @goal: start search at this block's bit-pair (within @buffer)
119 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for; 120 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for.
120 * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
121 * 121 *
122 * Scope of @goal and returned block number is only within this bitmap buffer, 122 * Scope of @goal and returned block number is only within this bitmap buffer,
123 * not entire rgrp or filesystem. @buffer will be offset from the actual 123 * not entire rgrp or filesystem. @buffer will be offset from the actual
@@ -137,9 +137,13 @@ static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
137 byte = buffer + (goal / GFS2_NBBY); 137 byte = buffer + (goal / GFS2_NBBY);
138 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; 138 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
139 end = buffer + buflen; 139 end = buffer + buflen;
140 alloc = (old_state & 1) ? 0 : 0x55; 140 alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0;
141 141
142 while (byte < end) { 142 while (byte < end) {
143 /* If we're looking for a free block we can eliminate all
144 bitmap settings with 0x55, which represents four data
145 blocks in a row. If we're looking for a data block, we can
146 eliminate 0x00 which corresponds to four free blocks. */
143 if ((*byte & 0x55) == alloc) { 147 if ((*byte & 0x55) == alloc) {
144 blk += (8 - bit) >> 1; 148 blk += (8 - bit) >> 1;
145 149
@@ -859,23 +863,28 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
859static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) 863static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
860{ 864{
861 struct inode *inode; 865 struct inode *inode;
862 u32 goal = 0; 866 u32 goal = 0, block;
863 u64 no_addr; 867 u64 no_addr;
868 struct gfs2_sbd *sdp = rgd->rd_sbd;
864 869
865 for(;;) { 870 for(;;) {
866 if (goal >= rgd->rd_data) 871 if (goal >= rgd->rd_data)
867 break; 872 break;
868 goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 873 down_write(&sdp->sd_log_flush_lock);
869 GFS2_BLKST_UNLINKED); 874 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
870 if (goal == BFITNOENT) 875 GFS2_BLKST_UNLINKED);
876 up_write(&sdp->sd_log_flush_lock);
877 if (block == BFITNOENT)
871 break; 878 break;
872 no_addr = goal + rgd->rd_data0; 879 /* rgblk_search can return a block < goal, so we need to
880 keep it marching forward. */
881 no_addr = block + rgd->rd_data0;
873 goal++; 882 goal++;
874 if (no_addr < *last_unlinked) 883 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
875 continue; 884 continue;
876 *last_unlinked = no_addr; 885 *last_unlinked = no_addr;
877 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, 886 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
878 no_addr, -1); 887 no_addr, -1, 1);
879 if (!IS_ERR(inode)) 888 if (!IS_ERR(inode))
880 return inode; 889 return inode;
881 } 890 }
@@ -1152,7 +1161,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
1152 struct gfs2_alloc *al = &ip->i_alloc; 1161 struct gfs2_alloc *al = &ip->i_alloc;
1153 struct inode *inode; 1162 struct inode *inode;
1154 int error = 0; 1163 int error = 0;
1155 u64 last_unlinked = 0; 1164 u64 last_unlinked = NO_BLOCK;
1156 1165
1157 if (gfs2_assert_warn(sdp, al->al_requested)) 1166 if (gfs2_assert_warn(sdp, al->al_requested))
1158 return -EINVAL; 1167 return -EINVAL;
@@ -1289,7 +1298,9 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1289 allocatable block anywhere else, we want to be able to wrap around and 1298 allocatable block anywhere else, we want to be able to wrap around and
1290 search in the first part of our first-searched bit block. */ 1299 search in the first part of our first-searched bit block. */
1291 for (x = 0; x <= length; x++) { 1300 for (x = 0; x <= length; x++) {
1292 if (bi->bi_clone) 1301 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1302 bitmaps, so we must search the originals for that. */
1303 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1293 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, 1304 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset,
1294 bi->bi_len, goal, old_state); 1305 bi->bi_len, goal, old_state);
1295 else 1306 else
@@ -1305,9 +1316,7 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1305 goal = 0; 1316 goal = 0;
1306 } 1317 }
1307 1318
1308 if (old_state != new_state) { 1319 if (blk != BFITNOENT && old_state != new_state) {
1309 gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
1310
1311 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1320 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1312 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1321 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1313 bi->bi_len, blk, new_state); 1322 bi->bi_len, blk, new_state);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a2da76b5ae4c..dd3e737f528e 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -58,7 +58,6 @@ void gfs2_tune_init(struct gfs2_tune *gt)
58 gt->gt_incore_log_blocks = 1024; 58 gt->gt_incore_log_blocks = 1024;
59 gt->gt_log_flush_secs = 60; 59 gt->gt_log_flush_secs = 60;
60 gt->gt_jindex_refresh_secs = 60; 60 gt->gt_jindex_refresh_secs = 60;
61 gt->gt_scand_secs = 15;
62 gt->gt_recoverd_secs = 60; 61 gt->gt_recoverd_secs = 60;
63 gt->gt_logd_secs = 1; 62 gt->gt_logd_secs = 1;
64 gt->gt_quotad_secs = 5; 63 gt->gt_quotad_secs = 5;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index c26c21b53c19..ba3a1729cc1a 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -442,7 +442,6 @@ TUNE_ATTR(quota_simul_sync, 1);
442TUNE_ATTR(quota_cache_secs, 1); 442TUNE_ATTR(quota_cache_secs, 1);
443TUNE_ATTR(stall_secs, 1); 443TUNE_ATTR(stall_secs, 1);
444TUNE_ATTR(statfs_quantum, 1); 444TUNE_ATTR(statfs_quantum, 1);
445TUNE_ATTR_DAEMON(scand_secs, scand_process);
446TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); 445TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
447TUNE_ATTR_DAEMON(logd_secs, logd_process); 446TUNE_ATTR_DAEMON(logd_secs, logd_process);
448TUNE_ATTR_DAEMON(quotad_secs, quotad_process); 447TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
@@ -464,7 +463,6 @@ static struct attribute *tune_attrs[] = {
464 &tune_attr_quota_cache_secs.attr, 463 &tune_attr_quota_cache_secs.attr,
465 &tune_attr_stall_secs.attr, 464 &tune_attr_stall_secs.attr,
466 &tune_attr_statfs_quantum.attr, 465 &tune_attr_statfs_quantum.attr,
467 &tune_attr_scand_secs.attr,
468 &tune_attr_recoverd_secs.attr, 466 &tune_attr_recoverd_secs.attr,
469 &tune_attr_logd_secs.attr, 467 &tune_attr_logd_secs.attr,
470 &tune_attr_quotad_secs.attr, 468 &tune_attr_quotad_secs.attr,
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index f8dabf8446bb..717983e2c2ae 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -142,25 +142,25 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
142 lops_add(sdp, &bd->bd_le); 142 lops_add(sdp, &bd->bd_le);
143} 143}
144 144
145void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno) 145void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
146{ 146{
147 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke), 147 BUG_ON(!list_empty(&bd->bd_le.le_list));
148 GFP_NOFS | __GFP_NOFAIL); 148 BUG_ON(!list_empty(&bd->bd_ail_st_list));
149 lops_init_le(&rv->rv_le, &gfs2_revoke_lops); 149 BUG_ON(!list_empty(&bd->bd_ail_gl_list));
150 rv->rv_blkno = blkno; 150 lops_init_le(&bd->bd_le, &gfs2_revoke_lops);
151 lops_add(sdp, &rv->rv_le); 151 lops_add(sdp, &bd->bd_le);
152} 152}
153 153
154void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) 154void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
155{ 155{
156 struct gfs2_revoke *rv; 156 struct gfs2_bufdata *bd;
157 int found = 0; 157 int found = 0;
158 158
159 gfs2_log_lock(sdp); 159 gfs2_log_lock(sdp);
160 160
161 list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) { 161 list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) {
162 if (rv->rv_blkno == blkno) { 162 if (bd->bd_blkno == blkno) {
163 list_del(&rv->rv_le.le_list); 163 list_del_init(&bd->bd_le.le_list);
164 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); 164 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
165 sdp->sd_log_num_revoke--; 165 sdp->sd_log_num_revoke--;
166 found = 1; 166 found = 1;
@@ -172,7 +172,7 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
172 172
173 if (found) { 173 if (found) {
174 struct gfs2_trans *tr = current->journal_info; 174 struct gfs2_trans *tr = current->journal_info;
175 kfree(rv); 175 kmem_cache_free(gfs2_bufdata_cachep, bd);
176 tr->tr_num_revoke_rm++; 176 tr->tr_num_revoke_rm++;
177 } 177 }
178} 178}
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 23d4cbe1de5b..043d5f4b9c4c 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -32,7 +32,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp);
32 32
33void gfs2_trans_add_gl(struct gfs2_glock *gl); 33void gfs2_trans_add_gl(struct gfs2_glock *gl);
34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno); 35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); 36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno);
37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); 37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
38 38
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index a44a6a078f0a..c3c19f926e6f 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -170,6 +170,33 @@ struct gfs2_rgrp {
170}; 170};
171 171
172/* 172/*
173 * quota linked list: user quotas and group quotas form two separate
174 * singly linked lists. ll_next stores uids or gids of next quotas in the
175 * linked list.
176
177Given the uid/gid, how to calculate the quota file offsets for the corresponding
178gfs2_quota structures on disk:
179
180for user quotas, given uid,
181offset = uid * sizeof(struct gfs2_quota);
182
183for group quotas, given gid,
184offset = (gid * sizeof(struct gfs2_quota)) + sizeof(struct gfs2_quota);
185
186
187 uid:0 gid:0 uid:12 gid:12 uid:17 gid:17 uid:5142 gid:5142
188+-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+
189| valid | valid | :: | valid | valid | :: | valid | inval | :: | inval | valid |
190+-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+
191next:12 next:12 next:17 next:5142 next:NULL next:NULL
192 | | | | |<-- user quota list |
193 \______|___________/ \______|___________/ group quota list -->|
194 | | |
195 \__________________/ \_______________________________________/
196
197*/
198
199/*
173 * quota structure 200 * quota structure
174 */ 201 */
175 202
@@ -177,7 +204,8 @@ struct gfs2_quota {
177 __be64 qu_limit; 204 __be64 qu_limit;
178 __be64 qu_warn; 205 __be64 qu_warn;
179 __be64 qu_value; 206 __be64 qu_value;
180 __u8 qu_reserved[64]; 207 __be32 qu_ll_next; /* location of next quota in list */
208 __u8 qu_reserved[60];
181}; 209};
182 210
183/* 211/*