aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/file.c14
-rw-r--r--fs/compat.c6
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext2/xattr.c6
-rw-r--r--fs/fuse/dev.c6
-rw-r--r--fs/fuse/file.c11
-rw-r--r--fs/jbd/checkpoint.c418
-rw-r--r--fs/jbd/commit.c3
-rw-r--r--fs/lockd/clntlock.c27
-rw-r--r--fs/lockd/svc4proc.c2
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c12
-rw-r--r--fs/ocfs2/dlm/dlmlock.c25
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c7
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c42
-rw-r--r--fs/ocfs2/journal.c7
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/reiserfs/xattr_acl.c3
-rw-r--r--fs/select.c6
20 files changed, 320 insertions, 285 deletions
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index d17c97d07c80..675bd2568297 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1442,13 +1442,15 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1442 &bytes_read, &smb_read_data, 1442 &bytes_read, &smb_read_data,
1443 &buf_type); 1443 &buf_type);
1444 pSMBr = (struct smb_com_read_rsp *)smb_read_data; 1444 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1445 if (copy_to_user(current_offset,
1446 smb_read_data + 4 /* RFC1001 hdr */
1447 + le16_to_cpu(pSMBr->DataOffset),
1448 bytes_read)) {
1449 rc = -EFAULT;
1450 }
1451 if (smb_read_data) { 1445 if (smb_read_data) {
1446 if (copy_to_user(current_offset,
1447 smb_read_data +
1448 4 /* RFC1001 length field */ +
1449 le16_to_cpu(pSMBr->DataOffset),
1450 bytes_read)) {
1451 rc = -EFAULT;
1452 }
1453
1452 if(buf_type == CIFS_SMALL_BUFFER) 1454 if(buf_type == CIFS_SMALL_BUFFER)
1453 cifs_small_buf_release(smb_read_data); 1455 cifs_small_buf_release(smb_read_data);
1454 else if(buf_type == CIFS_LARGE_BUFFER) 1456 else if(buf_type == CIFS_LARGE_BUFFER)
diff --git a/fs/compat.c b/fs/compat.c
index a2ba78bdf7f7..5333c7d7427f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1757,7 +1757,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1757 goto sticky; 1757 goto sticky;
1758 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); 1758 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1759 rtv.tv_sec = timeout; 1759 rtv.tv_sec = timeout;
1760 if (compat_timeval_compare(&rtv, &tv) < 0) 1760 if (compat_timeval_compare(&rtv, &tv) >= 0)
1761 rtv = tv; 1761 rtv = tv;
1762 if (copy_to_user(tvp, &rtv, sizeof(rtv))) { 1762 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
1763sticky: 1763sticky:
@@ -1834,7 +1834,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1834 rts.tv_sec++; 1834 rts.tv_sec++;
1835 rts.tv_nsec -= NSEC_PER_SEC; 1835 rts.tv_nsec -= NSEC_PER_SEC;
1836 } 1836 }
1837 if (compat_timespec_compare(&rts, &ts) < 0) 1837 if (compat_timespec_compare(&rts, &ts) >= 0)
1838 rts = ts; 1838 rts = ts;
1839 copy_to_user(tsp, &rts, sizeof(rts)); 1839 copy_to_user(tsp, &rts, sizeof(rts));
1840 } 1840 }
@@ -1934,7 +1934,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1934 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1934 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
1935 1000; 1935 1000;
1936 rts.tv_sec = timeout; 1936 rts.tv_sec = timeout;
1937 if (compat_timespec_compare(&rts, &ts) < 0) 1937 if (compat_timespec_compare(&rts, &ts) >= 0)
1938 rts = ts; 1938 rts = ts;
1939 if (copy_to_user(tsp, &rts, sizeof(rts))) { 1939 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1940sticky: 1940sticky:
diff --git a/fs/exec.c b/fs/exec.c
index 055378d2513e..0e1c95074d42 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm)
1403 do_each_thread(g,p) { 1403 do_each_thread(g,p) {
1404 if (mm == p->mm && p != tsk && 1404 if (mm == p->mm && p != tsk &&
1405 p->ptrace && p->parent->mm == mm) { 1405 p->ptrace && p->parent->mm == mm) {
1406 __ptrace_unlink(p); 1406 __ptrace_detach(p, 0);
1407 } 1407 }
1408 } while_each_thread(g,p); 1408 } while_each_thread(g,p);
1409 write_unlock_irq(&tasklist_lock); 1409 write_unlock_irq(&tasklist_lock);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index a2ca3107d475..86ae8e93adb9 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -792,18 +792,20 @@ ext2_xattr_delete_inode(struct inode *inode)
792 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); 792 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
793 get_bh(bh); 793 get_bh(bh);
794 bforget(bh); 794 bforget(bh);
795 unlock_buffer(bh);
795 } else { 796 } else {
796 HDR(bh)->h_refcount = cpu_to_le32( 797 HDR(bh)->h_refcount = cpu_to_le32(
797 le32_to_cpu(HDR(bh)->h_refcount) - 1); 798 le32_to_cpu(HDR(bh)->h_refcount) - 1);
798 if (ce) 799 if (ce)
799 mb_cache_entry_release(ce); 800 mb_cache_entry_release(ce);
801 ea_bdebug(bh, "refcount now=%d",
802 le32_to_cpu(HDR(bh)->h_refcount));
803 unlock_buffer(bh);
800 mark_buffer_dirty(bh); 804 mark_buffer_dirty(bh);
801 if (IS_SYNC(inode)) 805 if (IS_SYNC(inode))
802 sync_dirty_buffer(bh); 806 sync_dirty_buffer(bh);
803 DQUOT_FREE_BLOCK(inode, 1); 807 DQUOT_FREE_BLOCK(inode, 1);
804 } 808 }
805 ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
806 unlock_buffer(bh);
807 EXT2_I(inode)->i_file_acl = 0; 809 EXT2_I(inode)->i_file_acl = 0;
808 810
809cleanup: 811cleanup:
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f556a0d5c0d3..0c9a2ee54c91 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -66,6 +66,12 @@ static void restore_sigs(sigset_t *oldset)
66 sigprocmask(SIG_SETMASK, oldset, NULL); 66 sigprocmask(SIG_SETMASK, oldset, NULL);
67} 67}
68 68
69/*
70 * Reset request, so that it can be reused
71 *
72 * The caller must be _very_ careful to make sure, that it is holding
73 * the only reference to req
74 */
69void fuse_reset_request(struct fuse_req *req) 75void fuse_reset_request(struct fuse_req *req)
70{ 76{
71 int preallocated = req->preallocated; 77 int preallocated = req->preallocated;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 296351615b00..6f05379b0a0d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -116,9 +116,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
116/* Special case for failed iget in CREATE */ 116/* Special case for failed iget in CREATE */
117static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 117static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
118{ 118{
119 u64 nodeid = req->in.h.nodeid; 119 /* If called from end_io_requests(), req has more than one
120 fuse_reset_request(req); 120 reference and fuse_reset_request() cannot work */
121 fuse_send_forget(fc, req, nodeid, 1); 121 if (fc->connected) {
122 u64 nodeid = req->in.h.nodeid;
123 fuse_reset_request(req);
124 fuse_send_forget(fc, req, nodeid, 1);
125 } else
126 fuse_put_request(fc, req);
122} 127}
123 128
124void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, 129void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e6265a0b56b8..543ed543d1e5 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,75 +24,29 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25 25
26/* 26/*
27 * Unlink a buffer from a transaction checkpoint list. 27 * Unlink a buffer from a transaction.
28 * 28 *
29 * Called with j_list_lock held. 29 * Called with j_list_lock held.
30 */ 30 */
31 31
32static void __buffer_unlink_first(struct journal_head *jh) 32static inline void __buffer_unlink(struct journal_head *jh)
33{ 33{
34 transaction_t *transaction; 34 transaction_t *transaction;
35 35
36 transaction = jh->b_cp_transaction; 36 transaction = jh->b_cp_transaction;
37 jh->b_cp_transaction = NULL;
37 38
38 jh->b_cpnext->b_cpprev = jh->b_cpprev; 39 jh->b_cpnext->b_cpprev = jh->b_cpprev;
39 jh->b_cpprev->b_cpnext = jh->b_cpnext; 40 jh->b_cpprev->b_cpnext = jh->b_cpnext;
40 if (transaction->t_checkpoint_list == jh) { 41 if (transaction->t_checkpoint_list == jh)
41 transaction->t_checkpoint_list = jh->b_cpnext; 42 transaction->t_checkpoint_list = jh->b_cpnext;
42 if (transaction->t_checkpoint_list == jh) 43 if (transaction->t_checkpoint_list == jh)
43 transaction->t_checkpoint_list = NULL; 44 transaction->t_checkpoint_list = NULL;
44 }
45}
46
47/*
48 * Unlink a buffer from a transaction checkpoint(io) list.
49 *
50 * Called with j_list_lock held.
51 */
52
53static inline void __buffer_unlink(struct journal_head *jh)
54{
55 transaction_t *transaction;
56
57 transaction = jh->b_cp_transaction;
58
59 __buffer_unlink_first(jh);
60 if (transaction->t_checkpoint_io_list == jh) {
61 transaction->t_checkpoint_io_list = jh->b_cpnext;
62 if (transaction->t_checkpoint_io_list == jh)
63 transaction->t_checkpoint_io_list = NULL;
64 }
65}
66
67/*
68 * Move a buffer from the checkpoint list to the checkpoint io list
69 *
70 * Called with j_list_lock held
71 */
72
73static inline void __buffer_relink_io(struct journal_head *jh)
74{
75 transaction_t *transaction;
76
77 transaction = jh->b_cp_transaction;
78 __buffer_unlink_first(jh);
79
80 if (!transaction->t_checkpoint_io_list) {
81 jh->b_cpnext = jh->b_cpprev = jh;
82 } else {
83 jh->b_cpnext = transaction->t_checkpoint_io_list;
84 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
85 jh->b_cpprev->b_cpnext = jh;
86 jh->b_cpnext->b_cpprev = jh;
87 }
88 transaction->t_checkpoint_io_list = jh;
89} 45}
90 46
91/* 47/*
92 * Try to release a checkpointed buffer from its transaction. 48 * Try to release a checkpointed buffer from its transaction.
93 * Returns 1 if we released it and 2 if we also released the 49 * Returns 1 if we released it.
94 * whole transaction.
95 *
96 * Requires j_list_lock 50 * Requires j_list_lock
97 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 51 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
98 */ 52 */
@@ -103,11 +57,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
103 57
104 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 58 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
105 JBUFFER_TRACE(jh, "remove from checkpoint list"); 59 JBUFFER_TRACE(jh, "remove from checkpoint list");
106 ret = __journal_remove_checkpoint(jh) + 1; 60 __journal_remove_checkpoint(jh);
107 jbd_unlock_bh_state(bh); 61 jbd_unlock_bh_state(bh);
108 journal_remove_journal_head(bh); 62 journal_remove_journal_head(bh);
109 BUFFER_TRACE(bh, "release"); 63 BUFFER_TRACE(bh, "release");
110 __brelse(bh); 64 __brelse(bh);
65 ret = 1;
111 } else { 66 } else {
112 jbd_unlock_bh_state(bh); 67 jbd_unlock_bh_state(bh);
113 } 68 }
@@ -162,53 +117,83 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
162} 117}
163 118
164/* 119/*
165 * Clean up transaction's list of buffers submitted for io. 120 * Clean up a transaction's checkpoint list.
166 * We wait for any pending IO to complete and remove any clean 121 *
167 * buffers. Note that we take the buffers in the opposite ordering 122 * We wait for any pending IO to complete and make sure any clean
168 * from the one in which they were submitted for IO. 123 * buffers are removed from the transaction.
124 *
125 * Return 1 if we performed any actions which might have destroyed the
126 * checkpoint. (journal_remove_checkpoint() deletes the transaction when
127 * the last checkpoint buffer is cleansed)
169 * 128 *
170 * Called with j_list_lock held. 129 * Called with j_list_lock held.
171 */ 130 */
172 131static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
173static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
174{ 132{
175 struct journal_head *jh; 133 struct journal_head *jh, *next_jh, *last_jh;
176 struct buffer_head *bh; 134 struct buffer_head *bh;
177 tid_t this_tid; 135 int ret = 0;
178 int released = 0; 136
179 137 assert_spin_locked(&journal->j_list_lock);
180 this_tid = transaction->t_tid; 138 jh = transaction->t_checkpoint_list;
181restart: 139 if (!jh)
182 /* Didn't somebody clean up the transaction in the meanwhile */ 140 return 0;
183 if (journal->j_checkpoint_transactions != transaction || 141
184 transaction->t_tid != this_tid) 142 last_jh = jh->b_cpprev;
185 return; 143 next_jh = jh;
186 while (!released && transaction->t_checkpoint_io_list) { 144 do {
187 jh = transaction->t_checkpoint_io_list; 145 jh = next_jh;
188 bh = jh2bh(jh); 146 bh = jh2bh(jh);
189 if (!jbd_trylock_bh_state(bh)) {
190 jbd_sync_bh(journal, bh);
191 spin_lock(&journal->j_list_lock);
192 goto restart;
193 }
194 if (buffer_locked(bh)) { 147 if (buffer_locked(bh)) {
195 atomic_inc(&bh->b_count); 148 atomic_inc(&bh->b_count);
196 spin_unlock(&journal->j_list_lock); 149 spin_unlock(&journal->j_list_lock);
197 jbd_unlock_bh_state(bh);
198 wait_on_buffer(bh); 150 wait_on_buffer(bh);
199 /* the journal_head may have gone by now */ 151 /* the journal_head may have gone by now */
200 BUFFER_TRACE(bh, "brelse"); 152 BUFFER_TRACE(bh, "brelse");
201 __brelse(bh); 153 __brelse(bh);
202 spin_lock(&journal->j_list_lock); 154 goto out_return_1;
203 goto restart;
204 } 155 }
156
205 /* 157 /*
206 * Now in whatever state the buffer currently is, we know that 158 * This is foul
207 * it has been written out and so we can drop it from the list
208 */ 159 */
209 released = __journal_remove_checkpoint(jh); 160 if (!jbd_trylock_bh_state(bh)) {
210 jbd_unlock_bh_state(bh); 161 jbd_sync_bh(journal, bh);
211 } 162 goto out_return_1;
163 }
164
165 if (jh->b_transaction != NULL) {
166 transaction_t *t = jh->b_transaction;
167 tid_t tid = t->t_tid;
168
169 spin_unlock(&journal->j_list_lock);
170 jbd_unlock_bh_state(bh);
171 log_start_commit(journal, tid);
172 log_wait_commit(journal, tid);
173 goto out_return_1;
174 }
175
176 /*
177 * AKPM: I think the buffer_jbddirty test is redundant - it
178 * shouldn't have NULL b_transaction?
179 */
180 next_jh = jh->b_cpnext;
181 if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
182 BUFFER_TRACE(bh, "remove from checkpoint");
183 __journal_remove_checkpoint(jh);
184 jbd_unlock_bh_state(bh);
185 journal_remove_journal_head(bh);
186 __brelse(bh);
187 ret = 1;
188 } else {
189 jbd_unlock_bh_state(bh);
190 }
191 } while (jh != last_jh);
192
193 return ret;
194out_return_1:
195 spin_lock(&journal->j_list_lock);
196 return 1;
212} 197}
213 198
214#define NR_BATCH 64 199#define NR_BATCH 64
@@ -218,7 +203,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
218{ 203{
219 int i; 204 int i;
220 205
206 spin_unlock(&journal->j_list_lock);
221 ll_rw_block(SWRITE, *batch_count, bhs); 207 ll_rw_block(SWRITE, *batch_count, bhs);
208 spin_lock(&journal->j_list_lock);
222 for (i = 0; i < *batch_count; i++) { 209 for (i = 0; i < *batch_count; i++) {
223 struct buffer_head *bh = bhs[i]; 210 struct buffer_head *bh = bhs[i];
224 clear_buffer_jwrite(bh); 211 clear_buffer_jwrite(bh);
@@ -234,46 +221,19 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
234 * Return 1 if something happened which requires us to abort the current 221 * Return 1 if something happened which requires us to abort the current
235 * scan of the checkpoint list. 222 * scan of the checkpoint list.
236 * 223 *
237 * Called with j_list_lock held and drops it if 1 is returned 224 * Called with j_list_lock held.
238 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 225 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
239 */ 226 */
240static int __process_buffer(journal_t *journal, struct journal_head *jh, 227static int __flush_buffer(journal_t *journal, struct journal_head *jh,
241 struct buffer_head **bhs, int *batch_count) 228 struct buffer_head **bhs, int *batch_count,
229 int *drop_count)
242{ 230{
243 struct buffer_head *bh = jh2bh(jh); 231 struct buffer_head *bh = jh2bh(jh);
244 int ret = 0; 232 int ret = 0;
245 233
246 if (buffer_locked(bh)) { 234 if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
247 get_bh(bh); 235 J_ASSERT_JH(jh, jh->b_transaction == NULL);
248 spin_unlock(&journal->j_list_lock);
249 jbd_unlock_bh_state(bh);
250 wait_on_buffer(bh);
251 /* the journal_head may have gone by now */
252 BUFFER_TRACE(bh, "brelse");
253 put_bh(bh);
254 ret = 1;
255 }
256 else if (jh->b_transaction != NULL) {
257 transaction_t *t = jh->b_transaction;
258 tid_t tid = t->t_tid;
259 236
260 spin_unlock(&journal->j_list_lock);
261 jbd_unlock_bh_state(bh);
262 log_start_commit(journal, tid);
263 log_wait_commit(journal, tid);
264 ret = 1;
265 }
266 else if (!buffer_dirty(bh)) {
267 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
268 BUFFER_TRACE(bh, "remove from checkpoint");
269 __journal_remove_checkpoint(jh);
270 spin_unlock(&journal->j_list_lock);
271 jbd_unlock_bh_state(bh);
272 journal_remove_journal_head(bh);
273 put_bh(bh);
274 ret = 1;
275 }
276 else {
277 /* 237 /*
278 * Important: we are about to write the buffer, and 238 * Important: we are about to write the buffer, and
279 * possibly block, while still holding the journal lock. 239 * possibly block, while still holding the journal lock.
@@ -286,30 +246,45 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
286 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 246 J_ASSERT_BH(bh, !buffer_jwrite(bh));
287 set_buffer_jwrite(bh); 247 set_buffer_jwrite(bh);
288 bhs[*batch_count] = bh; 248 bhs[*batch_count] = bh;
289 __buffer_relink_io(jh);
290 jbd_unlock_bh_state(bh); 249 jbd_unlock_bh_state(bh);
291 (*batch_count)++; 250 (*batch_count)++;
292 if (*batch_count == NR_BATCH) { 251 if (*batch_count == NR_BATCH) {
293 spin_unlock(&journal->j_list_lock);
294 __flush_batch(journal, bhs, batch_count); 252 __flush_batch(journal, bhs, batch_count);
295 ret = 1; 253 ret = 1;
296 } 254 }
255 } else {
256 int last_buffer = 0;
257 if (jh->b_cpnext == jh) {
258 /* We may be about to drop the transaction. Tell the
259 * caller that the lists have changed.
260 */
261 last_buffer = 1;
262 }
263 if (__try_to_free_cp_buf(jh)) {
264 (*drop_count)++;
265 ret = last_buffer;
266 }
297 } 267 }
298 return ret; 268 return ret;
299} 269}
300 270
301/* 271/*
302 * Perform an actual checkpoint. We take the first transaction on the 272 * Perform an actual checkpoint. We don't write out only enough to
303 * list of transactions to be checkpointed and send all its buffers 273 * satisfy the current blocked requests: rather we submit a reasonably
304 * to disk. We submit larger chunks of data at once. 274 * sized chunk of the outstanding data to disk at once for
275 * efficiency. __log_wait_for_space() will retry if we didn't free enough.
305 * 276 *
277 * However, we _do_ take into account the amount requested so that once
278 * the IO has been queued, we can return as soon as enough of it has
279 * completed to disk.
280 *
306 * The journal should be locked before calling this function. 281 * The journal should be locked before calling this function.
307 */ 282 */
308int log_do_checkpoint(journal_t *journal) 283int log_do_checkpoint(journal_t *journal)
309{ 284{
310 transaction_t *transaction;
311 tid_t this_tid;
312 int result; 285 int result;
286 int batch_count = 0;
287 struct buffer_head *bhs[NR_BATCH];
313 288
314 jbd_debug(1, "Start checkpoint\n"); 289 jbd_debug(1, "Start checkpoint\n");
315 290
@@ -324,70 +299,79 @@ int log_do_checkpoint(journal_t *journal)
324 return result; 299 return result;
325 300
326 /* 301 /*
327 * OK, we need to start writing disk blocks. Take one transaction 302 * OK, we need to start writing disk blocks. Try to free up a
328 * and write it. 303 * quarter of the log in a single checkpoint if we can.
329 */ 304 */
330 spin_lock(&journal->j_list_lock);
331 if (!journal->j_checkpoint_transactions)
332 goto out;
333 transaction = journal->j_checkpoint_transactions;
334 this_tid = transaction->t_tid;
335restart:
336 /* 305 /*
337 * If someone cleaned up this transaction while we slept, we're 306 * AKPM: check this code. I had a feeling a while back that it
338 * done (maybe it's a new transaction, but it fell at the same 307 * degenerates into a busy loop at unmount time.
339 * address).
340 */ 308 */
341 if (journal->j_checkpoint_transactions == transaction && 309 spin_lock(&journal->j_list_lock);
342 transaction->t_tid == this_tid) { 310 while (journal->j_checkpoint_transactions) {
343 int batch_count = 0; 311 transaction_t *transaction;
344 struct buffer_head *bhs[NR_BATCH]; 312 struct journal_head *jh, *last_jh, *next_jh;
345 struct journal_head *jh; 313 int drop_count = 0;
346 int retry = 0; 314 int cleanup_ret, retry = 0;
347 315 tid_t this_tid;
348 while (!retry && transaction->t_checkpoint_list) { 316
317 transaction = journal->j_checkpoint_transactions;
318 this_tid = transaction->t_tid;
319 jh = transaction->t_checkpoint_list;
320 last_jh = jh->b_cpprev;
321 next_jh = jh;
322 do {
349 struct buffer_head *bh; 323 struct buffer_head *bh;
350 324
351 jh = transaction->t_checkpoint_list; 325 jh = next_jh;
326 next_jh = jh->b_cpnext;
352 bh = jh2bh(jh); 327 bh = jh2bh(jh);
353 if (!jbd_trylock_bh_state(bh)) { 328 if (!jbd_trylock_bh_state(bh)) {
354 jbd_sync_bh(journal, bh); 329 jbd_sync_bh(journal, bh);
330 spin_lock(&journal->j_list_lock);
355 retry = 1; 331 retry = 1;
356 break; 332 break;
357 } 333 }
358 retry = __process_buffer(journal, jh, bhs, 334 retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
359 &batch_count); 335 if (cond_resched_lock(&journal->j_list_lock)) {
360 if (!retry &&
361 lock_need_resched(&journal->j_list_lock)) {
362 spin_unlock(&journal->j_list_lock);
363 retry = 1; 336 retry = 1;
364 break; 337 break;
365 } 338 }
366 } 339 } while (jh != last_jh && !retry);
367 340
368 if (batch_count) { 341 if (batch_count) {
369 if (!retry) {
370 spin_unlock(&journal->j_list_lock);
371 retry = 1;
372 }
373 __flush_batch(journal, bhs, &batch_count); 342 __flush_batch(journal, bhs, &batch_count);
343 retry = 1;
374 } 344 }
375 345
376 if (retry) {
377 spin_lock(&journal->j_list_lock);
378 goto restart;
379 }
380 /* 346 /*
381 * Now we have cleaned up the first transaction's checkpoint 347 * If someone cleaned up this transaction while we slept, we're
382 * list. Let's clean up the second one. 348 * done
349 */
350 if (journal->j_checkpoint_transactions != transaction)
351 break;
352 if (retry)
353 continue;
354 /*
355 * Maybe it's a new transaction, but it fell at the same
356 * address
383 */ 357 */
384 __wait_cp_io(journal, transaction); 358 if (transaction->t_tid != this_tid)
359 continue;
360 /*
361 * We have walked the whole transaction list without
362 * finding anything to write to disk. We had better be
363 * able to make some progress or we are in trouble.
364 */
365 cleanup_ret = __cleanup_transaction(journal, transaction);
366 J_ASSERT(drop_count != 0 || cleanup_ret != 0);
367 if (journal->j_checkpoint_transactions != transaction)
368 break;
385 } 369 }
386out:
387 spin_unlock(&journal->j_list_lock); 370 spin_unlock(&journal->j_list_lock);
388 result = cleanup_journal_tail(journal); 371 result = cleanup_journal_tail(journal);
389 if (result < 0) 372 if (result < 0)
390 return result; 373 return result;
374
391 return 0; 375 return 0;
392} 376}
393 377
@@ -472,91 +456,52 @@ int cleanup_journal_tail(journal_t *journal)
472/* Checkpoint list management */ 456/* Checkpoint list management */
473 457
474/* 458/*
475 * journal_clean_one_cp_list
476 *
477 * Find all the written-back checkpoint buffers in the given list and release them.
478 *
479 * Called with the journal locked.
480 * Called with j_list_lock held.
481 * Returns number of bufers reaped (for debug)
482 */
483
484static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
485{
486 struct journal_head *last_jh;
487 struct journal_head *next_jh = jh;
488 int ret, freed = 0;
489
490 *released = 0;
491 if (!jh)
492 return 0;
493
494 last_jh = jh->b_cpprev;
495 do {
496 jh = next_jh;
497 next_jh = jh->b_cpnext;
498 /* Use trylock because of the ranking */
499 if (jbd_trylock_bh_state(jh2bh(jh))) {
500 ret = __try_to_free_cp_buf(jh);
501 if (ret) {
502 freed++;
503 if (ret == 2) {
504 *released = 1;
505 return freed;
506 }
507 }
508 }
509 /*
510 * This function only frees up some memory if possible so we
511 * dont have an obligation to finish processing. Bail out if
512 * preemption requested:
513 */
514 if (need_resched())
515 return freed;
516 } while (jh != last_jh);
517
518 return freed;
519}
520
521/*
522 * journal_clean_checkpoint_list 459 * journal_clean_checkpoint_list
523 * 460 *
524 * Find all the written-back checkpoint buffers in the journal and release them. 461 * Find all the written-back checkpoint buffers in the journal and release them.
525 * 462 *
526 * Called with the journal locked. 463 * Called with the journal locked.
527 * Called with j_list_lock held. 464 * Called with j_list_lock held.
528 * Returns number of buffers reaped (for debug) 465 * Returns number of bufers reaped (for debug)
529 */ 466 */
530 467
531int __journal_clean_checkpoint_list(journal_t *journal) 468int __journal_clean_checkpoint_list(journal_t *journal)
532{ 469{
533 transaction_t *transaction, *last_transaction, *next_transaction; 470 transaction_t *transaction, *last_transaction, *next_transaction;
534 int ret = 0, released; 471 int ret = 0;
535 472
536 transaction = journal->j_checkpoint_transactions; 473 transaction = journal->j_checkpoint_transactions;
537 if (!transaction) 474 if (transaction == 0)
538 goto out; 475 goto out;
539 476
540 last_transaction = transaction->t_cpprev; 477 last_transaction = transaction->t_cpprev;
541 next_transaction = transaction; 478 next_transaction = transaction;
542 do { 479 do {
480 struct journal_head *jh;
481
543 transaction = next_transaction; 482 transaction = next_transaction;
544 next_transaction = transaction->t_cpnext; 483 next_transaction = transaction->t_cpnext;
545 ret += journal_clean_one_cp_list(transaction-> 484 jh = transaction->t_checkpoint_list;
546 t_checkpoint_list, &released); 485 if (jh) {
547 if (need_resched()) 486 struct journal_head *last_jh = jh->b_cpprev;
548 goto out; 487 struct journal_head *next_jh = jh;
549 if (released) 488
550 continue; 489 do {
551 /* 490 jh = next_jh;
552 * It is essential that we are as careful as in the case of 491 next_jh = jh->b_cpnext;
553 * t_checkpoint_list with removing the buffer from the list as 492 /* Use trylock because of the ranknig */
554 * we can possibly see not yet submitted buffers on io_list 493 if (jbd_trylock_bh_state(jh2bh(jh)))
555 */ 494 ret += __try_to_free_cp_buf(jh);
556 ret += journal_clean_one_cp_list(transaction-> 495 /*
557 t_checkpoint_io_list, &released); 496 * This function only frees up some memory
558 if (need_resched()) 497 * if possible so we dont have an obligation
559 goto out; 498 * to finish processing. Bail out if preemption
499 * requested:
500 */
501 if (need_resched())
502 goto out;
503 } while (jh != last_jh);
504 }
560 } while (transaction != last_transaction); 505 } while (transaction != last_transaction);
561out: 506out:
562 return ret; 507 return ret;
@@ -571,22 +516,18 @@ out:
571 * buffer updates committed in that transaction have safely been stored 516 * buffer updates committed in that transaction have safely been stored
572 * elsewhere on disk. To achieve this, all of the buffers in a 517 * elsewhere on disk. To achieve this, all of the buffers in a
573 * transaction need to be maintained on the transaction's checkpoint 518 * transaction need to be maintained on the transaction's checkpoint
574 * lists until they have been rewritten, at which point this function is 519 * list until they have been rewritten, at which point this function is
575 * called to remove the buffer from the existing transaction's 520 * called to remove the buffer from the existing transaction's
576 * checkpoint lists. 521 * checkpoint list.
577 *
578 * The function returns 1 if it frees the transaction, 0 otherwise.
579 * 522 *
580 * This function is called with the journal locked. 523 * This function is called with the journal locked.
581 * This function is called with j_list_lock held. 524 * This function is called with j_list_lock held.
582 * This function is called with jbd_lock_bh_state(jh2bh(jh))
583 */ 525 */
584 526
585int __journal_remove_checkpoint(struct journal_head *jh) 527void __journal_remove_checkpoint(struct journal_head *jh)
586{ 528{
587 transaction_t *transaction; 529 transaction_t *transaction;
588 journal_t *journal; 530 journal_t *journal;
589 int ret = 0;
590 531
591 JBUFFER_TRACE(jh, "entry"); 532 JBUFFER_TRACE(jh, "entry");
592 533
@@ -597,10 +538,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
597 journal = transaction->t_journal; 538 journal = transaction->t_journal;
598 539
599 __buffer_unlink(jh); 540 __buffer_unlink(jh);
600 jh->b_cp_transaction = NULL;
601 541
602 if (transaction->t_checkpoint_list != NULL || 542 if (transaction->t_checkpoint_list != NULL)
603 transaction->t_checkpoint_io_list != NULL)
604 goto out; 543 goto out;
605 JBUFFER_TRACE(jh, "transaction has no more buffers"); 544 JBUFFER_TRACE(jh, "transaction has no more buffers");
606 545
@@ -626,10 +565,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
626 /* Just in case anybody was waiting for more transactions to be 565 /* Just in case anybody was waiting for more transactions to be
627 checkpointed... */ 566 checkpointed... */
628 wake_up(&journal->j_wait_logspace); 567 wake_up(&journal->j_wait_logspace);
629 ret = 1;
630out: 568out:
631 JBUFFER_TRACE(jh, "exit"); 569 JBUFFER_TRACE(jh, "exit");
632 return ret;
633} 570}
634 571
635/* 572/*
@@ -691,7 +628,6 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
691 J_ASSERT(transaction->t_shadow_list == NULL); 628 J_ASSERT(transaction->t_shadow_list == NULL);
692 J_ASSERT(transaction->t_log_list == NULL); 629 J_ASSERT(transaction->t_log_list == NULL);
693 J_ASSERT(transaction->t_checkpoint_list == NULL); 630 J_ASSERT(transaction->t_checkpoint_list == NULL);
694 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
695 J_ASSERT(transaction->t_updates == 0); 631 J_ASSERT(transaction->t_updates == 0);
696 J_ASSERT(journal->j_committing_transaction != transaction); 632 J_ASSERT(journal->j_committing_transaction != transaction);
697 J_ASSERT(journal->j_running_transaction != transaction); 633 J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 29e62d98bae6..002ad2bbc769 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -829,8 +829,7 @@ restart_loop:
829 journal->j_committing_transaction = NULL; 829 journal->j_committing_transaction = NULL;
830 spin_unlock(&journal->j_state_lock); 830 spin_unlock(&journal->j_state_lock);
831 831
832 if (commit_transaction->t_checkpoint_list == NULL && 832 if (commit_transaction->t_checkpoint_list == NULL) {
833 commit_transaction->t_checkpoint_io_list == NULL) {
834 __journal_drop_transaction(journal, commit_transaction); 833 __journal_drop_transaction(journal, commit_transaction);
835 } else { 834 } else {
836 if (journal->j_checkpoint_transactions == NULL) { 835 if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 3eaf6e701087..da6354baa0b8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
111/* 111/*
112 * The server lockd has called us back to tell us the lock was granted 112 * The server lockd has called us back to tell us the lock was granted
113 */ 113 */
114u32 114u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
115nlmclnt_grant(struct nlm_lock *lock)
116{ 115{
116 const struct file_lock *fl = &lock->fl;
117 const struct nfs_fh *fh = &lock->fh;
117 struct nlm_wait *block; 118 struct nlm_wait *block;
118 u32 res = nlm_lck_denied; 119 u32 res = nlm_lck_denied;
119 120
@@ -122,14 +123,20 @@ nlmclnt_grant(struct nlm_lock *lock)
122 * Warning: must not use cookie to match it! 123 * Warning: must not use cookie to match it!
123 */ 124 */
124 list_for_each_entry(block, &nlm_blocked, b_list) { 125 list_for_each_entry(block, &nlm_blocked, b_list) {
125 if (nlm_compare_locks(block->b_lock, &lock->fl)) { 126 struct file_lock *fl_blocked = block->b_lock;
126 /* Alright, we found a lock. Set the return status 127
127 * and wake up the caller 128 if (!nlm_compare_locks(fl_blocked, fl))
128 */ 129 continue;
129 block->b_status = NLM_LCK_GRANTED; 130 if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
130 wake_up(&block->b_wait); 131 continue;
131 res = nlm_granted; 132 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0)
132 } 133 continue;
134 /* Alright, we found a lock. Set the return status
135 * and wake up the caller
136 */
137 block->b_status = NLM_LCK_GRANTED;
138 wake_up(&block->b_wait);
139 res = nlm_granted;
133 } 140 }
134 return res; 141 return res;
135} 142}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4063095d849e..b10f913aa06a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
228 resp->cookie = argp->cookie; 228 resp->cookie = argp->cookie;
229 229
230 dprintk("lockd: GRANTED called\n"); 230 dprintk("lockd: GRANTED called\n");
231 resp->status = nlmclnt_grant(&argp->lock); 231 resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
232 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 232 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
233 return rpc_success; 233 return rpc_success;
234} 234}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3bc437e0cf5b..35681d9cf1fc 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
256 resp->cookie = argp->cookie; 256 resp->cookie = argp->cookie;
257 257
258 dprintk("lockd: GRANTED called\n"); 258 dprintk("lockd: GRANTED called\n");
259 resp->status = nlmclnt_grant(&argp->lock); 259 resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
260 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 260 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
261 return rpc_success; 261 return rpc_success;
262} 262}
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 42eb53b5293b..23ceaa7127b4 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -208,6 +208,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
208#define DLM_LOCK_RES_IN_PROGRESS 0x00000010 208#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
209#define DLM_LOCK_RES_MIGRATING 0x00000020 209#define DLM_LOCK_RES_MIGRATING 0x00000020
210 210
211/* max milliseconds to wait to sync up a network failure with a node death */
212#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
213
211#define DLM_PURGE_INTERVAL_MS (8 * 1000) 214#define DLM_PURGE_INTERVAL_MS (8 * 1000)
212 215
213struct dlm_lock_resource 216struct dlm_lock_resource
@@ -658,6 +661,7 @@ int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
658void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); 661void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
659void dlm_wait_for_recovery(struct dlm_ctxt *dlm); 662void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
660int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); 663int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
664int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
661 665
662void dlm_put(struct dlm_ctxt *dlm); 666void dlm_put(struct dlm_ctxt *dlm);
663struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); 667struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 6001b22a997d..f66e2d818ccd 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -392,6 +392,11 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
392 } else { 392 } else {
393 mlog_errno(tmpret); 393 mlog_errno(tmpret);
394 if (dlm_is_host_down(tmpret)) { 394 if (dlm_is_host_down(tmpret)) {
395 /* instead of logging the same network error over
396 * and over, sleep here and wait for the heartbeat
397 * to notice the node is dead. times out after 5s. */
398 dlm_wait_for_node_death(dlm, res->owner,
399 DLM_NODE_DEATH_WAIT_MAX);
395 ret = DLM_RECOVERING; 400 ret = DLM_RECOVERING;
396 mlog(0, "node %u died so returning DLM_RECOVERING " 401 mlog(0, "node %u died so returning DLM_RECOVERING "
397 "from convert message!\n", res->owner); 402 "from convert message!\n", res->owner);
@@ -421,7 +426,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
421 struct dlm_lockstatus *lksb; 426 struct dlm_lockstatus *lksb;
422 enum dlm_status status = DLM_NORMAL; 427 enum dlm_status status = DLM_NORMAL;
423 u32 flags; 428 u32 flags;
424 int call_ast = 0, kick_thread = 0; 429 int call_ast = 0, kick_thread = 0, ast_reserved = 0;
425 430
426 if (!dlm_grab(dlm)) { 431 if (!dlm_grab(dlm)) {
427 dlm_error(DLM_REJECTED); 432 dlm_error(DLM_REJECTED);
@@ -490,6 +495,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
490 status = __dlm_lockres_state_to_status(res); 495 status = __dlm_lockres_state_to_status(res);
491 if (status == DLM_NORMAL) { 496 if (status == DLM_NORMAL) {
492 __dlm_lockres_reserve_ast(res); 497 __dlm_lockres_reserve_ast(res);
498 ast_reserved = 1;
493 res->state |= DLM_LOCK_RES_IN_PROGRESS; 499 res->state |= DLM_LOCK_RES_IN_PROGRESS;
494 status = __dlmconvert_master(dlm, res, lock, flags, 500 status = __dlmconvert_master(dlm, res, lock, flags,
495 cnv->requested_type, 501 cnv->requested_type,
@@ -512,10 +518,10 @@ leave:
512 else 518 else
513 dlm_lock_put(lock); 519 dlm_lock_put(lock);
514 520
515 /* either queue the ast or release it */ 521 /* either queue the ast or release it, if reserved */
516 if (call_ast) 522 if (call_ast)
517 dlm_queue_ast(dlm, lock); 523 dlm_queue_ast(dlm, lock);
518 else 524 else if (ast_reserved)
519 dlm_lockres_release_ast(dlm, res); 525 dlm_lockres_release_ast(dlm, res);
520 526
521 if (kick_thread) 527 if (kick_thread)
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index d1a0038557a3..671d4ff222cc 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -220,6 +220,17 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
220 dlm_error(status); 220 dlm_error(status);
221 dlm_revert_pending_lock(res, lock); 221 dlm_revert_pending_lock(res, lock);
222 dlm_lock_put(lock); 222 dlm_lock_put(lock);
223 } else if (dlm_is_recovery_lock(res->lockname.name,
224 res->lockname.len)) {
225 /* special case for the $RECOVERY lock.
226 * there will never be an AST delivered to put
227 * this lock on the proper secondary queue
228 * (granted), so do it manually. */
229 mlog(0, "%s: $RECOVERY lock for this node (%u) is "
230 "mastered by %u; got lock, manually granting (no ast)\n",
231 dlm->name, dlm->node_num, res->owner);
232 list_del_init(&lock->list);
233 list_add_tail(&lock->list, &res->granted);
223 } 234 }
224 spin_unlock(&res->spinlock); 235 spin_unlock(&res->spinlock);
225 236
@@ -646,7 +657,19 @@ retry_lock:
646 mlog(0, "retrying lock with migration/" 657 mlog(0, "retrying lock with migration/"
647 "recovery/in progress\n"); 658 "recovery/in progress\n");
648 msleep(100); 659 msleep(100);
649 dlm_wait_for_recovery(dlm); 660 /* no waiting for dlm_reco_thread */
661 if (recovery) {
662 if (status == DLM_RECOVERING) {
663 mlog(0, "%s: got RECOVERING "
664 "for $REOCVERY lock, master "
665 "was %u\n", dlm->name,
666 res->owner);
667 dlm_wait_for_node_death(dlm, res->owner,
668 DLM_NODE_DEATH_WAIT_MAX);
669 }
670 } else {
671 dlm_wait_for_recovery(dlm);
672 }
650 goto retry_lock; 673 goto retry_lock;
651 } 674 }
652 675
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a3194fe173d9..2e2e95e69499 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2482,7 +2482,9 @@ top:
2482 atomic_set(&mle->woken, 1); 2482 atomic_set(&mle->woken, 1);
2483 spin_unlock(&mle->spinlock); 2483 spin_unlock(&mle->spinlock);
2484 wake_up(&mle->wq); 2484 wake_up(&mle->wq);
2485 /* final put will take care of list removal */ 2485 /* do not need events any longer, so detach
2486 * from heartbeat */
2487 __dlm_mle_detach_hb_events(dlm, mle);
2486 __dlm_put_mle(mle); 2488 __dlm_put_mle(mle);
2487 } 2489 }
2488 continue; 2490 continue;
@@ -2537,6 +2539,9 @@ top:
2537 spin_unlock(&res->spinlock); 2539 spin_unlock(&res->spinlock);
2538 dlm_lockres_put(res); 2540 dlm_lockres_put(res);
2539 2541
2542 /* about to get rid of mle, detach from heartbeat */
2543 __dlm_mle_detach_hb_events(dlm, mle);
2544
2540 /* dump the mle */ 2545 /* dump the mle */
2541 spin_lock(&dlm->master_lock); 2546 spin_lock(&dlm->master_lock);
2542 __dlm_put_mle(mle); 2547 __dlm_put_mle(mle);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 186e9a76aa58..ed76bda1a534 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -278,6 +278,24 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
278 return dead; 278 return dead;
279} 279}
280 280
281int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
282{
283 if (timeout) {
284 mlog(ML_NOTICE, "%s: waiting %dms for notification of "
285 "death of node %u\n", dlm->name, timeout, node);
286 wait_event_timeout(dlm->dlm_reco_thread_wq,
287 dlm_is_node_dead(dlm, node),
288 msecs_to_jiffies(timeout));
289 } else {
290 mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
291 "of death of node %u\n", dlm->name, node);
292 wait_event(dlm->dlm_reco_thread_wq,
293 dlm_is_node_dead(dlm, node));
294 }
295 /* for now, return 0 */
296 return 0;
297}
298
281/* callers of the top-level api calls (dlmlock/dlmunlock) should 299/* callers of the top-level api calls (dlmlock/dlmunlock) should
282 * block on the dlm->reco.event when recovery is in progress. 300 * block on the dlm->reco.event when recovery is in progress.
283 * the dlm recovery thread will set this state when it begins 301 * the dlm recovery thread will set this state when it begins
@@ -2032,6 +2050,30 @@ again:
2032 dlm->reco.new_master); 2050 dlm->reco.new_master);
2033 status = -EEXIST; 2051 status = -EEXIST;
2034 } else { 2052 } else {
2053 status = 0;
2054
2055 /* see if recovery was already finished elsewhere */
2056 spin_lock(&dlm->spinlock);
2057 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
2058 status = -EINVAL;
2059 mlog(0, "%s: got reco EX lock, but "
2060 "node got recovered already\n", dlm->name);
2061 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
2062 mlog(ML_ERROR, "%s: new master is %u "
2063 "but no dead node!\n",
2064 dlm->name, dlm->reco.new_master);
2065 BUG();
2066 }
2067 }
2068 spin_unlock(&dlm->spinlock);
2069 }
2070
2071 /* if this node has actually become the recovery master,
2072 * set the master and send the messages to begin recovery */
2073 if (!status) {
2074 mlog(0, "%s: dead=%u, this=%u, sending "
2075 "begin_reco now\n", dlm->name,
2076 dlm->reco.dead_node, dlm->node_num);
2035 status = dlm_send_begin_reco_message(dlm, 2077 status = dlm_send_begin_reco_message(dlm,
2036 dlm->reco.dead_node); 2078 dlm->reco.dead_node);
2037 /* this always succeeds */ 2079 /* this always succeeds */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index fa0bcac5ceae..d329c9df90ae 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1584,10 +1584,9 @@ static int ocfs2_commit_thread(void *arg)
1584 while (!(kthread_should_stop() && 1584 while (!(kthread_should_stop() &&
1585 atomic_read(&journal->j_num_trans) == 0)) { 1585 atomic_read(&journal->j_num_trans) == 0)) {
1586 1586
1587 wait_event_interruptible_timeout(osb->checkpoint_event, 1587 wait_event_interruptible(osb->checkpoint_event,
1588 atomic_read(&journal->j_num_trans) 1588 atomic_read(&journal->j_num_trans)
1589 || kthread_should_stop(), 1589 || kthread_should_stop());
1590 OCFS2_CHECKPOINT_INTERVAL);
1591 1590
1592 status = ocfs2_commit_cache(osb); 1591 status = ocfs2_commit_cache(osb);
1593 if (status < 0) 1592 if (status < 0)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 7d0a816184fa..2f3a6acdac45 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -29,8 +29,6 @@
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/jbd.h> 30#include <linux/jbd.h>
31 31
32#define OCFS2_CHECKPOINT_INTERVAL (8 * HZ)
33
34enum ocfs2_journal_state { 32enum ocfs2_journal_state {
35 OCFS2_JOURNAL_FREE = 0, 33 OCFS2_JOURNAL_FREE = 0,
36 OCFS2_JOURNAL_LOADED, 34 OCFS2_JOURNAL_LOADED,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 43de3ba83332..ab8894c3b9e5 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -228,7 +228,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
228 acl = ERR_PTR(retval); 228 acl = ERR_PTR(retval);
229 } else { 229 } else {
230 acl = posix_acl_from_disk(value, retval); 230 acl = posix_acl_from_disk(value, retval);
231 *p_acl = posix_acl_dup(acl); 231 if (!IS_ERR(acl))
232 *p_acl = posix_acl_dup(acl);
232 } 233 }
233 234
234 kfree(value); 235 kfree(value);
diff --git a/fs/select.c b/fs/select.c
index 6ce68a9c8976..1815a57d2255 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -404,7 +404,7 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
404 goto sticky; 404 goto sticky;
405 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); 405 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
406 rtv.tv_sec = timeout; 406 rtv.tv_sec = timeout;
407 if (timeval_compare(&rtv, &tv) < 0) 407 if (timeval_compare(&rtv, &tv) >= 0)
408 rtv = tv; 408 rtv = tv;
409 if (copy_to_user(tvp, &rtv, sizeof(rtv))) { 409 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
410sticky: 410sticky:
@@ -471,7 +471,7 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
471 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 471 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
472 1000; 472 1000;
473 rts.tv_sec = timeout; 473 rts.tv_sec = timeout;
474 if (timespec_compare(&rts, &ts) < 0) 474 if (timespec_compare(&rts, &ts) >= 0)
475 rts = ts; 475 rts = ts;
476 if (copy_to_user(tsp, &rts, sizeof(rts))) { 476 if (copy_to_user(tsp, &rts, sizeof(rts))) {
477sticky: 477sticky:
@@ -775,7 +775,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
775 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 775 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
776 1000; 776 1000;
777 rts.tv_sec = timeout; 777 rts.tv_sec = timeout;
778 if (timespec_compare(&rts, &ts) < 0) 778 if (timespec_compare(&rts, &ts) >= 0)
779 rts = ts; 779 rts = ts;
780 if (copy_to_user(tsp, &rts, sizeof(rts))) { 780 if (copy_to_user(tsp, &rts, sizeof(rts))) {
781 sticky: 781 sticky: