aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorSteve French <sfrench@us.ibm.com>2006-02-14 22:36:31 -0500
committerSteve French <sfrench@us.ibm.com>2006-02-14 22:36:31 -0500
commit0ed3f64ec3a7ad29e83e03607115eeffa32f553c (patch)
tree528681b043e947cfc51527d56098f586b6dfa217 /fs
parent5815449d1bfcb22f74b0e36a8b0631d6584cb7fc (diff)
parent10ee39fe3ff618d274e1cd0f6abbc2917b736bfd (diff)
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/compat.c37
-rw-r--r--fs/jbd/checkpoint.c418
-rw-r--r--fs/jbd/commit.c3
-rw-r--r--fs/lockd/clntlock.c27
-rw-r--r--fs/lockd/svc4proc.c2
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/reiserfs/xattr_acl.c3
-rw-r--r--fs/select.c32
-rw-r--r--fs/stat.c22
10 files changed, 269 insertions, 279 deletions
diff --git a/fs/compat.c b/fs/compat.c
index 70c5af4cc270..a2ba78bdf7f7 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1751,11 +1751,15 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1751 ret = compat_core_sys_select(n, inp, outp, exp, &timeout); 1751 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1752 1752
1753 if (tvp) { 1753 if (tvp) {
1754 struct compat_timeval rtv;
1755
1754 if (current->personality & STICKY_TIMEOUTS) 1756 if (current->personality & STICKY_TIMEOUTS)
1755 goto sticky; 1757 goto sticky;
1756 tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); 1758 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1757 tv.tv_sec = timeout; 1759 rtv.tv_sec = timeout;
1758 if (copy_to_user(tvp, &tv, sizeof(tv))) { 1760 if (compat_timeval_compare(&rtv, &tv) < 0)
1761 rtv = tv;
1762 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
1759sticky: 1763sticky:
1760 /* 1764 /*
1761 * If an application puts its timeval in read-only 1765 * If an application puts its timeval in read-only
@@ -1822,13 +1826,17 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1822 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); 1826 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1823 1827
1824 if (tsp && !(current->personality & STICKY_TIMEOUTS)) { 1828 if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
1825 ts.tv_sec += timeout / HZ; 1829 struct compat_timespec rts;
1826 ts.tv_nsec += (timeout % HZ) * (1000000000/HZ); 1830
1827 if (ts.tv_nsec >= 1000000000) { 1831 rts.tv_sec = timeout / HZ;
1828 ts.tv_sec++; 1832 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
1829 ts.tv_nsec -= 1000000000; 1833 if (rts.tv_nsec >= NSEC_PER_SEC) {
1834 rts.tv_sec++;
1835 rts.tv_nsec -= NSEC_PER_SEC;
1830 } 1836 }
1831 (void)copy_to_user(tsp, &ts, sizeof(ts)); 1837 if (compat_timespec_compare(&rts, &ts) < 0)
1838 rts = ts;
1839 copy_to_user(tsp, &rts, sizeof(rts));
1832 } 1840 }
1833 1841
1834 if (ret == -ERESTARTNOHAND) { 1842 if (ret == -ERESTARTNOHAND) {
@@ -1918,12 +1926,17 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1918 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1926 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1919 1927
1920 if (tsp && timeout >= 0) { 1928 if (tsp && timeout >= 0) {
1929 struct compat_timespec rts;
1930
1921 if (current->personality & STICKY_TIMEOUTS) 1931 if (current->personality & STICKY_TIMEOUTS)
1922 goto sticky; 1932 goto sticky;
1923 /* Yes, we know it's actually an s64, but it's also positive. */ 1933 /* Yes, we know it's actually an s64, but it's also positive. */
1924 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; 1934 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
1925 ts.tv_sec = timeout; 1935 1000;
1926 if (copy_to_user(tsp, &ts, sizeof(ts))) { 1936 rts.tv_sec = timeout;
1937 if (compat_timespec_compare(&rts, &ts) < 0)
1938 rts = ts;
1939 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1927sticky: 1940sticky:
1928 /* 1941 /*
1929 * If an application puts its timeval in read-only 1942 * If an application puts its timeval in read-only
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e6265a0b56b8..543ed543d1e5 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,75 +24,29 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25 25
26/* 26/*
27 * Unlink a buffer from a transaction checkpoint list. 27 * Unlink a buffer from a transaction.
28 * 28 *
29 * Called with j_list_lock held. 29 * Called with j_list_lock held.
30 */ 30 */
31 31
32static void __buffer_unlink_first(struct journal_head *jh) 32static inline void __buffer_unlink(struct journal_head *jh)
33{ 33{
34 transaction_t *transaction; 34 transaction_t *transaction;
35 35
36 transaction = jh->b_cp_transaction; 36 transaction = jh->b_cp_transaction;
37 jh->b_cp_transaction = NULL;
37 38
38 jh->b_cpnext->b_cpprev = jh->b_cpprev; 39 jh->b_cpnext->b_cpprev = jh->b_cpprev;
39 jh->b_cpprev->b_cpnext = jh->b_cpnext; 40 jh->b_cpprev->b_cpnext = jh->b_cpnext;
40 if (transaction->t_checkpoint_list == jh) { 41 if (transaction->t_checkpoint_list == jh)
41 transaction->t_checkpoint_list = jh->b_cpnext; 42 transaction->t_checkpoint_list = jh->b_cpnext;
42 if (transaction->t_checkpoint_list == jh) 43 if (transaction->t_checkpoint_list == jh)
43 transaction->t_checkpoint_list = NULL; 44 transaction->t_checkpoint_list = NULL;
44 }
45}
46
47/*
48 * Unlink a buffer from a transaction checkpoint(io) list.
49 *
50 * Called with j_list_lock held.
51 */
52
53static inline void __buffer_unlink(struct journal_head *jh)
54{
55 transaction_t *transaction;
56
57 transaction = jh->b_cp_transaction;
58
59 __buffer_unlink_first(jh);
60 if (transaction->t_checkpoint_io_list == jh) {
61 transaction->t_checkpoint_io_list = jh->b_cpnext;
62 if (transaction->t_checkpoint_io_list == jh)
63 transaction->t_checkpoint_io_list = NULL;
64 }
65}
66
67/*
68 * Move a buffer from the checkpoint list to the checkpoint io list
69 *
70 * Called with j_list_lock held
71 */
72
73static inline void __buffer_relink_io(struct journal_head *jh)
74{
75 transaction_t *transaction;
76
77 transaction = jh->b_cp_transaction;
78 __buffer_unlink_first(jh);
79
80 if (!transaction->t_checkpoint_io_list) {
81 jh->b_cpnext = jh->b_cpprev = jh;
82 } else {
83 jh->b_cpnext = transaction->t_checkpoint_io_list;
84 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
85 jh->b_cpprev->b_cpnext = jh;
86 jh->b_cpnext->b_cpprev = jh;
87 }
88 transaction->t_checkpoint_io_list = jh;
89} 45}
90 46
91/* 47/*
92 * Try to release a checkpointed buffer from its transaction. 48 * Try to release a checkpointed buffer from its transaction.
93 * Returns 1 if we released it and 2 if we also released the 49 * Returns 1 if we released it.
94 * whole transaction.
95 *
96 * Requires j_list_lock 50 * Requires j_list_lock
97 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 51 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
98 */ 52 */
@@ -103,11 +57,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
103 57
104 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 58 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
105 JBUFFER_TRACE(jh, "remove from checkpoint list"); 59 JBUFFER_TRACE(jh, "remove from checkpoint list");
106 ret = __journal_remove_checkpoint(jh) + 1; 60 __journal_remove_checkpoint(jh);
107 jbd_unlock_bh_state(bh); 61 jbd_unlock_bh_state(bh);
108 journal_remove_journal_head(bh); 62 journal_remove_journal_head(bh);
109 BUFFER_TRACE(bh, "release"); 63 BUFFER_TRACE(bh, "release");
110 __brelse(bh); 64 __brelse(bh);
65 ret = 1;
111 } else { 66 } else {
112 jbd_unlock_bh_state(bh); 67 jbd_unlock_bh_state(bh);
113 } 68 }
@@ -162,53 +117,83 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
162} 117}
163 118
164/* 119/*
165 * Clean up transaction's list of buffers submitted for io. 120 * Clean up a transaction's checkpoint list.
166 * We wait for any pending IO to complete and remove any clean 121 *
167 * buffers. Note that we take the buffers in the opposite ordering 122 * We wait for any pending IO to complete and make sure any clean
168 * from the one in which they were submitted for IO. 123 * buffers are removed from the transaction.
124 *
125 * Return 1 if we performed any actions which might have destroyed the
126 * checkpoint. (journal_remove_checkpoint() deletes the transaction when
127 * the last checkpoint buffer is cleansed)
169 * 128 *
170 * Called with j_list_lock held. 129 * Called with j_list_lock held.
171 */ 130 */
172 131static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
173static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
174{ 132{
175 struct journal_head *jh; 133 struct journal_head *jh, *next_jh, *last_jh;
176 struct buffer_head *bh; 134 struct buffer_head *bh;
177 tid_t this_tid; 135 int ret = 0;
178 int released = 0; 136
179 137 assert_spin_locked(&journal->j_list_lock);
180 this_tid = transaction->t_tid; 138 jh = transaction->t_checkpoint_list;
181restart: 139 if (!jh)
182 /* Didn't somebody clean up the transaction in the meanwhile */ 140 return 0;
183 if (journal->j_checkpoint_transactions != transaction || 141
184 transaction->t_tid != this_tid) 142 last_jh = jh->b_cpprev;
185 return; 143 next_jh = jh;
186 while (!released && transaction->t_checkpoint_io_list) { 144 do {
187 jh = transaction->t_checkpoint_io_list; 145 jh = next_jh;
188 bh = jh2bh(jh); 146 bh = jh2bh(jh);
189 if (!jbd_trylock_bh_state(bh)) {
190 jbd_sync_bh(journal, bh);
191 spin_lock(&journal->j_list_lock);
192 goto restart;
193 }
194 if (buffer_locked(bh)) { 147 if (buffer_locked(bh)) {
195 atomic_inc(&bh->b_count); 148 atomic_inc(&bh->b_count);
196 spin_unlock(&journal->j_list_lock); 149 spin_unlock(&journal->j_list_lock);
197 jbd_unlock_bh_state(bh);
198 wait_on_buffer(bh); 150 wait_on_buffer(bh);
199 /* the journal_head may have gone by now */ 151 /* the journal_head may have gone by now */
200 BUFFER_TRACE(bh, "brelse"); 152 BUFFER_TRACE(bh, "brelse");
201 __brelse(bh); 153 __brelse(bh);
202 spin_lock(&journal->j_list_lock); 154 goto out_return_1;
203 goto restart;
204 } 155 }
156
205 /* 157 /*
206 * Now in whatever state the buffer currently is, we know that 158 * This is foul
207 * it has been written out and so we can drop it from the list
208 */ 159 */
209 released = __journal_remove_checkpoint(jh); 160 if (!jbd_trylock_bh_state(bh)) {
210 jbd_unlock_bh_state(bh); 161 jbd_sync_bh(journal, bh);
211 } 162 goto out_return_1;
163 }
164
165 if (jh->b_transaction != NULL) {
166 transaction_t *t = jh->b_transaction;
167 tid_t tid = t->t_tid;
168
169 spin_unlock(&journal->j_list_lock);
170 jbd_unlock_bh_state(bh);
171 log_start_commit(journal, tid);
172 log_wait_commit(journal, tid);
173 goto out_return_1;
174 }
175
176 /*
177 * AKPM: I think the buffer_jbddirty test is redundant - it
178 * shouldn't have NULL b_transaction?
179 */
180 next_jh = jh->b_cpnext;
181 if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
182 BUFFER_TRACE(bh, "remove from checkpoint");
183 __journal_remove_checkpoint(jh);
184 jbd_unlock_bh_state(bh);
185 journal_remove_journal_head(bh);
186 __brelse(bh);
187 ret = 1;
188 } else {
189 jbd_unlock_bh_state(bh);
190 }
191 } while (jh != last_jh);
192
193 return ret;
194out_return_1:
195 spin_lock(&journal->j_list_lock);
196 return 1;
212} 197}
213 198
214#define NR_BATCH 64 199#define NR_BATCH 64
@@ -218,7 +203,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
218{ 203{
219 int i; 204 int i;
220 205
206 spin_unlock(&journal->j_list_lock);
221 ll_rw_block(SWRITE, *batch_count, bhs); 207 ll_rw_block(SWRITE, *batch_count, bhs);
208 spin_lock(&journal->j_list_lock);
222 for (i = 0; i < *batch_count; i++) { 209 for (i = 0; i < *batch_count; i++) {
223 struct buffer_head *bh = bhs[i]; 210 struct buffer_head *bh = bhs[i];
224 clear_buffer_jwrite(bh); 211 clear_buffer_jwrite(bh);
@@ -234,46 +221,19 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
234 * Return 1 if something happened which requires us to abort the current 221 * Return 1 if something happened which requires us to abort the current
235 * scan of the checkpoint list. 222 * scan of the checkpoint list.
236 * 223 *
237 * Called with j_list_lock held and drops it if 1 is returned 224 * Called with j_list_lock held.
238 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 225 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
239 */ 226 */
240static int __process_buffer(journal_t *journal, struct journal_head *jh, 227static int __flush_buffer(journal_t *journal, struct journal_head *jh,
241 struct buffer_head **bhs, int *batch_count) 228 struct buffer_head **bhs, int *batch_count,
229 int *drop_count)
242{ 230{
243 struct buffer_head *bh = jh2bh(jh); 231 struct buffer_head *bh = jh2bh(jh);
244 int ret = 0; 232 int ret = 0;
245 233
246 if (buffer_locked(bh)) { 234 if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
247 get_bh(bh); 235 J_ASSERT_JH(jh, jh->b_transaction == NULL);
248 spin_unlock(&journal->j_list_lock);
249 jbd_unlock_bh_state(bh);
250 wait_on_buffer(bh);
251 /* the journal_head may have gone by now */
252 BUFFER_TRACE(bh, "brelse");
253 put_bh(bh);
254 ret = 1;
255 }
256 else if (jh->b_transaction != NULL) {
257 transaction_t *t = jh->b_transaction;
258 tid_t tid = t->t_tid;
259 236
260 spin_unlock(&journal->j_list_lock);
261 jbd_unlock_bh_state(bh);
262 log_start_commit(journal, tid);
263 log_wait_commit(journal, tid);
264 ret = 1;
265 }
266 else if (!buffer_dirty(bh)) {
267 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
268 BUFFER_TRACE(bh, "remove from checkpoint");
269 __journal_remove_checkpoint(jh);
270 spin_unlock(&journal->j_list_lock);
271 jbd_unlock_bh_state(bh);
272 journal_remove_journal_head(bh);
273 put_bh(bh);
274 ret = 1;
275 }
276 else {
277 /* 237 /*
278 * Important: we are about to write the buffer, and 238 * Important: we are about to write the buffer, and
279 * possibly block, while still holding the journal lock. 239 * possibly block, while still holding the journal lock.
@@ -286,30 +246,45 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
286 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 246 J_ASSERT_BH(bh, !buffer_jwrite(bh));
287 set_buffer_jwrite(bh); 247 set_buffer_jwrite(bh);
288 bhs[*batch_count] = bh; 248 bhs[*batch_count] = bh;
289 __buffer_relink_io(jh);
290 jbd_unlock_bh_state(bh); 249 jbd_unlock_bh_state(bh);
291 (*batch_count)++; 250 (*batch_count)++;
292 if (*batch_count == NR_BATCH) { 251 if (*batch_count == NR_BATCH) {
293 spin_unlock(&journal->j_list_lock);
294 __flush_batch(journal, bhs, batch_count); 252 __flush_batch(journal, bhs, batch_count);
295 ret = 1; 253 ret = 1;
296 } 254 }
255 } else {
256 int last_buffer = 0;
257 if (jh->b_cpnext == jh) {
258 /* We may be about to drop the transaction. Tell the
259 * caller that the lists have changed.
260 */
261 last_buffer = 1;
262 }
263 if (__try_to_free_cp_buf(jh)) {
264 (*drop_count)++;
265 ret = last_buffer;
266 }
297 } 267 }
298 return ret; 268 return ret;
299} 269}
300 270
301/* 271/*
302 * Perform an actual checkpoint. We take the first transaction on the 272 * Perform an actual checkpoint. We don't write out only enough to
303 * list of transactions to be checkpointed and send all its buffers 273 * satisfy the current blocked requests: rather we submit a reasonably
304 * to disk. We submit larger chunks of data at once. 274 * sized chunk of the outstanding data to disk at once for
275 * efficiency. __log_wait_for_space() will retry if we didn't free enough.
305 * 276 *
277 * However, we _do_ take into account the amount requested so that once
278 * the IO has been queued, we can return as soon as enough of it has
279 * completed to disk.
280 *
306 * The journal should be locked before calling this function. 281 * The journal should be locked before calling this function.
307 */ 282 */
308int log_do_checkpoint(journal_t *journal) 283int log_do_checkpoint(journal_t *journal)
309{ 284{
310 transaction_t *transaction;
311 tid_t this_tid;
312 int result; 285 int result;
286 int batch_count = 0;
287 struct buffer_head *bhs[NR_BATCH];
313 288
314 jbd_debug(1, "Start checkpoint\n"); 289 jbd_debug(1, "Start checkpoint\n");
315 290
@@ -324,70 +299,79 @@ int log_do_checkpoint(journal_t *journal)
324 return result; 299 return result;
325 300
326 /* 301 /*
327 * OK, we need to start writing disk blocks. Take one transaction 302 * OK, we need to start writing disk blocks. Try to free up a
328 * and write it. 303 * quarter of the log in a single checkpoint if we can.
329 */ 304 */
330 spin_lock(&journal->j_list_lock);
331 if (!journal->j_checkpoint_transactions)
332 goto out;
333 transaction = journal->j_checkpoint_transactions;
334 this_tid = transaction->t_tid;
335restart:
336 /* 305 /*
337 * If someone cleaned up this transaction while we slept, we're 306 * AKPM: check this code. I had a feeling a while back that it
338 * done (maybe it's a new transaction, but it fell at the same 307 * degenerates into a busy loop at unmount time.
339 * address).
340 */ 308 */
341 if (journal->j_checkpoint_transactions == transaction && 309 spin_lock(&journal->j_list_lock);
342 transaction->t_tid == this_tid) { 310 while (journal->j_checkpoint_transactions) {
343 int batch_count = 0; 311 transaction_t *transaction;
344 struct buffer_head *bhs[NR_BATCH]; 312 struct journal_head *jh, *last_jh, *next_jh;
345 struct journal_head *jh; 313 int drop_count = 0;
346 int retry = 0; 314 int cleanup_ret, retry = 0;
347 315 tid_t this_tid;
348 while (!retry && transaction->t_checkpoint_list) { 316
317 transaction = journal->j_checkpoint_transactions;
318 this_tid = transaction->t_tid;
319 jh = transaction->t_checkpoint_list;
320 last_jh = jh->b_cpprev;
321 next_jh = jh;
322 do {
349 struct buffer_head *bh; 323 struct buffer_head *bh;
350 324
351 jh = transaction->t_checkpoint_list; 325 jh = next_jh;
326 next_jh = jh->b_cpnext;
352 bh = jh2bh(jh); 327 bh = jh2bh(jh);
353 if (!jbd_trylock_bh_state(bh)) { 328 if (!jbd_trylock_bh_state(bh)) {
354 jbd_sync_bh(journal, bh); 329 jbd_sync_bh(journal, bh);
330 spin_lock(&journal->j_list_lock);
355 retry = 1; 331 retry = 1;
356 break; 332 break;
357 } 333 }
358 retry = __process_buffer(journal, jh, bhs, 334 retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
359 &batch_count); 335 if (cond_resched_lock(&journal->j_list_lock)) {
360 if (!retry &&
361 lock_need_resched(&journal->j_list_lock)) {
362 spin_unlock(&journal->j_list_lock);
363 retry = 1; 336 retry = 1;
364 break; 337 break;
365 } 338 }
366 } 339 } while (jh != last_jh && !retry);
367 340
368 if (batch_count) { 341 if (batch_count) {
369 if (!retry) {
370 spin_unlock(&journal->j_list_lock);
371 retry = 1;
372 }
373 __flush_batch(journal, bhs, &batch_count); 342 __flush_batch(journal, bhs, &batch_count);
343 retry = 1;
374 } 344 }
375 345
376 if (retry) {
377 spin_lock(&journal->j_list_lock);
378 goto restart;
379 }
380 /* 346 /*
381 * Now we have cleaned up the first transaction's checkpoint 347 * If someone cleaned up this transaction while we slept, we're
382 * list. Let's clean up the second one. 348 * done
349 */
350 if (journal->j_checkpoint_transactions != transaction)
351 break;
352 if (retry)
353 continue;
354 /*
355 * Maybe it's a new transaction, but it fell at the same
356 * address
383 */ 357 */
384 __wait_cp_io(journal, transaction); 358 if (transaction->t_tid != this_tid)
359 continue;
360 /*
361 * We have walked the whole transaction list without
362 * finding anything to write to disk. We had better be
363 * able to make some progress or we are in trouble.
364 */
365 cleanup_ret = __cleanup_transaction(journal, transaction);
366 J_ASSERT(drop_count != 0 || cleanup_ret != 0);
367 if (journal->j_checkpoint_transactions != transaction)
368 break;
385 } 369 }
386out:
387 spin_unlock(&journal->j_list_lock); 370 spin_unlock(&journal->j_list_lock);
388 result = cleanup_journal_tail(journal); 371 result = cleanup_journal_tail(journal);
389 if (result < 0) 372 if (result < 0)
390 return result; 373 return result;
374
391 return 0; 375 return 0;
392} 376}
393 377
@@ -472,91 +456,52 @@ int cleanup_journal_tail(journal_t *journal)
472/* Checkpoint list management */ 456/* Checkpoint list management */
473 457
474/* 458/*
475 * journal_clean_one_cp_list
476 *
477 * Find all the written-back checkpoint buffers in the given list and release them.
478 *
479 * Called with the journal locked.
480 * Called with j_list_lock held.
481 * Returns number of bufers reaped (for debug)
482 */
483
484static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
485{
486 struct journal_head *last_jh;
487 struct journal_head *next_jh = jh;
488 int ret, freed = 0;
489
490 *released = 0;
491 if (!jh)
492 return 0;
493
494 last_jh = jh->b_cpprev;
495 do {
496 jh = next_jh;
497 next_jh = jh->b_cpnext;
498 /* Use trylock because of the ranking */
499 if (jbd_trylock_bh_state(jh2bh(jh))) {
500 ret = __try_to_free_cp_buf(jh);
501 if (ret) {
502 freed++;
503 if (ret == 2) {
504 *released = 1;
505 return freed;
506 }
507 }
508 }
509 /*
510 * This function only frees up some memory if possible so we
511 * dont have an obligation to finish processing. Bail out if
512 * preemption requested:
513 */
514 if (need_resched())
515 return freed;
516 } while (jh != last_jh);
517
518 return freed;
519}
520
521/*
522 * journal_clean_checkpoint_list 459 * journal_clean_checkpoint_list
523 * 460 *
524 * Find all the written-back checkpoint buffers in the journal and release them. 461 * Find all the written-back checkpoint buffers in the journal and release them.
525 * 462 *
526 * Called with the journal locked. 463 * Called with the journal locked.
527 * Called with j_list_lock held. 464 * Called with j_list_lock held.
528 * Returns number of buffers reaped (for debug) 465 * Returns number of bufers reaped (for debug)
529 */ 466 */
530 467
531int __journal_clean_checkpoint_list(journal_t *journal) 468int __journal_clean_checkpoint_list(journal_t *journal)
532{ 469{
533 transaction_t *transaction, *last_transaction, *next_transaction; 470 transaction_t *transaction, *last_transaction, *next_transaction;
534 int ret = 0, released; 471 int ret = 0;
535 472
536 transaction = journal->j_checkpoint_transactions; 473 transaction = journal->j_checkpoint_transactions;
537 if (!transaction) 474 if (transaction == 0)
538 goto out; 475 goto out;
539 476
540 last_transaction = transaction->t_cpprev; 477 last_transaction = transaction->t_cpprev;
541 next_transaction = transaction; 478 next_transaction = transaction;
542 do { 479 do {
480 struct journal_head *jh;
481
543 transaction = next_transaction; 482 transaction = next_transaction;
544 next_transaction = transaction->t_cpnext; 483 next_transaction = transaction->t_cpnext;
545 ret += journal_clean_one_cp_list(transaction-> 484 jh = transaction->t_checkpoint_list;
546 t_checkpoint_list, &released); 485 if (jh) {
547 if (need_resched()) 486 struct journal_head *last_jh = jh->b_cpprev;
548 goto out; 487 struct journal_head *next_jh = jh;
549 if (released) 488
550 continue; 489 do {
551 /* 490 jh = next_jh;
552 * It is essential that we are as careful as in the case of 491 next_jh = jh->b_cpnext;
553 * t_checkpoint_list with removing the buffer from the list as 492 /* Use trylock because of the ranknig */
554 * we can possibly see not yet submitted buffers on io_list 493 if (jbd_trylock_bh_state(jh2bh(jh)))
555 */ 494 ret += __try_to_free_cp_buf(jh);
556 ret += journal_clean_one_cp_list(transaction-> 495 /*
557 t_checkpoint_io_list, &released); 496 * This function only frees up some memory
558 if (need_resched()) 497 * if possible so we dont have an obligation
559 goto out; 498 * to finish processing. Bail out if preemption
499 * requested:
500 */
501 if (need_resched())
502 goto out;
503 } while (jh != last_jh);
504 }
560 } while (transaction != last_transaction); 505 } while (transaction != last_transaction);
561out: 506out:
562 return ret; 507 return ret;
@@ -571,22 +516,18 @@ out:
571 * buffer updates committed in that transaction have safely been stored 516 * buffer updates committed in that transaction have safely been stored
572 * elsewhere on disk. To achieve this, all of the buffers in a 517 * elsewhere on disk. To achieve this, all of the buffers in a
573 * transaction need to be maintained on the transaction's checkpoint 518 * transaction need to be maintained on the transaction's checkpoint
574 * lists until they have been rewritten, at which point this function is 519 * list until they have been rewritten, at which point this function is
575 * called to remove the buffer from the existing transaction's 520 * called to remove the buffer from the existing transaction's
576 * checkpoint lists. 521 * checkpoint list.
577 *
578 * The function returns 1 if it frees the transaction, 0 otherwise.
579 * 522 *
580 * This function is called with the journal locked. 523 * This function is called with the journal locked.
581 * This function is called with j_list_lock held. 524 * This function is called with j_list_lock held.
582 * This function is called with jbd_lock_bh_state(jh2bh(jh))
583 */ 525 */
584 526
585int __journal_remove_checkpoint(struct journal_head *jh) 527void __journal_remove_checkpoint(struct journal_head *jh)
586{ 528{
587 transaction_t *transaction; 529 transaction_t *transaction;
588 journal_t *journal; 530 journal_t *journal;
589 int ret = 0;
590 531
591 JBUFFER_TRACE(jh, "entry"); 532 JBUFFER_TRACE(jh, "entry");
592 533
@@ -597,10 +538,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
597 journal = transaction->t_journal; 538 journal = transaction->t_journal;
598 539
599 __buffer_unlink(jh); 540 __buffer_unlink(jh);
600 jh->b_cp_transaction = NULL;
601 541
602 if (transaction->t_checkpoint_list != NULL || 542 if (transaction->t_checkpoint_list != NULL)
603 transaction->t_checkpoint_io_list != NULL)
604 goto out; 543 goto out;
605 JBUFFER_TRACE(jh, "transaction has no more buffers"); 544 JBUFFER_TRACE(jh, "transaction has no more buffers");
606 545
@@ -626,10 +565,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
626 /* Just in case anybody was waiting for more transactions to be 565 /* Just in case anybody was waiting for more transactions to be
627 checkpointed... */ 566 checkpointed... */
628 wake_up(&journal->j_wait_logspace); 567 wake_up(&journal->j_wait_logspace);
629 ret = 1;
630out: 568out:
631 JBUFFER_TRACE(jh, "exit"); 569 JBUFFER_TRACE(jh, "exit");
632 return ret;
633} 570}
634 571
635/* 572/*
@@ -691,7 +628,6 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
691 J_ASSERT(transaction->t_shadow_list == NULL); 628 J_ASSERT(transaction->t_shadow_list == NULL);
692 J_ASSERT(transaction->t_log_list == NULL); 629 J_ASSERT(transaction->t_log_list == NULL);
693 J_ASSERT(transaction->t_checkpoint_list == NULL); 630 J_ASSERT(transaction->t_checkpoint_list == NULL);
694 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
695 J_ASSERT(transaction->t_updates == 0); 631 J_ASSERT(transaction->t_updates == 0);
696 J_ASSERT(journal->j_committing_transaction != transaction); 632 J_ASSERT(journal->j_committing_transaction != transaction);
697 J_ASSERT(journal->j_running_transaction != transaction); 633 J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 29e62d98bae6..002ad2bbc769 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -829,8 +829,7 @@ restart_loop:
829 journal->j_committing_transaction = NULL; 829 journal->j_committing_transaction = NULL;
830 spin_unlock(&journal->j_state_lock); 830 spin_unlock(&journal->j_state_lock);
831 831
832 if (commit_transaction->t_checkpoint_list == NULL && 832 if (commit_transaction->t_checkpoint_list == NULL) {
833 commit_transaction->t_checkpoint_io_list == NULL) {
834 __journal_drop_transaction(journal, commit_transaction); 833 __journal_drop_transaction(journal, commit_transaction);
835 } else { 834 } else {
836 if (journal->j_checkpoint_transactions == NULL) { 835 if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 3eaf6e701087..da6354baa0b8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
111/* 111/*
112 * The server lockd has called us back to tell us the lock was granted 112 * The server lockd has called us back to tell us the lock was granted
113 */ 113 */
114u32 114u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
115nlmclnt_grant(struct nlm_lock *lock)
116{ 115{
116 const struct file_lock *fl = &lock->fl;
117 const struct nfs_fh *fh = &lock->fh;
117 struct nlm_wait *block; 118 struct nlm_wait *block;
118 u32 res = nlm_lck_denied; 119 u32 res = nlm_lck_denied;
119 120
@@ -122,14 +123,20 @@ nlmclnt_grant(struct nlm_lock *lock)
122 * Warning: must not use cookie to match it! 123 * Warning: must not use cookie to match it!
123 */ 124 */
124 list_for_each_entry(block, &nlm_blocked, b_list) { 125 list_for_each_entry(block, &nlm_blocked, b_list) {
125 if (nlm_compare_locks(block->b_lock, &lock->fl)) { 126 struct file_lock *fl_blocked = block->b_lock;
126 /* Alright, we found a lock. Set the return status 127
127 * and wake up the caller 128 if (!nlm_compare_locks(fl_blocked, fl))
128 */ 129 continue;
129 block->b_status = NLM_LCK_GRANTED; 130 if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
130 wake_up(&block->b_wait); 131 continue;
131 res = nlm_granted; 132 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0)
132 } 133 continue;
134 /* Alright, we found a lock. Set the return status
135 * and wake up the caller
136 */
137 block->b_status = NLM_LCK_GRANTED;
138 wake_up(&block->b_wait);
139 res = nlm_granted;
133 } 140 }
134 return res; 141 return res;
135} 142}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4063095d849e..b10f913aa06a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
228 resp->cookie = argp->cookie; 228 resp->cookie = argp->cookie;
229 229
230 dprintk("lockd: GRANTED called\n"); 230 dprintk("lockd: GRANTED called\n");
231 resp->status = nlmclnt_grant(&argp->lock); 231 resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
232 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 232 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
233 return rpc_success; 233 return rpc_success;
234} 234}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3bc437e0cf5b..35681d9cf1fc 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
256 resp->cookie = argp->cookie; 256 resp->cookie = argp->cookie;
257 257
258 dprintk("lockd: GRANTED called\n"); 258 dprintk("lockd: GRANTED called\n");
259 resp->status = nlmclnt_grant(&argp->lock); 259 resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
260 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 260 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
261 return rpc_success; 261 return rpc_success;
262} 262}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index ef5e5414e7a8..d63da756eb49 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1124,8 +1124,6 @@ static void handle_attrs(struct super_block *s)
1124 "reiserfs: cannot support attributes until flag is set in super-block"); 1124 "reiserfs: cannot support attributes until flag is set in super-block");
1125 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); 1125 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1126 } 1126 }
1127 } else if (le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared) {
1128 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ATTRS);
1129 } 1127 }
1130} 1128}
1131 1129
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 43de3ba83332..ab8894c3b9e5 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -228,7 +228,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
228 acl = ERR_PTR(retval); 228 acl = ERR_PTR(retval);
229 } else { 229 } else {
230 acl = posix_acl_from_disk(value, retval); 230 acl = posix_acl_from_disk(value, retval);
231 *p_acl = posix_acl_dup(acl); 231 if (!IS_ERR(acl))
232 *p_acl = posix_acl_dup(acl);
232 } 233 }
233 234
234 kfree(value); 235 kfree(value);
diff --git a/fs/select.c b/fs/select.c
index bc60a3e14ef3..6ce68a9c8976 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -398,11 +398,15 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
398 ret = core_sys_select(n, inp, outp, exp, &timeout); 398 ret = core_sys_select(n, inp, outp, exp, &timeout);
399 399
400 if (tvp) { 400 if (tvp) {
401 struct timeval rtv;
402
401 if (current->personality & STICKY_TIMEOUTS) 403 if (current->personality & STICKY_TIMEOUTS)
402 goto sticky; 404 goto sticky;
403 tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); 405 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
404 tv.tv_sec = timeout; 406 rtv.tv_sec = timeout;
405 if (copy_to_user(tvp, &tv, sizeof(tv))) { 407 if (timeval_compare(&rtv, &tv) < 0)
408 rtv = tv;
409 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
406sticky: 410sticky:
407 /* 411 /*
408 * If an application puts its timeval in read-only 412 * If an application puts its timeval in read-only
@@ -460,11 +464,16 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
460 ret = core_sys_select(n, inp, outp, exp, &timeout); 464 ret = core_sys_select(n, inp, outp, exp, &timeout);
461 465
462 if (tsp) { 466 if (tsp) {
467 struct timespec rts;
468
463 if (current->personality & STICKY_TIMEOUTS) 469 if (current->personality & STICKY_TIMEOUTS)
464 goto sticky; 470 goto sticky;
465 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; 471 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
466 ts.tv_sec = timeout; 472 1000;
467 if (copy_to_user(tsp, &ts, sizeof(ts))) { 473 rts.tv_sec = timeout;
474 if (timespec_compare(&rts, &ts) < 0)
475 rts = ts;
476 if (copy_to_user(tsp, &rts, sizeof(rts))) {
468sticky: 477sticky:
469 /* 478 /*
470 * If an application puts its timeval in read-only 479 * If an application puts its timeval in read-only
@@ -758,12 +767,17 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
758 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 767 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
759 768
760 if (tsp && timeout >= 0) { 769 if (tsp && timeout >= 0) {
770 struct timespec rts;
771
761 if (current->personality & STICKY_TIMEOUTS) 772 if (current->personality & STICKY_TIMEOUTS)
762 goto sticky; 773 goto sticky;
763 /* Yes, we know it's actually an s64, but it's also positive. */ 774 /* Yes, we know it's actually an s64, but it's also positive. */
764 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; 775 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
765 ts.tv_sec = timeout; 776 1000;
766 if (copy_to_user(tsp, &ts, sizeof(ts))) { 777 rts.tv_sec = timeout;
778 if (timespec_compare(&rts, &ts) < 0)
779 rts = ts;
780 if (copy_to_user(tsp, &rts, sizeof(rts))) {
767 sticky: 781 sticky:
768 /* 782 /*
769 * If an application puts its timeval in read-only 783 * If an application puts its timeval in read-only
diff --git a/fs/stat.c b/fs/stat.c
index 24211b030f39..9948cc1685a4 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -261,6 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf)
261 return error; 261 return error;
262} 262}
263 263
264#ifndef __ARCH_WANT_STAT64
264asmlinkage long sys_newfstatat(int dfd, char __user *filename, 265asmlinkage long sys_newfstatat(int dfd, char __user *filename,
265 struct stat __user *statbuf, int flag) 266 struct stat __user *statbuf, int flag)
266{ 267{
@@ -281,6 +282,7 @@ asmlinkage long sys_newfstatat(int dfd, char __user *filename,
281out: 282out:
282 return error; 283 return error;
283} 284}
285#endif
284 286
285asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf) 287asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
286{ 288{
@@ -395,6 +397,26 @@ asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user * statbuf)
395 return error; 397 return error;
396} 398}
397 399
400asmlinkage long sys_fstatat64(int dfd, char __user *filename,
401 struct stat64 __user *statbuf, int flag)
402{
403 struct kstat stat;
404 int error = -EINVAL;
405
406 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
407 goto out;
408
409 if (flag & AT_SYMLINK_NOFOLLOW)
410 error = vfs_lstat_fd(dfd, filename, &stat);
411 else
412 error = vfs_stat_fd(dfd, filename, &stat);
413
414 if (!error)
415 error = cp_new_stat64(&stat, statbuf);
416
417out:
418 return error;
419}
398#endif /* __ARCH_WANT_STAT64 */ 420#endif /* __ARCH_WANT_STAT64 */
399 421
400void inode_add_bytes(struct inode *inode, loff_t bytes) 422void inode_add_bytes(struct inode *inode, loff_t bytes)