author		James Morris <jmorris@namei.org>	2009-08-19 19:18:42 -0400
committer	James Morris <jmorris@namei.org>	2009-08-19 19:18:42 -0400
commit		ece13879e74313e62109e0755dd3d4f172df89e2 (patch)
tree		1fe96ab392c1ff203a6fb3f67ed0ed577056572e /fs
parent		b08dc3eba0c34027010caeda258f495074ae3a54 (diff)
parent		6c30c53fd5ae6a99a23ad78e90c428d2c8ffb07f (diff)
Merge branch 'master' into next
Conflicts:
	security/Kconfig

Manual fix.

Signed-off-by: James Morris <jmorris@namei.org>
Diffstat (limited to 'fs')
-rw-r--r--	fs/gfs2/sys.c	20
-rw-r--r--	fs/libfs.c	2
-rw-r--r--	fs/nfs/direct.c	20
-rw-r--r--	fs/nfs/read.c	6
-rw-r--r--	fs/nfs/write.c	6
-rw-r--r--	fs/nilfs2/super.c	2
-rw-r--r--	fs/nilfs2/the_nilfs.h	2
-rw-r--r--	fs/notify/inotify/inotify_fsnotify.c	13
-rw-r--r--	fs/notify/inotify/inotify_user.c	9
-rw-r--r--	fs/notify/notification.c	11
-rw-r--r--	fs/ocfs2/alloc.c	47
-rw-r--r--	fs/ocfs2/aops.c	69
-rw-r--r--	fs/ocfs2/dcache.c	35
-rw-r--r--	fs/ocfs2/dcache.h	3
-rw-r--r--	fs/ocfs2/dlm/dlmast.c	1
-rw-r--r--	fs/ocfs2/dlm/dlmrecovery.c	2
-rw-r--r--	fs/ocfs2/file.c	5
-rw-r--r--	fs/ocfs2/journal.c	8
-rw-r--r--	fs/ocfs2/journal.h	19
-rw-r--r--	fs/ocfs2/ocfs2.h	22
-rw-r--r--	fs/ocfs2/quota.h	1
-rw-r--r--	fs/ocfs2/quota_global.c	134
-rw-r--r--	fs/ocfs2/quota_local.c	110
-rw-r--r--	fs/ocfs2/stack_o2cb.c	3
-rw-r--r--	fs/ocfs2/super.c	30
-rw-r--r--	fs/ocfs2/xattr.c	3
-rw-r--r--	fs/proc/base.c	19
-rw-r--r--	fs/select.c	1
-rw-r--r--	fs/xfs/linux-2.6/xfs_buf.c	2
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.c	13
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.h	1
-rw-r--r--	fs/xfs/xfs_attr.c	8
-rw-r--r--	fs/xfs/xfs_bmap.c	2
-rw-r--r--	fs/xfs/xfs_btree.c	4
-rw-r--r--	fs/xfs/xfs_da_btree.c	6
-rw-r--r--	fs/xfs/xfs_dir2.c	2
-rw-r--r--	fs/xfs/xfs_fsops.c	20
-rw-r--r--	fs/xfs/xfs_iget.c	113
-rw-r--r--	fs/xfs/xfs_inode.c	10
-rw-r--r--	fs/xfs/xfs_log.c	2
-rw-r--r--	fs/xfs/xfs_vnodeops.c	4
41 files changed, 518 insertions, 272 deletions
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 23419dc3027b..a7cbfbd340c7 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -386,16 +386,16 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
 #define GDLM_ATTR(_name,_mode,_show,_store) \
 static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
 GDLM_ATTR(proto_name,		0444, proto_name_show,		NULL);
 GDLM_ATTR(block,		0644, block_show,		block_store);
 GDLM_ATTR(withdraw,		0644, withdraw_show,		withdraw_store);
 GDLM_ATTR(id,			0444, lkid_show,		NULL);
 GDLM_ATTR(jid,			0444, jid_show,			NULL);
 GDLM_ATTR(first,		0444, lkfirst_show,		NULL);
 GDLM_ATTR(first_done,		0444, first_done_show,		NULL);
-GDLM_ATTR(recover,		0200, NULL,			recover_store);
+GDLM_ATTR(recover,		0600, NULL,			recover_store);
 GDLM_ATTR(recover_done,	0444, recover_done_show,	NULL);
 GDLM_ATTR(recover_status,	0444, recover_status_show,	NULL);
 
 static struct attribute *lock_module_attrs[] = {
 	&gdlm_attr_proto_name.attr,
diff --git a/fs/libfs.c b/fs/libfs.c
index ddfa89948c3f..dcec3d3ea64f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -217,7 +217,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
 		return PTR_ERR(s);
 
 	s->s_flags = MS_NOUSER;
-	s->s_maxbytes = ~0ULL;
+	s->s_maxbytes = MAX_LFS_FILESIZE;
 	s->s_blocksize = PAGE_SIZE;
 	s->s_blocksize_bits = PAGE_SHIFT;
 	s->s_magic = magic;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 489fc01a3204..e4e089a8f294 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata)
 
 	if (put_dreq(dreq))
 		nfs_direct_complete(dreq);
-	nfs_readdata_release(calldata);
+	nfs_readdata_free(data);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 					data->npages, 1, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
-			nfs_readdata_release(data);
+			nfs_readdata_free(data);
 			break;
 		}
 		if ((unsigned)result < data->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
 				nfs_direct_release_pages(data->pagevec, result);
-				nfs_readdata_release(data);
+				nfs_readdata_free(data);
 				break;
 			}
 			bytes -= pgbase;
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->inode = inode;
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
-		data->args.context = get_nfs_open_context(ctx);
+		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
 		list_del(&data->pages);
 		nfs_direct_release_pages(data->pagevec, data->npages);
-		nfs_writedata_release(data);
+		nfs_writedata_free(data);
 	}
 }
 
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata)
 
 	dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
 	nfs_direct_write_complete(dreq, data->inode);
-	nfs_commitdata_release(calldata);
+	nfs_commit_free(data);
 }
 
 static const struct rpc_call_ops nfs_commit_direct_ops = {
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->args.fh = NFS_FH(data->inode);
 	data->args.offset = 0;
 	data->args.count = 0;
-	data->args.context = get_nfs_open_context(dreq->ctx);
+	data->args.context = dreq->ctx;
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 					data->npages, 0, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
-			nfs_writedata_release(data);
+			nfs_writedata_free(data);
 			break;
 		}
 		if ((unsigned)result < data->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
 				nfs_direct_release_pages(data->pagevec, result);
-				nfs_writedata_release(data);
+				nfs_writedata_free(data);
 				break;
 			}
 			bytes -= pgbase;
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->inode = inode;
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
-		data->args.context = get_nfs_open_context(ctx);
+		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 73ea5e8d66ce..12c9e66d3f1d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 	return p;
 }
 
-static void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readdata_free(struct nfs_read_data *p)
 {
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_rdata_mempool);
 }
 
-void nfs_readdata_release(void *data)
+static void nfs_readdata_release(struct nfs_read_data *rdata)
 {
-	struct nfs_read_data *rdata = data;
-
 	put_nfs_open_context(rdata->args.context);
 	nfs_readdata_free(rdata);
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0a0a2ff767c3..a34fae21fe10 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -87,17 +87,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 	return p;
 }
 
-static void nfs_writedata_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
 {
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_wdata_mempool);
 }
 
-void nfs_writedata_release(void *data)
+static void nfs_writedata_release(struct nfs_write_data *wdata)
 {
-	struct nfs_write_data *wdata = data;
-
 	put_nfs_open_context(wdata->args.context);
 	nfs_writedata_free(wdata);
 }
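The read.c and write.c hunks above split a memory-only "free" helper from a "release" helper that also drops the open-context reference, and the O_DIRECT paths call the free variant because they manage their context reference themselves. The following is a minimal userspace sketch of that ownership split, not NFS code; the types and names are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

struct open_context { int refcount; };		/* stands in for nfs_open_context */
struct write_data { struct open_context *ctx; };

static void put_context(struct open_context *c)
{
	if (--c->refcount == 0)
		printf("context released\n");
}

/* "free": return the memory only, the caller owns the context reference */
static void writedata_free(struct write_data *p)
{
	free(p);
}

/* "release": drop the reference the request holds, then free the memory */
static void writedata_release(struct write_data *p)
{
	put_context(p->ctx);
	writedata_free(p);
}

int main(void)
{
	struct open_context ctx = { .refcount = 1 };
	struct write_data *p = malloc(sizeof(*p));

	p->ctx = &ctx;
	writedata_release(p);	/* ordinary completion path */
	return 0;
}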
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8e2ec43b18f4..151964f0de4c 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -416,8 +416,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
 	if (unlikely(err))
 		goto failed;
 
+	down_read(&nilfs->ns_segctor_sem);
 	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
 					  &bh_cp);
+	up_read(&nilfs->ns_segctor_sem);
 	if (unlikely(err)) {
 		if (err == -ENOENT || err == -EINVAL) {
 			printk(KERN_ERR
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index e8adbffc626f..1b9caafb8662 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -253,7 +253,7 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
 
 static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
 {
-	if (!atomic_dec_and_test(&sbi->s_count))
+	if (atomic_dec_and_test(&sbi->s_count))
 		kfree(sbi);
 }
 
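The the_nilfs.h hunk above flips an inverted refcount test: atomic_dec_and_test() returns true only when the counter reaches zero, so the object must be freed on the true branch. A minimal userspace sketch of the same pattern using C11 atomics (not nilfs2 code, names invented for illustration):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sb_info { atomic_int count; };

static void put_sbinfo(struct sb_info *sbi)
{
	/* atomic_fetch_sub returns the old value; old == 1 means this was
	 * the last reference, which is when the object may be freed */
	if (atomic_fetch_sub(&sbi->count, 1) == 1) {
		free(sbi);
		puts("freed on last put");
	}
}

int main(void)
{
	struct sb_info *sbi = malloc(sizeof(*sbi));

	atomic_init(&sbi->count, 2);
	put_sbinfo(sbi);	/* still referenced, kept */
	put_sbinfo(sbi);	/* last reference dropped, freed */
	return 0;
}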
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 47cd258fd24d..5dcbafe72d71 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	event_priv->wd = wd;
 
 	ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
-	/* EEXIST is not an error */
-	if (ret == -EEXIST)
-		ret = 0;
-
-	/* did event_priv get attached? */
-	if (list_empty(&fsn_event_priv->event_list))
+	if (ret) {
 		inotify_free_event_priv(fsn_event_priv);
+		/* EEXIST says we tail matched, EOVERFLOW isn't something
+		 * to report up the stack. */
+		if ((ret == -EEXIST) ||
+		    (ret == -EOVERFLOW))
+			ret = 0;
+	}
 
 	/*
 	 * If we hold the entry until after the event is on the queue
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f30d9bbc2e1b..dc32ed8323ba 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -386,6 +386,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
 	struct fsnotify_event *ignored_event;
 	struct inotify_event_private_data *event_priv;
 	struct fsnotify_event_private_data *fsn_event_priv;
+	int ret;
 
 	ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
 					      FSNOTIFY_EVENT_NONE, NULL, 0,
@@ -404,10 +405,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
 	fsn_event_priv->group = group;
 	event_priv->wd = ientry->wd;
 
-	fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
-
-	/* did the private data get added? */
-	if (list_empty(&fsn_event_priv->event_list))
+	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
+	if (ret)
 		inotify_free_event_priv(fsn_event_priv);
 
 skip_send_ignore:
@@ -568,7 +567,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
-	group->inotify_data.last_wd = 0;
+	group->inotify_data.last_wd = 1;
 	group->inotify_data.user = user;
 	group->inotify_data.fa = NULL;
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 521368574e97..3816d5750dd5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -153,6 +153,10 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
 			return true;
 		break;
 	case (FSNOTIFY_EVENT_NONE):
+		if (old->mask & FS_Q_OVERFLOW)
+			return true;
+		else if (old->mask & FS_IN_IGNORED)
+			return false;
 		return false;
 	};
 }
@@ -171,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
 	struct list_head *list = &group->notification_list;
 	struct fsnotify_event_holder *last_holder;
 	struct fsnotify_event *last_event;
-
-	/* easy to tell if priv was attached to the event */
-	INIT_LIST_HEAD(&priv->event_list);
+	int ret = 0;
 
 	/*
 	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -194,6 +196,7 @@ alloc_holder:
 
 	if (group->q_len >= group->max_events) {
 		event = &q_overflow_event;
+		ret = -EOVERFLOW;
 		/* sorry, no private data on the overflow event */
 		priv = NULL;
 	}
@@ -235,7 +238,7 @@ alloc_holder:
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
-	return 0;
+	return ret;
 }
 
 /*
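Together, the notification.c and inotify hunks above change the enqueue contract: fsnotify_add_notify_event() now returns -EEXIST on a tail merge and -EOVERFLOW when the overflow event is queued instead, and callers free their per-event private data on any non-zero return before squashing the two benign codes. A minimal userspace sketch of that contract (not fsnotify code, helper names invented for illustration):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int add_event(int queue_len, int max_events, int duplicate)
{
	if (duplicate)
		return -EEXIST;		/* tail-merged with the last event */
	if (queue_len >= max_events)
		return -EOVERFLOW;	/* overflow event queued instead */
	return 0;			/* event (and its priv) was queued */
}

static int handle_event(int queue_len, int max_events, int duplicate)
{
	void *priv = malloc(16);	/* per-event private data */
	int ret = add_event(queue_len, max_events, duplicate);

	if (ret) {
		free(priv);		/* priv was not attached to the queue */
		if (ret == -EEXIST || ret == -EOVERFLOW)
			ret = 0;	/* benign, nothing to report upward */
	}
	/* on ret == 0 ownership of priv passes to the queue (not modeled) */
	return ret;
}

int main(void)
{
	printf("%d %d\n", handle_event(0, 8, 0), handle_event(8, 8, 0));
	return 0;
}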
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9edcde4974aa..f9a3e8942669 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
 	 * immediately to their right.
 	 */
 	left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
-	if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
+	if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
+		BUG_ON(right_child_el->l_tree_depth);
 		BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
 		left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
 	}
@@ -2476,15 +2477,37 @@ out_ret_path:
 	return ret;
 }
 
-static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
-				      struct ocfs2_path *path)
+static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
+				     int subtree_index, struct ocfs2_path *path)
 {
-	int i, idx;
+	int i, idx, ret;
 	struct ocfs2_extent_rec *rec;
 	struct ocfs2_extent_list *el;
 	struct ocfs2_extent_block *eb;
 	u32 range;
 
+	/*
+	 * In normal tree rotation process, we will never touch the
+	 * tree branch above subtree_index and ocfs2_extend_rotate_transaction
+	 * doesn't reserve the credits for them either.
+	 *
+	 * But we do have a special case here which will update the rightmost
+	 * records for all the bh in the path.
+	 * So we have to allocate extra credits and access them.
+	 */
+	ret = ocfs2_extend_trans(handle,
+				 handle->h_buffer_credits + subtree_index);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_path(inode, handle, path);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
 	/* Path should always be rightmost. */
 	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
 	BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
 
 		ocfs2_journal_dirty(handle, path->p_node[i].bh);
 	}
+out:
+	return ret;
 }
 
 static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	if (del_right_subtree) {
 		ocfs2_unlink_subtree(inode, handle, left_path, right_path,
 				     subtree_index, dealloc);
-		ocfs2_update_edge_lengths(inode, handle, left_path);
+		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+						left_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
 
 		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
 		ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 
 	ocfs2_unlink_subtree(inode, handle, left_path, path,
 			     subtree_index, dealloc);
-	ocfs2_update_edge_lengths(inode, handle, left_path);
+	ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+					left_path);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
 
 	eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
 	ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b2c52b3a1484..b401654011a2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
 		dump_stack();
+		goto bail;
 	}
 
 	past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
 	 */
 	unsigned c_new;
 	unsigned c_unwritten;
+	unsigned c_needs_zero;
 };
 
-static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
-{
-	return d->c_new || d->c_unwritten;
-}
-
 struct ocfs2_write_ctxt {
 	/* Logical cluster position / len of write */
 	u32 w_cpos;
 	u32 w_clen;
 
+	/* First cluster allocated in a nonsparse extend */
+	u32 w_first_new_cpos;
+
 	struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
 
 	/*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 		return -ENOMEM;
 
 	wc->w_cpos = pos >> osb->s_clustersize_bits;
+	wc->w_first_new_cpos = UINT_MAX;
 	cend = (pos + len - 1) >> osb->s_clustersize_bits;
 	wc->w_clen = cend - wc->w_cpos + 1;
 	get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
  */
 static int ocfs2_write_cluster(struct address_space *mapping,
 			       u32 phys, unsigned int unwritten,
+			       unsigned int should_zero,
 			       struct ocfs2_alloc_context *data_ac,
 			       struct ocfs2_alloc_context *meta_ac,
 			       struct ocfs2_write_ctxt *wc, u32 cpos,
 			       loff_t user_pos, unsigned user_len)
 {
-	int ret, i, new, should_zero = 0;
+	int ret, i, new;
 	u64 v_blkno, p_blkno;
 	struct inode *inode = mapping->host;
 	struct ocfs2_extent_tree et;
 
 	new = phys == 0 ? 1 : 0;
-	if (new || unwritten)
-		should_zero = 1;
-
 	if (new) {
 		u32 tmp_pos;
 
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 		if (tmpret) {
 			mlog_errno(tmpret);
 			if (ret == 0)
-				tmpret = ret;
+				ret = tmpret;
 		}
 	}
 
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
 			local_len = osb->s_clustersize - cluster_off;
 
 		ret = ocfs2_write_cluster(mapping, desc->c_phys,
-					  desc->c_unwritten, data_ac, meta_ac,
+					  desc->c_unwritten,
+					  desc->c_needs_zero,
+					  data_ac, meta_ac,
 					  wc, desc->c_cpos, pos, local_len);
 		if (ret) {
 			mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
 	 * newly allocated cluster.
 	 */
 	desc = &wc->w_desc[0];
-	if (ocfs2_should_zero_cluster(desc))
+	if (desc->c_needs_zero)
 		ocfs2_figure_cluster_boundaries(osb,
 						desc->c_cpos,
 						&wc->w_target_from,
 						NULL);
 
 	desc = &wc->w_desc[wc->w_clen - 1];
-	if (ocfs2_should_zero_cluster(desc))
+	if (desc->c_needs_zero)
 		ocfs2_figure_cluster_boundaries(osb,
 						desc->c_cpos,
 						NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
 			phys++;
 		}
 
+		/*
+		 * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
+		 * file that got extended.  w_first_new_cpos tells us
+		 * where the newly allocated clusters are so we can
+		 * zero them.
+		 */
+		if (desc->c_cpos >= wc->w_first_new_cpos) {
+			BUG_ON(phys == 0);
+			desc->c_needs_zero = 1;
+		}
+
 		desc->c_phys = phys;
 		if (phys == 0) {
 			desc->c_new = 1;
+			desc->c_needs_zero = 1;
 			*clusters_to_alloc = *clusters_to_alloc + 1;
 		}
-		if (ext_flags & OCFS2_EXT_UNWRITTEN)
+
+		if (ext_flags & OCFS2_EXT_UNWRITTEN) {
 			desc->c_unwritten = 1;
+			desc->c_needs_zero = 1;
+		}
 
 		num_clusters--;
 	}
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
 	if (newsize <= i_size_read(inode))
 		return 0;
 
-	ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);
+	ret = ocfs2_extend_no_holes(inode, newsize, pos);
 	if (ret)
 		mlog_errno(ret);
 
+	wc->w_first_new_cpos =
+		ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
+
 	return ret;
 }
 
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 			     struct page **pagep, void **fsdata,
 			     struct buffer_head *di_bh, struct page *mmap_page)
 {
-	int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+	int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
 	unsigned int clusters_to_alloc, extents_to_split;
 	struct ocfs2_write_ctxt *wc;
 	struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
 	}
 
-	ocfs2_set_target_boundaries(osb, wc, pos, len,
-				    clusters_to_alloc + extents_to_split);
+	/*
+	 * We have to zero sparse allocated clusters, unwritten extent clusters,
+	 * and non-sparse clusters we just extended.  For non-sparse writes,
+	 * we know zeros will only be needed in the first and/or last cluster.
+	 */
+	if (clusters_to_alloc || extents_to_split ||
+	    wc->w_desc[0].c_needs_zero ||
+	    wc->w_desc[wc->w_clen - 1].c_needs_zero)
+		cluster_of_pages = 1;
+	else
+		cluster_of_pages = 0;
+
+	ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
 
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 	 * extent.
 	 */
 	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
-					 clusters_to_alloc + extents_to_split,
-					 mmap_page);
+					 cluster_of_pages, mmap_page);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_quota;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b574431a031d..2f28b7de2c8d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,22 +310,19 @@ out_attach:
 	return ret;
 }
 
-static DEFINE_SPINLOCK(dentry_list_lock);
+DEFINE_SPINLOCK(dentry_list_lock);
 
 /* We limit the number of dentry locks to drop in one go. We have
  * this limit so that we don't starve other users of ocfs2_wq. */
 #define DL_INODE_DROP_COUNT 64
 
 /* Drop inode references from dentry locks */
-void ocfs2_drop_dl_inodes(struct work_struct *work)
+static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
 {
-	struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
-					       dentry_lock_work);
 	struct ocfs2_dentry_lock *dl;
-	int drop_count = DL_INODE_DROP_COUNT;
 
 	spin_lock(&dentry_list_lock);
-	while (osb->dentry_lock_list && drop_count--) {
+	while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
 		dl = osb->dentry_lock_list;
 		osb->dentry_lock_list = dl->dl_next;
 		spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
 		kfree(dl);
 		spin_lock(&dentry_list_lock);
 	}
-	if (osb->dentry_lock_list)
+	spin_unlock(&dentry_list_lock);
+}
+
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+	struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+					       dentry_lock_work);
+
+	__ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
+	/*
+	 * Don't queue dropping if umount is in progress. We flush the
+	 * list in ocfs2_dismount_volume
+	 */
+	spin_lock(&dentry_list_lock);
+	if (osb->dentry_lock_list &&
+	    !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
 		queue_work(ocfs2_wq, &osb->dentry_lock_work);
 	spin_unlock(&dentry_list_lock);
 }
 
+/* Flush the whole work queue */
+void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
+{
+	__ocfs2_drop_dl_inodes(osb, -1);
+}
+
 /*
  * ocfs2_dentry_iput() and friends.
  *
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
 	/* We leave dropping of inode reference to ocfs2_wq as that can
 	 * possibly lead to inode deletion which gets tricky */
 	spin_lock(&dentry_list_lock);
-	if (!osb->dentry_lock_list)
+	if (!osb->dentry_lock_list &&
+	    !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
 		queue_work(ocfs2_wq, &osb->dentry_lock_work);
 	dl->dl_next = osb->dentry_lock_list;
 	osb->dentry_lock_list = dl;
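The dcache.c hunks above stop the worker from requeueing itself once the "drop immediately" flag is set, and they test that flag under the same spinlock that protects the list, so the unmount path can flush the list without racing new work. A minimal userspace sketch of that shutdown pattern using pthreads (not ocfs2 code, names invented for illustration; compile with -lpthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending_items = 3;
static bool drop_immediately;	/* set by the unmount path */

/* process up to 'batch' items; report whether the work should requeue */
static bool process_batch_and_maybe_requeue(int batch)
{
	bool requeue;

	pthread_mutex_lock(&list_lock);
	while (pending_items && batch--)
		pending_items--;	/* stand-in for dropping one dentry lock */
	requeue = pending_items && !drop_immediately;
	pthread_mutex_unlock(&list_lock);
	return requeue;
}

int main(void)
{
	/* normal operation: small batches requeue until the list drains */
	while (process_batch_and_maybe_requeue(1))
		puts("requeued");

	/* unmount: refill the list, set the flag; the worker no longer
	 * requeues itself even though items remain, so the unmount path
	 * can flush everything directly */
	pending_items = 2;
	drop_immediately = true;
	printf("requeue after flag set: %d\n",
	       (int)process_batch_and_maybe_requeue(1));
	return 0;
}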
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index faa12e75f98d..f5dd1789acf1 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
 int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
 			     u64 parent_blkno);
 
+extern spinlock_t dentry_list_lock;
+
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl);
 
 void ocfs2_drop_dl_inodes(struct work_struct *work);
+void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
 
 struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
 				      int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index d07ddbe4b283..81eff8e58322 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 		     lock->ast_pending, lock->ml.type);
 		BUG();
 	}
-	BUG_ON(!list_empty(&lock->ast_list));
 	if (lock->ast_pending)
 		mlog(0, "lock has an ast getting flushed right now\n");
 
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index bcb9260c3735..43e6e3280569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 
 	mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
 	     dlm->name, res->lockname.len, res->lockname.name,
-	     orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery",
+	     orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
 	     send_to);
 
 	/* send it */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 62442e413a00..aa501d3f93f1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
 	if (ret)
 		goto out_dio;
 
+	count = ocount;
 	ret = generic_write_checks(file, ppos, &count,
 				   S_ISBLK(inode->i_mode));
 	if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
 
 	mutex_unlock(&inode->i_mutex);
 
+	if (written)
+		ret = written;
 	mlog_exit(ret);
-	return written ? written : ret;
+	return ret;
 }
 
 static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f033760ecbea..c48b93ac6b65 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
 	os->os_osb = osb;
 	os->os_count = 0;
 	os->os_seqno = 0;
-	os->os_scantime = CURRENT_TIME;
 	mutex_init(&os->os_lock);
 	INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
+}
 
+void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
+{
+	struct ocfs2_orphan_scan *os;
+
+	os = &osb->osb_orphan_scan;
+	os->os_scantime = CURRENT_TIME;
 	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
 		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
 	else {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5432c7f79cc6..2c3222aec622 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
 void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
 /* extended attribute block update */
 #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
 
+/* Update of a single quota block */
+#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
+
 /* global quotafile inode update, data block */
-#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
+				   OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
+#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
 /*
  * The two writes below can accidentally see global info dirty due
  * to set_info() quotactl so make them prepared for the writes.
  */
 /* quota data block, global info */
 /* Write to local quota file */
-#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
+#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
+			      OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
 /* global quota data block, local quota data block, global quota inode,
  * global quota info */
-#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
+#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
+			     2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
 static inline int ocfs2_quota_trans_credits(struct super_block *sb)
 {
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
 	return credits;
 }
 
-/* Number of credits needed for removing quota structure from file */
-int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
-/* Number of credits needed for initialization of new quota structure */
-int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
-
 /* group extend. inode update and last group update. */
 #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
 
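The journal.h hunk above rewrites the quota credit macros so every higher-level figure is composed from a per-block building block. The idea is simply that each on-disk block a transaction may dirty contributes a fixed credit, and operations are priced by summing the blocks they can touch. A minimal sketch of that composition (not ocfs2 code; the constants and the depth value are illustrative, though the formula mirrors the ocfs2_calc_qdel_credits change in quota_global.c below):

#include <stdio.h>

#define INODE_UPDATE_CREDITS       1
#define QUOTA_BLOCK_UPDATE_CREDITS 1
#define QINFO_WRITE_CREDITS  (INODE_UPDATE_CREDITS + QUOTA_BLOCK_UPDATE_CREDITS)

/* deleting a quota structure may dirty: tree blocks + leaf block,
 * the quota info block (plus its inode), and one more inode update */
static int calc_qdel_credits(int qtree_depth)
{
	return (qtree_depth + 2) * QUOTA_BLOCK_UPDATE_CREDITS +
	       QINFO_WRITE_CREDITS +
	       INODE_UPDATE_CREDITS;
}

int main(void)
{
	printf("credits for a depth-3 quota tree: %d\n", calc_qdel_credits(3));
	return 0;
}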
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9345ebb8493..39e1d5a39505 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
 	OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
 };
 
 #define OCFS2_OSB_SOFT_RO			0x0001
 #define OCFS2_OSB_HARD_RO			0x0002
 #define OCFS2_OSB_ERROR_FS			0x0004
-#define OCFS2_DEFAULT_ATIME_QUANTUM		60
+#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED	0x0008
+
+#define OCFS2_DEFAULT_ATIME_QUANTUM		60
 
 struct ocfs2_journal;
 struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
 	spin_unlock(&osb->osb_lock);
 }
 
+
+static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
+						unsigned long flag)
+{
+	unsigned long ret;
+
+	spin_lock(&osb->osb_lock);
+	ret = osb->osb_flags & flag;
+	spin_unlock(&osb->osb_lock);
+	return ret;
+}
+
 static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
 				     int hard)
 {
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 7365e2e08706..3fb96fcd4c81 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
 	unsigned int dqi_chunks;	/* Number of chunks in local quota file */
 	unsigned int dqi_blocks;	/* Number of blocks allocated for local quota file */
 	unsigned int dqi_syncms;	/* How often should we sync with other nodes */
-	unsigned int dqi_syncjiff;	/* Precomputed dqi_syncms in jiffies */
 	struct list_head dqi_chunk;	/* List of chunks */
 	struct inode *dqi_gqinode;	/* Global quota file inode */
 	struct ocfs2_lock_res dqi_gqlock;	/* Lock protecting quota information structure */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index edfa60cd155c..bf7742d0ee3b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
 	d->dqb_itime = cpu_to_le64(m->dqb_itime);
+	d->dqb_pad1 = d->dqb_pad2 = 0;
 }
 
 static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 
 	mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
 	if (gqinode->i_size < off + len) {
-		down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
-		err = ocfs2_extend_no_holes(gqinode, off + len, off);
-		up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
-		if (err < 0)
-			goto out;
+		loff_t rounded_end =
+				ocfs2_align_bytes_to_blocks(sb, off + len);
+
+		/* Space is already allocated in ocfs2_global_read_dquot() */
 		err = ocfs2_simple_size_update(gqinode,
 					       oinfo->dqi_gqi_bh,
-					       off + len);
+					       rounded_end);
 		if (err < 0)
 			goto out;
 		new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	}
 	if (err) {
 		mlog_errno(err);
-		return err;
+		goto out;
 	}
 	lock_buffer(bh);
 	if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
-	oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
 	oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
 	oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
 	oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
 	oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
 	INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
 	queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
-			   oinfo->dqi_syncjiff);
+			   msecs_to_jiffies(oinfo->dqi_syncms));
 
 out_err:
 	mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
 	return err;
 }
 
+static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
+{
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+
+	/*
+	 * We may need to allocate tree blocks and a leaf block but not the
+	 * root block
+	 */
+	return oinfo->dqi_gi.dqi_qtree_depth;
+}
+
+static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
+{
+	/* We modify all the allocated blocks, tree root, and info block */
+	return (ocfs2_global_qinit_alloc(sb, type) + 2) *
+			OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
+}
+
 /* Read in information from global quota file and acquire a reference to it.
  * dquot_acquire() has already started the transaction and locked quota file */
 int ocfs2_global_read_dquot(struct dquot *dquot)
 {
 	int err, err2, ex = 0;
-	struct ocfs2_mem_dqinfo *info =
-			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	struct inode *gqinode = info->dqi_gqinode;
+	int need_alloc = ocfs2_global_qinit_alloc(sb, type);
+	handle_t *handle = NULL;
 
 	err = ocfs2_qinfo_lock(info, 0);
 	if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
 	OCFS2_DQUOT(dquot)->dq_use_count++;
 	OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
 	OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+	ocfs2_qinfo_unlock(info, 0);
+
 	if (!dquot->dq_off) {	/* No real quota entry? */
-		/* Upgrade to exclusive lock for allocation */
-		ocfs2_qinfo_unlock(info, 0);
-		err = ocfs2_qinfo_lock(info, 1);
-		if (err < 0)
-			goto out_qlock;
 		ex = 1;
+		/*
+		 * Add blocks to quota file before we start a transaction since
+		 * locking allocators ranks above a transaction start
+		 */
+		WARN_ON(journal_current_handle());
+		down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+		err = ocfs2_extend_no_holes(gqinode,
+			gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
+			gqinode->i_size);
+		up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+		if (err < 0)
+			goto out;
 	}
+
+	handle = ocfs2_start_trans(osb,
+				   ocfs2_calc_global_qinit_credits(sb, type));
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto out;
+	}
+	err = ocfs2_qinfo_lock(info, ex);
+	if (err < 0)
+		goto out_trans;
 	err = qtree_write_dquot(&info->dqi_gi, dquot);
 	if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
 		err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
 		ocfs2_qinfo_unlock(info, 1);
 	else
 		ocfs2_qinfo_unlock(info, 0);
+out_trans:
+	if (handle)
+		ocfs2_commit_trans(osb, handle);
 out:
 	if (err < 0)
 		mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
 
 	dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
 	queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
-			   oinfo->dqi_syncjiff);
+			   msecs_to_jiffies(oinfo->dqi_syncms));
 }
 
 /*
@@ -635,20 +679,18 @@ out:
 	return status;
 }
 
-int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
+static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
 {
-	struct ocfs2_mem_dqinfo *oinfo;
-	int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
-				    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
-
-	if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
-		return 0;
-
-	oinfo = sb_dqinfo(sb, type)->dqi_priv;
-	/* We modify tree, leaf block, global info, local chunk header,
-	 * global and local inode */
-	return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
-	       2 * OCFS2_INODE_UPDATE_CREDITS;
+	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+	/*
+	 * We modify tree, leaf block, global info, local chunk header,
+	 * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
+	 * accounts for inode update
+	 */
+	return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
+	       OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
+	       OCFS2_QINFO_WRITE_CREDITS +
+	       OCFS2_INODE_UPDATE_CREDITS;
 }
 
 static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
 	return status;
 }
 
-int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
-{
-	struct ocfs2_mem_dqinfo *oinfo;
-	int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
-				    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
-	struct ocfs2_dinode *lfe, *gfe;
-
-	if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
-		return 0;
-
-	oinfo = sb_dqinfo(sb, type)->dqi_priv;
-	gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
-	lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
-	/* We can extend local file + global file. In local file we
-	 * can modify info, chunk header block and dquot block. In
-	 * global file we can modify info, tree and leaf block */
-	return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
-	       ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
-	       3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
-}
-
 static int ocfs2_acquire_dquot(struct dquot *dquot)
 {
-	handle_t *handle;
 	struct ocfs2_mem_dqinfo *oinfo =
 			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
-	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
 	int status = 0;
 
 	mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
 	status = ocfs2_lock_global_qf(oinfo, 1);
 	if (status < 0)
 		goto out;
-	handle = ocfs2_start_trans(osb,
-		ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
-	if (IS_ERR(handle)) {
-		status = PTR_ERR(handle);
-		mlog_errno(status);
-		goto out_ilock;
-	}
 	status = dquot_acquire(dquot);
-	ocfs2_commit_trans(osb, handle);
-out_ilock:
 	ocfs2_unlock_global_qf(oinfo, 1);
 out:
 	mlog_exit(status);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 5a460fa82553..bdb09cb6e1fe 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -20,6 +20,7 @@
20#include "sysfile.h" 20#include "sysfile.h"
21#include "dlmglue.h" 21#include "dlmglue.h"
22#include "quota.h" 22#include "quota.h"
23#include "uptodate.h"
23 24
24/* Number of local quota structures per block */ 25/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 26static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
100 handle_t *handle; 101 handle_t *handle;
101 int status; 102 int status;
102 103
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1); 104 handle = ocfs2_start_trans(OCFS2_SB(sb),
105 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
104 if (IS_ERR(handle)) { 106 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle); 107 status = PTR_ERR(handle);
106 mlog_errno(status); 108 mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
610 goto out_bh; 612 goto out_bh;
611 /* Mark quota file as clean if we are recovering quota file of 613 /* Mark quota file as clean if we are recovering quota file of
612 * some other node. */ 614 * some other node. */
613 handle = ocfs2_start_trans(osb, 1); 615 handle = ocfs2_start_trans(osb,
616 OCFS2_LOCAL_QINFO_WRITE_CREDITS);
614 if (IS_ERR(handle)) { 617 if (IS_ERR(handle)) {
615 status = PTR_ERR(handle); 618 status = PTR_ERR(handle);
616 mlog_errno(status); 619 mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
940 struct ocfs2_local_disk_chunk *dchunk; 943 struct ocfs2_local_disk_chunk *dchunk;
941 int status; 944 int status;
942 handle_t *handle; 945 handle_t *handle;
943 struct buffer_head *bh = NULL; 946 struct buffer_head *bh = NULL, *dbh = NULL;
944 u64 p_blkno; 947 u64 p_blkno;
945 948
946 /* We are protected by dqio_sem so no locking needed */ 949 /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
964 mlog_errno(status); 967 mlog_errno(status);
965 goto out; 968 goto out;
966 } 969 }
970 /* Local quota info and two new blocks we initialize */
971 handle = ocfs2_start_trans(OCFS2_SB(sb),
972 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
973 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
974 if (IS_ERR(handle)) {
975 status = PTR_ERR(handle);
976 mlog_errno(status);
977 goto out;
978 }
967 979
980 /* Initialize chunk header */
968 down_read(&OCFS2_I(lqinode)->ip_alloc_sem); 981 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
969 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 982 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
970 &p_blkno, NULL, NULL); 983 &p_blkno, NULL, NULL);
971 up_read(&OCFS2_I(lqinode)->ip_alloc_sem); 984 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
972 if (status < 0) { 985 if (status < 0) {
973 mlog_errno(status); 986 mlog_errno(status);
974 goto out; 987 goto out_trans;
975 } 988 }
976 bh = sb_getblk(sb, p_blkno); 989 bh = sb_getblk(sb, p_blkno);
977 if (!bh) { 990 if (!bh) {
978 status = -ENOMEM; 991 status = -ENOMEM;
979 mlog_errno(status); 992 mlog_errno(status);
980 goto out; 993 goto out_trans;
981 } 994 }
982 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
983 996 ocfs2_set_new_buffer_uptodate(lqinode, bh);
984 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
985 if (IS_ERR(handle)) {
986 status = PTR_ERR(handle);
987 mlog_errno(status);
988 goto out;
989 }
990
991 status = ocfs2_journal_access_dq(handle, lqinode, bh, 997 status = ocfs2_journal_access_dq(handle, lqinode, bh,
992 OCFS2_JOURNAL_ACCESS_WRITE); 998 OCFS2_JOURNAL_ACCESS_CREATE);
993 if (status < 0) { 999 if (status < 0) {
994 mlog_errno(status); 1000 mlog_errno(status);
995 goto out_trans; 1001 goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
999 memset(dchunk->dqc_bitmap, 0, 1005 memset(dchunk->dqc_bitmap, 0,
1000 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1006 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1001 OCFS2_QBLK_RESERVED_SPACE); 1007 OCFS2_QBLK_RESERVED_SPACE);
1002 set_buffer_uptodate(bh);
1003 unlock_buffer(bh); 1008 unlock_buffer(bh);
1004 status = ocfs2_journal_dirty(handle, bh); 1009 status = ocfs2_journal_dirty(handle, bh);
1005 if (status < 0) { 1010 if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1007 goto out_trans; 1012 goto out_trans;
1008 } 1013 }
1009 1014
1015 /* Initialize new block with structures */
1016 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1017 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1018 &p_blkno, NULL, NULL);
1019 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 if (status < 0) {
1021 mlog_errno(status);
1022 goto out_trans;
1023 }
1024 dbh = sb_getblk(sb, p_blkno);
1025 if (!dbh) {
1026 status = -ENOMEM;
1027 mlog_errno(status);
1028 goto out_trans;
1029 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) {
1034 mlog_errno(status);
1035 goto out_trans;
1036 }
1037 lock_buffer(dbh);
1038 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1039 unlock_buffer(dbh);
1040 status = ocfs2_journal_dirty(handle, dbh);
1041 if (status < 0) {
1042 mlog_errno(status);
1043 goto out_trans;
1044 }
1045
1046 /* Update local quotafile info */
1010 oinfo->dqi_blocks += 2; 1047 oinfo->dqi_blocks += 2;
1011 oinfo->dqi_chunks++; 1048 oinfo->dqi_chunks++;
1012 status = ocfs2_local_write_info(sb, type); 1049 status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
1031 ocfs2_commit_trans(OCFS2_SB(sb), handle); 1068 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1032out: 1069out:
1033 brelse(bh); 1070 brelse(bh);
1071 brelse(dbh);
1034 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); 1072 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1035 return ERR_PTR(status); 1073 return ERR_PTR(status);
1036} 1074}
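
The reshuffled ocfs2_local_quota_add_chunk() now (1) reserves all needed credits in a single transaction before any block is looked up, (2) marks the freshly allocated buffers up to date with ocfs2_set_new_buffer_uptodate() instead of set_buffer_uptodate(), and (3) journals them with OCFS2_JOURNAL_ACCESS_CREATE since they have no valid prior contents. Condensed from the hunks above, with error handling elided; a reading aid rather than standalone code:

handle = ocfs2_start_trans(OCFS2_SB(sb),
                           OCFS2_LOCAL_QINFO_WRITE_CREDITS +
                           2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
/* map the new logical block and attach a buffer for it */
status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
                                     &p_blkno, NULL, NULL);
bh = sb_getblk(sb, p_blkno);
ocfs2_set_new_buffer_uptodate(lqinode, bh);     /* brand-new block */
status = ocfs2_journal_access_dq(handle, lqinode, bh,
                                 OCFS2_JOURNAL_ACCESS_CREATE);
/* ... initialize chunk header and bitmap under lock_buffer() ... */
status = ocfs2_journal_dirty(handle, bh);
ocfs2_commit_trans(OCFS2_SB(sb), handle);
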
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1048 struct ocfs2_local_disk_chunk *dchunk; 1086 struct ocfs2_local_disk_chunk *dchunk;
1049 int epb = ol_quota_entries_per_block(sb); 1087 int epb = ol_quota_entries_per_block(sb);
1050 unsigned int chunk_blocks; 1088 unsigned int chunk_blocks;
1089 struct buffer_head *bh;
1090 u64 p_blkno;
1051 int status; 1091 int status;
1052 handle_t *handle; 1092 handle_t *handle;
1053 1093
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1075 mlog_errno(status); 1115 mlog_errno(status);
1076 goto out; 1116 goto out;
1077 } 1117 }
1078 handle = ocfs2_start_trans(OCFS2_SB(sb), 2); 1118
1119 /* Get buffer from the just added block */
1120 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1121 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1122 &p_blkno, NULL, NULL);
1123 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto out;
1127 }
1128 bh = sb_getblk(sb, p_blkno);
1129 if (!bh) {
1130 status = -ENOMEM;
1131 mlog_errno(status);
1132 goto out;
1133 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh);
1135
1136 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb),
1138 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
1139 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
1079 if (IS_ERR(handle)) { 1140 if (IS_ERR(handle)) {
1080 status = PTR_ERR(handle); 1141 status = PTR_ERR(handle);
1081 mlog_errno(status); 1142 mlog_errno(status);
1082 goto out; 1143 goto out;
1083 } 1144 }
1145 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) {
1149 mlog_errno(status);
1150 goto out_trans;
1151 }
1152 lock_buffer(bh);
1153 memset(bh->b_data, 0, sb->s_blocksize);
1154 unlock_buffer(bh);
1155 status = ocfs2_journal_dirty(handle, bh);
1156 if (status < 0) {
1157 mlog_errno(status);
1158 goto out_trans;
1159 }
1160 /* Update chunk header */
1084 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1085 OCFS2_JOURNAL_ACCESS_WRITE); 1162 OCFS2_JOURNAL_ACCESS_WRITE);
1086 if (status < 0) { 1163 if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1097 mlog_errno(status); 1174 mlog_errno(status);
1098 goto out_trans; 1175 goto out_trans;
1099 } 1176 }
1177 /* Update file header */
1100 oinfo->dqi_blocks++; 1178 oinfo->dqi_blocks++;
1101 status = ocfs2_local_write_info(sb, type); 1179 status = ocfs2_local_write_info(sb, type);
1102 if (status < 0) { 1180 if (status < 0) {
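
ocfs2_extend_local_quota_file() now follows the same sequence as the add-chunk path above: map the just-added block, attach and mark a new buffer, start one transaction sized for the local quota info, the chunk header and the new block, zero the block under OCFS2_JOURNAL_ACCESS_CREATE, then update the chunk header and file header. The two functions end up sharing an identical block-initialization step; a hypothetical helper (the name and signature are illustrative, not part of the patch) could capture it:

/* Hypothetical helper, sketched from the two call sites above. */
static int ocfs2_local_quota_init_block_sketch(handle_t *handle,
                                               struct inode *lqinode,
                                               struct buffer_head *bh,
                                               size_t zero_len)
{
        int status = ocfs2_journal_access_dq(handle, lqinode, bh,
                                             OCFS2_JOURNAL_ACCESS_CREATE);
        if (status < 0)
                return status;
        lock_buffer(bh);
        memset(bh->b_data, 0, zero_len);
        unlock_buffer(bh);
        return ocfs2_journal_dirty(handle, bh);
}
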
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 3f661376a2de..e49c41050264 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -17,6 +17,7 @@
17 * General Public License for more details. 17 * General Public License for more details.
18 */ 18 */
19 19
20#include <linux/kernel.h>
20#include <linux/crc32.h> 21#include <linux/crc32.h>
21#include <linux/module.h> 22#include <linux/module.h>
22 23
@@ -153,7 +154,7 @@ static int status_map[] = {
153 154
154static int dlm_status_to_errno(enum dlm_status status) 155static int dlm_status_to_errno(enum dlm_status status)
155{ 156{
156 BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); 157 BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
157 158
158 return status_map[status]; 159 return status_map[status];
159} 160}
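
The old assertion allowed status == sizeof(status_map)/sizeof(status_map[0]), one past the last valid index, and never rejected negative values; the replacement bounds both ends with ARRAY_SIZE(). A small stand-alone illustration of the off-by-one (user-space C, names are illustrative):

#include <assert.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const int status_map[] = { 0, -1, -2, -3 };

static int map_status(int status)
{
        /* the old check "status > ARRAY_SIZE(status_map)" would let
         * status == 4 through and read one element past the end */
        assert(status >= 0 && status < (int)ARRAY_SIZE(status_map));
        return status_map[status];
}

int main(void)
{
        printf("%d\n", map_status(3));  /* last valid index */
        return 0;
}
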
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7efb349fb9bd..b0ee0fdf799a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
777 } 777 }
778 di = (struct ocfs2_dinode *) (*bh)->b_data; 778 di = (struct ocfs2_dinode *) (*bh)->b_data;
779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); 779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
780 spin_lock_init(&stats->b_lock);
780 status = ocfs2_verify_volume(di, *bh, blksize, stats); 781 status = ocfs2_verify_volume(di, *bh, blksize, stats);
781 if (status >= 0) 782 if (status >= 0)
782 goto bail; 783 goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1182 wake_up(&osb->osb_mount_event); 1183 wake_up(&osb->osb_mount_event);
1183 1184
1184 /* Start this when the mount is almost sure of being successful */ 1185 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb); 1186 ocfs2_orphan_scan_start(osb);
1186 1187
1187 mlog_exit(status); 1188 mlog_exit(status);
1188 return status; 1189 return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
1213 mnt); 1214 mnt);
1214} 1215}
1215 1216
1217static void ocfs2_kill_sb(struct super_block *sb)
1218{
1219 struct ocfs2_super *osb = OCFS2_SB(sb);
1220
1221 /* Prevent further queueing of inode drop events */
1222 spin_lock(&dentry_list_lock);
1223 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1224 spin_unlock(&dentry_list_lock);
1225 /* Wait for work to finish and/or remove it */
1226 cancel_work_sync(&osb->dentry_lock_work);
1227
1228 kill_block_super(sb);
1229}
1230
1216static struct file_system_type ocfs2_fs_type = { 1231static struct file_system_type ocfs2_fs_type = {
1217 .owner = THIS_MODULE, 1232 .owner = THIS_MODULE,
1218 .name = "ocfs2", 1233 .name = "ocfs2",
1219 .get_sb = ocfs2_get_sb, /* is this called when we mount 1234 .get_sb = ocfs2_get_sb, /* is this called when we mount
1220 * the fs? */ 1235 * the fs? */
1221 .kill_sb = kill_block_super, /* set to the generic one 1236 .kill_sb = ocfs2_kill_sb,
1222 * right now, but do we 1237
1223 * need to change that? */
1224 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1238 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1225 .next = NULL 1239 .next = NULL
1226}; 1240};
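
The new ocfs2_kill_sb() imposes an order on unmount: set OCFS2_OSB_DROP_DENTRY_LOCK_IMMED under dentry_list_lock so no further dentry-lock drop work is queued, cancel_work_sync() the work item so nothing is still running against the superblock, and only then hand off to kill_block_super(). The same stop-the-producers, drain-the-worker, then-tear-down sequence as a user-space analogue (pthread-based and purely illustrative; none of these names come from the kernel):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool stopping;                   /* analogue of the DROP_DENTRY_LOCK_IMMED flag */

static bool try_queue_work(void)
{
        bool queued;

        pthread_mutex_lock(&lock);
        queued = !stopping;             /* producers check the flag under the lock */
        pthread_mutex_unlock(&lock);
        return queued;
}

static void *worker(void *arg)
{
        (void)arg;                      /* drain whatever was queued before the flag */
        return NULL;
}

int main(void)
{
        pthread_t thr;

        pthread_create(&thr, NULL, worker, NULL);

        pthread_mutex_lock(&lock);      /* 1. stop new work from being queued */
        stopping = true;
        pthread_mutex_unlock(&lock);

        pthread_join(thr, NULL);        /* 2. wait for in-flight work (cancel_work_sync) */

        printf("can still queue: %d\n", try_queue_work());     /* 3. now tear down */
        return 0;
}
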
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1819 1833
1820 debugfs_remove(osb->osb_ctxt); 1834 debugfs_remove(osb->osb_ctxt);
1821 1835
1836 /*
1837 * Flush inode dropping work queue so that deletes are
1838 * performed while the filesystem is still working
1839 */
1840 ocfs2_drop_all_dl_inodes(osb);
1841
1822 /* Orphan scan should be stopped as early as possible */ 1842 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb); 1843 ocfs2_orphan_scan_stop(osb);
1824 1844
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1981 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2001 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1982 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2002 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1983 2003
2004 ocfs2_orphan_scan_init(osb);
2005
1984 status = ocfs2_recovery_init(osb); 2006 status = ocfs2_recovery_init(osb);
1985 if (status) { 2007 if (status) {
1986 mlog(ML_ERROR, "Unable to initialize recovery state\n"); 2008 mlog(ML_ERROR, "Unable to initialize recovery state\n");
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba320e250747..d1a27cda984f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1052 struct ocfs2_xattr_block *xb; 1052 struct ocfs2_xattr_block *xb;
1053 struct ocfs2_xattr_value_root *xv; 1053 struct ocfs2_xattr_value_root *xv;
1054 size_t size; 1054 size_t size;
1055 int ret = -ENODATA, name_offset, name_len, block_off, i; 1055 int ret = -ENODATA, name_offset, name_len, i;
1056 int uninitialized_var(block_off);
1056 1057
1057 xs->bucket = ocfs2_xattr_bucket_new(inode); 1058 xs->bucket = ocfs2_xattr_bucket_new(inode);
1058 if (!xs->bucket) { 1059 if (!xs->bucket) {
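
block_off is only written and read on the bucket branch of this function, which is enough to trip a false "may be used uninitialized" warning on some gcc versions. Moving the declaration onto its own line wrapped in uninitialized_var() silences the warning without changing behaviour. For reference, the common expansion of the macro in kernels of this vintage (an assumption about compiler-gcc.h, check the tree being built):

/* illustrative expansion only: it silences the warning, it does not
 * zero the variable */
#define uninitialized_var(x)    x = x

int uninitialized_var(block_off);       /* becomes: int block_off = block_off; */
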
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 175db258942f..6f742f6658a9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
1003 1003
1004 if (!task) 1004 if (!task)
1005 return -ESRCH; 1005 return -ESRCH;
1006 task_lock(task); 1006 oom_adjust = task->oomkilladj;
1007 if (task->mm)
1008 oom_adjust = task->mm->oom_adj;
1009 else
1010 oom_adjust = OOM_DISABLE;
1011 task_unlock(task);
1012 put_task_struct(task); 1007 put_task_struct(task);
1013 1008
1014 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1009 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1037 task = get_proc_task(file->f_path.dentry->d_inode); 1032 task = get_proc_task(file->f_path.dentry->d_inode);
1038 if (!task) 1033 if (!task)
1039 return -ESRCH; 1034 return -ESRCH;
1040 task_lock(task); 1035 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
1041 if (!task->mm) {
1042 task_unlock(task);
1043 put_task_struct(task);
1044 return -EINVAL;
1045 }
1046 if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1047 task_unlock(task);
1048 put_task_struct(task); 1036 put_task_struct(task);
1049 return -EACCES; 1037 return -EACCES;
1050 } 1038 }
1051 task->mm->oom_adj = oom_adjust; 1039 task->oomkilladj = oom_adjust;
1052 task_unlock(task);
1053 put_task_struct(task); 1040 put_task_struct(task);
1054 if (end - buffer == 0) 1041 if (end - buffer == 0)
1055 return -EIO; 1042 return -EIO;
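
Both /proc/<pid>/oom_adj hunks switch back to the per-task task->oomkilladj field and drop the task_lock()/mm checks that the per-mm oom_adj needed. The permission rule is unchanged: anyone may make a task more killable, but lowering the value (protecting the task from the OOM killer) requires CAP_SYS_RESOURCE. In outline, taken from the new code above with error handling trimmed:

if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
        put_task_struct(task);
        return -EACCES;                 /* only privileged callers may lower it */
}
task->oomkilladj = oom_adjust;          /* per-task field; task_lock() no longer taken */
put_task_struct(task);
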
diff --git a/fs/select.c b/fs/select.c
index d870237e42c7..8084834e123e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq)
110{ 110{
111 init_poll_funcptr(&pwq->pt, __pollwait); 111 init_poll_funcptr(&pwq->pt, __pollwait);
112 pwq->polling_task = current; 112 pwq->polling_task = current;
113 pwq->triggered = 0;
113 pwq->error = 0; 114 pwq->error = 0;
114 pwq->table = NULL; 115 pwq->table = NULL;
115 pwq->inline_index = 0; 116 pwq->inline_index = 0;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 0c93c7ef3d18..965df1227d64 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -770,7 +770,7 @@ xfs_buf_associate_memory(
770 bp->b_pages = NULL; 770 bp->b_pages = NULL;
771 bp->b_addr = mem; 771 bp->b_addr = mem;
772 772
773 rval = _xfs_buf_get_pages(bp, page_count, 0); 773 rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
774 if (rval) 774 if (rval)
775 return rval; 775 return rval;
776 776
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b619d6b8ca43..98ef624d9baf 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -708,6 +708,16 @@ xfs_reclaim_inode(
708 return 0; 708 return 0;
709} 709}
710 710
711void
712__xfs_inode_set_reclaim_tag(
713 struct xfs_perag *pag,
714 struct xfs_inode *ip)
715{
716 radix_tree_tag_set(&pag->pag_ici_root,
717 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
718 XFS_ICI_RECLAIM_TAG);
719}
720
711/* 721/*
712 * We set the inode flag atomically with the radix tree tag. 722 * We set the inode flag atomically with the radix tree tag.
713 * Once we get tag lookups on the radix tree, this inode flag 723 * Once we get tag lookups on the radix tree, this inode flag
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag(
722 732
723 read_lock(&pag->pag_ici_lock); 733 read_lock(&pag->pag_ici_lock);
724 spin_lock(&ip->i_flags_lock); 734 spin_lock(&ip->i_flags_lock);
725 radix_tree_tag_set(&pag->pag_ici_root, 735 __xfs_inode_set_reclaim_tag(pag, ip);
726 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
727 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 736 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
728 spin_unlock(&ip->i_flags_lock); 737 spin_unlock(&ip->i_flags_lock);
729 read_unlock(&pag->pag_ici_lock); 738 read_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 2a10301c99c7..59120602588a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 49
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
51void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); 52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
52void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
53 struct xfs_inode *ip); 54 struct xfs_inode *ip);
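
__xfs_inode_set_reclaim_tag() factors the radix-tree tagging out of xfs_inode_set_reclaim_tag() so that other code, notably the xfs_iget_cache_hit() error path later in this diff, can re-tag an inode while already holding the required locks. Callers keep the existing locking convention; condensed from the hunk above:

read_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
__xfs_inode_set_reclaim_tag(pag, ip);           /* tag the per-AG radix tree */
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);         /* inode flag set atomically with
                                                 * the tag, under i_flags_lock */
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
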
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index db15feb906ff..4ece1906bd41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, 2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2013 blkcnt, XFS_BUF_LOCK, &bp); 2013 blkcnt,
2014 XFS_BUF_LOCK | XBF_DONT_BLOCK,
2015 &bp);
2014 if (error) 2016 if (error)
2015 return(error); 2017 return(error);
2016 2018
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2141 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 2143 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2142 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2144 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2143 2145
2144 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, 2146 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
2145 blkcnt, XFS_BUF_LOCK); 2147 XFS_BUF_LOCK | XBF_DONT_BLOCK);
2146 ASSERT(bp); 2148 ASSERT(bp);
2147 ASSERT(!XFS_BUF_GETERROR(bp)); 2149 ASSERT(!XFS_BUF_GETERROR(bp));
2148 2150
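
xfs_buf_associate_memory() (in the xfs_buf.c hunk above) and both remote-attribute paths here add XBF_DONT_BLOCK to their buffer flags, and xfs_readlink_bmap() gets the same treatment at the end of this diff. The flag makes the buffer cache allocate its pages without re-entering the filesystem through memory reclaim (a GFP_NOFS-style allocation), since these callers may already hold locks or a transaction. The call shape is otherwise unchanged:

bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
                       XFS_BUF_LOCK | XBF_DONT_BLOCK);
/* XBF_DONT_BLOCK: page allocation for the buffer must not recurse
 * back into the filesystem */
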
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 7928b9983c1d..8ee5b5a76a2a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6009,7 +6009,7 @@ xfs_getbmap(
6009 */ 6009 */
6010 error = ENOMEM; 6010 error = ENOMEM;
6011 subnex = 16; 6011 subnex = 16;
6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); 6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
6013 if (!map) 6013 if (!map)
6014 goto out_unlock_ilock; 6014 goto out_unlock_ilock;
6015 6015
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e9df99574829..26717388acf5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -120,8 +120,8 @@ xfs_btree_check_sblock(
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 121 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 122 xfs_buftrace("SBTREE ERROR", bp);
123 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, 123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 cur->bc_mp); 124 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 125 return XFS_ERROR(EFSCORRUPTED);
126 } 126 }
127 return 0; 127 return 0;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9ff6e57a5075..2847bbc1c534 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
2201xfs_da_state_t * 2201xfs_da_state_t *
2202xfs_da_state_alloc(void) 2202xfs_da_state_alloc(void)
2203{ 2203{
2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); 2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2205} 2205}
2206 2206
2207/* 2207/*
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2261 int off; 2261 int off;
2262 2262
2263 if (nbuf == 1) 2263 if (nbuf == 1)
2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); 2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
2265 else 2265 else
2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); 2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2267 dabuf->dirty = 0; 2267 dabuf->dirty = 0;
2268#ifdef XFS_DABUF_DEBUG 2268#ifdef XFS_DABUF_DEBUG
2269 dabuf->ra = ra; 2269 dabuf->ra = ra;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index c657bec6d951..bb1d58eb3982 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result(
256 !(args->op_flags & XFS_DA_OP_CILOOKUP)) 256 !(args->op_flags & XFS_DA_OP_CILOOKUP))
257 return EEXIST; 257 return EEXIST;
258 258
259 args->value = kmem_alloc(len, KM_MAYFAIL); 259 args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
260 if (!args->value) 260 if (!args->value)
261 return ENOMEM; 261 return ENOMEM;
262 262
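
The allocations changed above in xfs_getbmap(), xfs_da_state_alloc(), xfs_da_buf_make() and xfs_dir_cilookup_result() all gain KM_NOFS, the XFS flag for a GFP_NOFS-style allocation. These paths typically run with an inode lock or a transaction held, so a plain KM_SLEEP allocation could start memory reclaim that re-enters XFS and deadlocks. Typical shape, taken from the xfs_getbmap() hunk:

/* allocation under a held ilock/transaction: must not recurse into the FS */
map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
if (!map)
        goto out_unlock_ilock;          /* KM_MAYFAIL keeps failure handling local */
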
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cbd451bb4848..2d0b3e1da9e6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,17 +167,25 @@ xfs_growfs_data_private(
167 new = nb - mp->m_sb.sb_dblocks; 167 new = nb - mp->m_sb.sb_dblocks;
168 oagcount = mp->m_sb.sb_agcount; 168 oagcount = mp->m_sb.sb_agcount;
169 if (nagcount > oagcount) { 169 if (nagcount > oagcount) {
170 void *new_perag, *old_perag;
171
170 xfs_filestream_flush(mp); 172 xfs_filestream_flush(mp);
173
174 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
175 KM_MAYFAIL);
176 if (!new_perag)
177 return XFS_ERROR(ENOMEM);
178
171 down_write(&mp->m_peraglock); 179 down_write(&mp->m_peraglock);
172 mp->m_perag = kmem_realloc(mp->m_perag, 180 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
173 sizeof(xfs_perag_t) * nagcount, 181 old_perag = mp->m_perag;
174 sizeof(xfs_perag_t) * oagcount, 182 mp->m_perag = new_perag;
175 KM_SLEEP); 183
176 memset(&mp->m_perag[oagcount], 0,
177 (nagcount - oagcount) * sizeof(xfs_perag_t));
178 mp->m_flags |= XFS_MOUNT_32BITINODES; 184 mp->m_flags |= XFS_MOUNT_32BITINODES;
179 nagimax = xfs_initialize_perag(mp, nagcount); 185 nagimax = xfs_initialize_perag(mp, nagcount);
180 up_write(&mp->m_peraglock); 186 up_write(&mp->m_peraglock);
187
188 kmem_free(old_perag);
181 } 189 }
182 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 190 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
183 tp->t_flags |= XFS_TRANS_RESERVE; 191 tp->t_flags |= XFS_TRANS_RESERVE;
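
xfs_growfs_data_private() no longer calls kmem_realloc() on the per-AG array while holding m_peraglock. The new array is allocated zeroed with KM_MAYFAIL before the lock is taken, so an allocation failure returns ENOMEM cleanly; the old entries are then copied and the pointer swapped under the write lock, and the old array is freed afterwards. Readers holding the lock therefore see either the old array or the fully copied new one, never a half-grown buffer. The same allocate, copy, swap, free pattern in standalone C:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct perag_sketch { unsigned long pagf_freeblks; };

static int grow_array(struct perag_sketch **arr, size_t oldcnt, size_t newcnt)
{
        struct perag_sketch *newp, *oldp;

        newp = calloc(newcnt, sizeof(*newp));   /* may fail: nothing touched yet */
        if (!newp)
                return -1;

        /* in the kernel the copy and swap happen under m_peraglock held for write */
        memcpy(newp, *arr, oldcnt * sizeof(*newp));
        oldp = *arr;
        *arr = newp;

        free(oldp);                             /* old array released after the swap */
        return 0;
}

int main(void)
{
        struct perag_sketch *arr = calloc(4, sizeof(*arr));

        if (!arr)
                return 1;
        arr[3].pagf_freeblks = 42;
        if (grow_array(&arr, 4, 8) == 0)
                printf("%lu\n", arr[3].pagf_freeblks);
        free(arr);
        return 0;
}
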
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 34ec86923f7e..ecbf8b4d2e2e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -191,80 +191,82 @@ xfs_iget_cache_hit(
191 int flags, 191 int flags,
192 int lock_flags) __releases(pag->pag_ici_lock) 192 int lock_flags) __releases(pag->pag_ici_lock)
193{ 193{
194 struct inode *inode = VFS_I(ip);
194 struct xfs_mount *mp = ip->i_mount; 195 struct xfs_mount *mp = ip->i_mount;
195 int error = EAGAIN; 196 int error;
197
198 spin_lock(&ip->i_flags_lock);
196 199
197 /* 200 /*
198 * If INEW is set this inode is being set up 201 * If we are racing with another cache hit that is currently
199 * If IRECLAIM is set this inode is being torn down 202 * instantiating this inode or currently recycling it out of
200 * Pause and try again. 203 * reclaimable state, wait for the initialisation to complete
204 * before continuing.
205 *
206 * XXX(hch): eventually we should do something equivalent to
207 * wait_on_inode to wait for these flags to be cleared
208 * instead of polling for it.
201 */ 209 */
202 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { 210 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
203 XFS_STATS_INC(xs_ig_frecycle); 211 XFS_STATS_INC(xs_ig_frecycle);
212 error = EAGAIN;
204 goto out_error; 213 goto out_error;
205 } 214 }
206 215
207 /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ 216 /*
208 if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { 217 * If lookup is racing with unlink return an error immediately.
209 218 */
210 /* 219 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
211 * If lookup is racing with unlink, then we should return an 220 error = ENOENT;
212 * error immediately so we don't remove it from the reclaim 221 goto out_error;
213 * list and potentially leak the inode. 222 }
214 */
215 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
216 error = ENOENT;
217 goto out_error;
218 }
219 223
224 /*
225 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
226 * Need to carefully get it back into useable state.
227 */
228 if (ip->i_flags & XFS_IRECLAIMABLE) {
220 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 229 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
221 230
222 /* 231 /*
223 * We need to re-initialise the VFS inode as it has been 232 * We need to set XFS_INEW atomically with clearing the
224 * 'freed' by the VFS. Do this here so we can deal with 233 * reclaimable tag so that we do have an indicator of the
225 * errors cleanly, then tag it so it can be set up correctly 234 * inode still being initialized.
226 * later.
227 */ 235 */
228 if (inode_init_always(mp->m_super, VFS_I(ip))) { 236 ip->i_flags |= XFS_INEW;
229 error = ENOMEM; 237 ip->i_flags &= ~XFS_IRECLAIMABLE;
230 goto out_error; 238 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
231 }
232 239
233 /* 240 spin_unlock(&ip->i_flags_lock);
234 * We must set the XFS_INEW flag before clearing the 241 read_unlock(&pag->pag_ici_lock);
235 * XFS_IRECLAIMABLE flag so that if a racing lookup does
236 * not find the XFS_IRECLAIMABLE above but has the igrab()
237 * below succeed we can safely check XFS_INEW to detect
238 * that this inode is still being initialised.
239 */
240 xfs_iflags_set(ip, XFS_INEW);
241 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
242 242
243 /* clear the radix tree reclaim flag as well. */ 243 error = -inode_init_always(mp->m_super, inode);
244 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 244 if (error) {
245 } else if (!igrab(VFS_I(ip))) { 245 /*
246 * Re-initializing the inode failed, and we are in deep
247 * trouble. Try to re-add it to the reclaim list.
248 */
249 read_lock(&pag->pag_ici_lock);
250 spin_lock(&ip->i_flags_lock);
251
252 ip->i_flags &= ~XFS_INEW;
253 ip->i_flags |= XFS_IRECLAIMABLE;
254 __xfs_inode_set_reclaim_tag(pag, ip);
255 goto out_error;
256 }
257 inode->i_state = I_LOCK|I_NEW;
258 } else {
246 /* If the VFS inode is being torn down, pause and try again. */ 259 /* If the VFS inode is being torn down, pause and try again. */
247 XFS_STATS_INC(xs_ig_frecycle); 260 if (!igrab(inode)) {
248 goto out_error; 261 error = EAGAIN;
249 } else if (xfs_iflags_test(ip, XFS_INEW)) { 262 goto out_error;
250 /* 263 }
251 * We are racing with another cache hit that is
252 * currently recycling this inode out of the XFS_IRECLAIMABLE
253 * state. Wait for the initialisation to complete before
254 * continuing.
255 */
256 wait_on_inode(VFS_I(ip));
257 }
258 264
259 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 265 /* We've got a live one. */
260 error = ENOENT; 266 spin_unlock(&ip->i_flags_lock);
261 iput(VFS_I(ip)); 267 read_unlock(&pag->pag_ici_lock);
262 goto out_error;
263 } 268 }
264 269
265 /* We've got a live one. */
266 read_unlock(&pag->pag_ici_lock);
267
268 if (lock_flags != 0) 270 if (lock_flags != 0)
269 xfs_ilock(ip, lock_flags); 271 xfs_ilock(ip, lock_flags);
270 272
@@ -274,6 +276,7 @@ xfs_iget_cache_hit(
274 return 0; 276 return 0;
275 277
276out_error: 278out_error:
279 spin_unlock(&ip->i_flags_lock);
277 read_unlock(&pag->pag_ici_lock); 280 read_unlock(&pag->pag_ici_lock);
278 return error; 281 return error;
279} 282}
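
The rewritten xfs_iget_cache_hit() inspects and updates ip->i_flags directly under ip->i_flags_lock rather than through the xfs_iflags_* helpers, so the INEW/IRECLAIMABLE transition and the reclaim-tag clearing look atomic to other lookups, and a failure in inode_init_always() can now put the inode back on the reclaim list instead of leaking it. Condensed reading of the new flow (error handling and statistics trimmed; a reading aid, not standalone code, all names from the hunk above):

spin_lock(&ip->i_flags_lock);

if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM))
        goto out_error;                         /* being set up or torn down: EAGAIN */
if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE))
        goto out_error;                         /* racing with unlink: ENOENT */

if (ip->i_flags & XFS_IRECLAIMABLE) {
        /* recycle: set INEW before clearing IRECLAIMABLE so concurrent
         * lookups always see one of the two "not usable yet" flags */
        ip->i_flags |= XFS_INEW;
        ip->i_flags &= ~XFS_IRECLAIMABLE;
        __xfs_inode_clear_reclaim_tag(mp, pag, ip);
        spin_unlock(&ip->i_flags_lock);
        read_unlock(&pag->pag_ici_lock);

        if (inode_init_always(mp->m_super, inode)) {
                /* re-init failed: retake the locks, restore IRECLAIMABLE,
                 * re-tag the tree via __xfs_inode_set_reclaim_tag(), bail */
                goto out_error;
        }
        inode->i_state = I_LOCK | I_NEW;
} else {
        if (!igrab(inode))
                goto out_error;                 /* VFS teardown in progress: EAGAIN */
        spin_unlock(&ip->i_flags_lock);
        read_unlock(&pag->pag_ici_lock);
}
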
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f22d65fed0a..da428b3fe0f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -343,6 +343,16 @@ xfs_iformat(
343 return XFS_ERROR(EFSCORRUPTED); 343 return XFS_ERROR(EFSCORRUPTED);
344 } 344 }
345 345
346 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
347 !ip->i_mount->m_rtdev_targp)) {
348 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
349 "corrupt dinode %Lu, has realtime flag set.",
350 ip->i_ino);
351 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
352 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
353 return XFS_ERROR(EFSCORRUPTED);
354 }
355
346 switch (ip->i_d.di_mode & S_IFMT) { 356 switch (ip->i_d.di_mode & S_IFMT) {
347 case S_IFIFO: 357 case S_IFIFO:
348 case S_IFCHR: 358 case S_IFCHR:
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3750f04ede0b..9dbdff3ea484 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3180,7 +3180,7 @@ try_again:
3180STATIC void 3180STATIC void
3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) 3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3182{ 3182{
3183 ASSERT(spin_is_locked(&log->l_icloglock)); 3183 assert_spin_locked(&log->l_icloglock);
3184 3184
3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3186 xlog_state_switch_iclogs(log, iclog, 0); 3186 xlog_state_switch_iclogs(log, iclog, 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..492d75bae2bf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,7 +538,9 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 540
541 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); 541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED |
543 XBF_DONT_BLOCK);
542 error = XFS_BUF_GETERROR(bp); 544 error = XFS_BUF_GETERROR(bp);
543 if (error) { 545 if (error) {
544 xfs_ioerror_alert("xfs_readlink", 546 xfs_ioerror_alert("xfs_readlink",