aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/eventpoll.c38
-rw-r--r--fs/gfs2/file.c14
-rw-r--r--fs/gfs2/lops.c16
-rw-r--r--fs/gfs2/quota.c7
-rw-r--r--fs/gfs2/rgrp.c33
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/trans.c8
-rw-r--r--fs/notify/fanotify/fanotify.c1
-rw-r--r--fs/xfs/xfs_alloc.c43
-rw-r--r--fs/xfs/xfs_alloc.h3
-rw-r--r--fs/xfs/xfs_alloc_btree.c2
-rw-r--r--fs/xfs/xfs_bmap.c63
-rw-r--r--fs/xfs/xfs_bmap.h9
-rw-r--r--fs/xfs/xfs_buf_item.c18
-rw-r--r--fs/xfs/xfs_fsops.c21
-rw-r--r--fs/xfs/xfs_ialloc.c1
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_iomap.c4
-rw-r--r--fs/xfs/xfs_log.c19
-rw-r--r--fs/xfs/xfs_log_recover.c2
21 files changed, 174 insertions, 136 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index da72250ddc1c..cd96649bfe62 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
346/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ 346/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
347static inline int ep_op_has_event(int op) 347static inline int ep_op_has_event(int op)
348{ 348{
349 return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; 349 return op != EPOLL_CTL_DEL;
350} 350}
351 351
352/* Initialize the poll safe wake up structure */ 352/* Initialize the poll safe wake up structure */
@@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
676 return 0; 676 return 0;
677} 677}
678 678
679/*
680 * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
681 * had no event flags set, indicating that another thread may be currently
682 * handling that item's events (in the case that EPOLLONESHOT was being
683 * used). Otherwise a zero result indicates that the item has been disabled
684 * from receiving events. A disabled item may be re-enabled via
685 * EPOLL_CTL_MOD. Must be called with "mtx" held.
686 */
687static int ep_disable(struct eventpoll *ep, struct epitem *epi)
688{
689 int result = 0;
690 unsigned long flags;
691
692 spin_lock_irqsave(&ep->lock, flags);
693 if (epi->event.events & ~EP_PRIVATE_BITS) {
694 if (ep_is_linked(&epi->rdllink))
695 list_del_init(&epi->rdllink);
696 /* Ensure ep_poll_callback will not add epi back onto ready
697 list: */
698 epi->event.events &= EP_PRIVATE_BITS;
699 }
700 else
701 result = -EBUSY;
702 spin_unlock_irqrestore(&ep->lock, flags);
703
704 return result;
705}
706
707static void ep_free(struct eventpoll *ep) 679static void ep_free(struct eventpoll *ep)
708{ 680{
709 struct rb_node *rbp; 681 struct rb_node *rbp;
@@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
1048 rb_insert_color(&epi->rbn, &ep->rbr); 1020 rb_insert_color(&epi->rbn, &ep->rbr);
1049} 1021}
1050 1022
1023
1024
1051#define PATH_ARR_SIZE 5 1025#define PATH_ARR_SIZE 5
1052/* 1026/*
1053 * These are the number paths of length 1 to 5, that we are allowing to emanate 1027 * These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1813 } else 1787 } else
1814 error = -ENOENT; 1788 error = -ENOENT;
1815 break; 1789 break;
1816 case EPOLL_CTL_DISABLE:
1817 if (epi)
1818 error = ep_disable(ep, epi);
1819 else
1820 error = -ENOENT;
1821 break;
1822 } 1790 }
1823 mutex_unlock(&ep->mtx); 1791 mutex_unlock(&ep->mtx);
1824 1792
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 0def0504afc1..e056b4ce4877 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -516,15 +516,13 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
516 struct gfs2_holder i_gh; 516 struct gfs2_holder i_gh;
517 int error; 517 int error;
518 518
519 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 519 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
520 error = gfs2_glock_nq(&i_gh); 520 &i_gh);
521 if (error == 0) {
522 file_accessed(file);
523 gfs2_glock_dq(&i_gh);
524 }
525 gfs2_holder_uninit(&i_gh);
526 if (error) 521 if (error)
527 return error; 522 return error;
523 /* grab lock to update inode */
524 gfs2_glock_dq_uninit(&i_gh);
525 file_accessed(file);
528 } 526 }
529 vma->vm_ops = &gfs2_vm_ops; 527 vma->vm_ops = &gfs2_vm_ops;
530 528
@@ -677,10 +675,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
677 size_t writesize = iov_length(iov, nr_segs); 675 size_t writesize = iov_length(iov, nr_segs);
678 struct dentry *dentry = file->f_dentry; 676 struct dentry *dentry = file->f_dentry;
679 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 677 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
680 struct gfs2_sbd *sdp;
681 int ret; 678 int ret;
682 679
683 sdp = GFS2_SB(file->f_mapping->host);
684 ret = gfs2_rs_alloc(ip); 680 ret = gfs2_rs_alloc(ip);
685 if (ret) 681 if (ret)
686 return ret; 682 return ret;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ff95a2d54ee..9ceccb1595a3 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
393 struct gfs2_meta_header *mh; 393 struct gfs2_meta_header *mh;
394 struct gfs2_trans *tr; 394 struct gfs2_trans *tr;
395 395
396 lock_buffer(bd->bd_bh);
397 gfs2_log_lock(sdp);
398 tr = current->journal_info; 396 tr = current->journal_info;
399 tr->tr_touched = 1; 397 tr->tr_touched = 1;
400 if (!list_empty(&bd->bd_list)) 398 if (!list_empty(&bd->bd_list))
401 goto out; 399 return;
402 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 400 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
403 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 401 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
404 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; 402 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
@@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
414 sdp->sd_log_num_buf++; 412 sdp->sd_log_num_buf++;
415 list_add(&bd->bd_list, &sdp->sd_log_le_buf); 413 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
416 tr->tr_num_buf_new++; 414 tr->tr_num_buf_new++;
417out:
418 gfs2_log_unlock(sdp);
419 unlock_buffer(bd->bd_bh);
420} 415}
421 416
422static void gfs2_check_magic(struct buffer_head *bh) 417static void gfs2_check_magic(struct buffer_head *bh)
@@ -621,7 +616,6 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
621 616
622static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 617static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
623{ 618{
624 struct gfs2_log_descriptor *ld;
625 struct gfs2_meta_header *mh; 619 struct gfs2_meta_header *mh;
626 unsigned int offset; 620 unsigned int offset;
627 struct list_head *head = &sdp->sd_log_le_revoke; 621 struct list_head *head = &sdp->sd_log_le_revoke;
@@ -634,7 +628,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
634 628
635 length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64)); 629 length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
636 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke); 630 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
637 ld = page_address(page);
638 offset = sizeof(struct gfs2_log_descriptor); 631 offset = sizeof(struct gfs2_log_descriptor);
639 632
640 list_for_each_entry(bd, head, bd_list) { 633 list_for_each_entry(bd, head, bd_list) {
@@ -777,12 +770,10 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
777 struct address_space *mapping = bd->bd_bh->b_page->mapping; 770 struct address_space *mapping = bd->bd_bh->b_page->mapping;
778 struct gfs2_inode *ip = GFS2_I(mapping->host); 771 struct gfs2_inode *ip = GFS2_I(mapping->host);
779 772
780 lock_buffer(bd->bd_bh);
781 gfs2_log_lock(sdp);
782 if (tr) 773 if (tr)
783 tr->tr_touched = 1; 774 tr->tr_touched = 1;
784 if (!list_empty(&bd->bd_list)) 775 if (!list_empty(&bd->bd_list))
785 goto out; 776 return;
786 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 777 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
787 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 778 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
788 if (gfs2_is_jdata(ip)) { 779 if (gfs2_is_jdata(ip)) {
@@ -793,9 +784,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
793 } else { 784 } else {
794 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered); 785 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
795 } 786 }
796out:
797 gfs2_log_unlock(sdp);
798 unlock_buffer(bd->bd_bh);
799} 787}
800 788
801/** 789/**
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 40c4b0d42fa8..c5af8e18f27a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -497,8 +497,11 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
497 struct gfs2_quota_data **qd; 497 struct gfs2_quota_data **qd;
498 int error; 498 int error;
499 499
500 if (ip->i_res == NULL) 500 if (ip->i_res == NULL) {
501 gfs2_rs_alloc(ip); 501 error = gfs2_rs_alloc(ip);
502 if (error)
503 return error;
504 }
502 505
503 qd = ip->i_res->rs_qa_qd; 506 qd = ip->i_res->rs_qa_qd;
504 507
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3cc402ce6fea..38fe18f2f055 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
553 */ 553 */
554int gfs2_rs_alloc(struct gfs2_inode *ip) 554int gfs2_rs_alloc(struct gfs2_inode *ip)
555{ 555{
556 int error = 0;
557 struct gfs2_blkreserv *res; 556 struct gfs2_blkreserv *res;
558 557
559 if (ip->i_res) 558 if (ip->i_res)
@@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
561 560
562 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 561 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
563 if (!res) 562 if (!res)
564 error = -ENOMEM; 563 return -ENOMEM;
565 564
566 RB_CLEAR_NODE(&res->rs_node); 565 RB_CLEAR_NODE(&res->rs_node);
567 566
@@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
571 else 570 else
572 ip->i_res = res; 571 ip->i_res = res;
573 up_write(&ip->i_rw_mutex); 572 up_write(&ip->i_rw_mutex);
574 return error; 573 return 0;
575} 574}
576 575
577static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) 576static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -1263,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1263 int ret = 0; 1262 int ret = 0;
1264 u64 amt; 1263 u64 amt;
1265 u64 trimmed = 0; 1264 u64 trimmed = 0;
1265 u64 start, end, minlen;
1266 unsigned int x; 1266 unsigned int x;
1267 unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1267 1268
1268 if (!capable(CAP_SYS_ADMIN)) 1269 if (!capable(CAP_SYS_ADMIN))
1269 return -EPERM; 1270 return -EPERM;
@@ -1271,19 +1272,25 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1271 if (!blk_queue_discard(q)) 1272 if (!blk_queue_discard(q))
1272 return -EOPNOTSUPP; 1273 return -EOPNOTSUPP;
1273 1274
1274 if (argp == NULL) { 1275 if (copy_from_user(&r, argp, sizeof(r)))
1275 r.start = 0;
1276 r.len = ULLONG_MAX;
1277 r.minlen = 0;
1278 } else if (copy_from_user(&r, argp, sizeof(r)))
1279 return -EFAULT; 1276 return -EFAULT;
1280 1277
1281 ret = gfs2_rindex_update(sdp); 1278 ret = gfs2_rindex_update(sdp);
1282 if (ret) 1279 if (ret)
1283 return ret; 1280 return ret;
1284 1281
1285 rgd = gfs2_blk2rgrpd(sdp, r.start, 0); 1282 start = r.start >> bs_shift;
1286 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); 1283 end = start + (r.len >> bs_shift);
1284 minlen = max_t(u64, r.minlen,
1285 q->limits.discard_granularity) >> bs_shift;
1286
1287 rgd = gfs2_blk2rgrpd(sdp, start, 0);
1288 rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0);
1289
1290 if (end <= start ||
1291 minlen > sdp->sd_max_rg_data ||
1292 start > rgd_end->rd_data0 + rgd_end->rd_data)
1293 return -EINVAL;
1287 1294
1288 while (1) { 1295 while (1) {
1289 1296
@@ -1295,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1295 /* Trim each bitmap in the rgrp */ 1302 /* Trim each bitmap in the rgrp */
1296 for (x = 0; x < rgd->rd_length; x++) { 1303 for (x = 0; x < rgd->rd_length; x++) {
1297 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1304 struct gfs2_bitmap *bi = rgd->rd_bits + x;
1298 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt); 1305 ret = gfs2_rgrp_send_discards(sdp,
1306 rgd->rd_data0, NULL, bi, minlen,
1307 &amt);
1299 if (ret) { 1308 if (ret) {
1300 gfs2_glock_dq_uninit(&gh); 1309 gfs2_glock_dq_uninit(&gh);
1301 goto out; 1310 goto out;
@@ -1324,7 +1333,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1324 1333
1325out: 1334out:
1326 r.len = trimmed << 9; 1335 r.len = trimmed << 9;
1327 if (argp && copy_to_user(argp, &r, sizeof(r))) 1336 if (copy_to_user(argp, &r, sizeof(r)))
1328 return -EFAULT; 1337 return -EFAULT;
1329 1338
1330 return ret; 1339 return ret;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index bc737261f234..d6488674d916 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -810,7 +810,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
810 return; 810 return;
811 } 811 }
812 need_unlock = 1; 812 need_unlock = 1;
813 } 813 } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
814 return;
814 815
815 if (current->journal_info == NULL) { 816 if (current->journal_info == NULL) {
816 ret = gfs2_trans_begin(sdp, RES_DINODE, 0); 817 ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index adbd27875ef9..413627072f36 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
155 struct gfs2_sbd *sdp = gl->gl_sbd; 155 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct gfs2_bufdata *bd; 156 struct gfs2_bufdata *bd;
157 157
158 lock_buffer(bh);
159 gfs2_log_lock(sdp);
158 bd = bh->b_private; 160 bd = bh->b_private;
159 if (bd) 161 if (bd)
160 gfs2_assert(sdp, bd->bd_gl == gl); 162 gfs2_assert(sdp, bd->bd_gl == gl);
161 else { 163 else {
164 gfs2_log_unlock(sdp);
165 unlock_buffer(bh);
162 gfs2_attach_bufdata(gl, bh, meta); 166 gfs2_attach_bufdata(gl, bh, meta);
163 bd = bh->b_private; 167 bd = bh->b_private;
168 lock_buffer(bh);
169 gfs2_log_lock(sdp);
164 } 170 }
165 lops_add(sdp, bd); 171 lops_add(sdp, bd);
172 gfs2_log_unlock(sdp);
173 unlock_buffer(bh);
166} 174}
167 175
168void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 176void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b97e8e..a50636025364 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
21 if ((old->path.mnt == new->path.mnt) && 21 if ((old->path.mnt == new->path.mnt) &&
22 (old->path.dentry == new->path.dentry)) 22 (old->path.dentry == new->path.dentry))
23 return true; 23 return true;
24 break;
24 case (FSNOTIFY_EVENT_NONE): 25 case (FSNOTIFY_EVENT_NONE):
25 return true; 26 return true;
26 default: 27 default:
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4f33c32affe3..335206a9c698 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1866,6 +1866,7 @@ xfs_alloc_fix_freelist(
1866 /* 1866 /*
1867 * Initialize the args structure. 1867 * Initialize the args structure.
1868 */ 1868 */
1869 memset(&targs, 0, sizeof(targs));
1869 targs.tp = tp; 1870 targs.tp = tp;
1870 targs.mp = mp; 1871 targs.mp = mp;
1871 targs.agbp = agbp; 1872 targs.agbp = agbp;
@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
2207 * group or loop over the allocation groups to find the result. 2208 * group or loop over the allocation groups to find the result.
2208 */ 2209 */
2209int /* error */ 2210int /* error */
2210__xfs_alloc_vextent( 2211xfs_alloc_vextent(
2211 xfs_alloc_arg_t *args) /* allocation argument structure */ 2212 xfs_alloc_arg_t *args) /* allocation argument structure */
2212{ 2213{
2213 xfs_agblock_t agsize; /* allocation group size */ 2214 xfs_agblock_t agsize; /* allocation group size */
@@ -2417,46 +2418,6 @@ error0:
2417 return error; 2418 return error;
2418} 2419}
2419 2420
2420static void
2421xfs_alloc_vextent_worker(
2422 struct work_struct *work)
2423{
2424 struct xfs_alloc_arg *args = container_of(work,
2425 struct xfs_alloc_arg, work);
2426 unsigned long pflags;
2427
2428 /* we are in a transaction context here */
2429 current_set_flags_nested(&pflags, PF_FSTRANS);
2430
2431 args->result = __xfs_alloc_vextent(args);
2432 complete(args->done);
2433
2434 current_restore_flags_nested(&pflags, PF_FSTRANS);
2435}
2436
2437/*
2438 * Data allocation requests often come in with little stack to work on. Push
2439 * them off to a worker thread so there is lots of stack to use. Metadata
2440 * requests, OTOH, are generally from low stack usage paths, so avoid the
2441 * context switch overhead here.
2442 */
2443int
2444xfs_alloc_vextent(
2445 struct xfs_alloc_arg *args)
2446{
2447 DECLARE_COMPLETION_ONSTACK(done);
2448
2449 if (!args->userdata)
2450 return __xfs_alloc_vextent(args);
2451
2452
2453 args->done = &done;
2454 INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
2455 queue_work(xfs_alloc_wq, &args->work);
2456 wait_for_completion(&done);
2457 return args->result;
2458}
2459
2460/* 2421/*
2461 * Free an extent. 2422 * Free an extent.
2462 * Just break up the extent address and hand off to xfs_free_ag_extent 2423 * Just break up the extent address and hand off to xfs_free_ag_extent
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 93be4a667ca1..feacb061bab7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -120,9 +120,6 @@ typedef struct xfs_alloc_arg {
120 char isfl; /* set if is freelist blocks - !acctg */ 120 char isfl; /* set if is freelist blocks - !acctg */
121 char userdata; /* set if this is user data */ 121 char userdata; /* set if this is user data */
122 xfs_fsblock_t firstblock; /* io first block allocated */ 122 xfs_fsblock_t firstblock; /* io first block allocated */
123 struct completion *done;
124 struct work_struct work;
125 int result;
126} xfs_alloc_arg_t; 123} xfs_alloc_arg_t;
127 124
128/* 125/*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index f1647caace8f..f7876c6d6165 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -121,6 +121,8 @@ xfs_allocbt_free_block(
121 xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, 121 xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
122 XFS_EXTENT_BUSY_SKIP_DISCARD); 122 XFS_EXTENT_BUSY_SKIP_DISCARD);
123 xfs_trans_agbtree_delta(cur->bc_tp, -1); 123 xfs_trans_agbtree_delta(cur->bc_tp, -1);
124
125 xfs_trans_binval(cur->bc_tp, bp);
124 return 0; 126 return 0;
125} 127}
126 128
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 848ffa77707b..83d0cf3df930 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
2437 * Normal allocation, done through xfs_alloc_vextent. 2437 * Normal allocation, done through xfs_alloc_vextent.
2438 */ 2438 */
2439 tryagain = isaligned = 0; 2439 tryagain = isaligned = 0;
2440 memset(&args, 0, sizeof(args));
2440 args.tp = ap->tp; 2441 args.tp = ap->tp;
2441 args.mp = mp; 2442 args.mp = mp;
2442 args.fsbno = ap->blkno; 2443 args.fsbno = ap->blkno;
@@ -3082,6 +3083,7 @@ xfs_bmap_extents_to_btree(
3082 * Convert to a btree with two levels, one record in root. 3083 * Convert to a btree with two levels, one record in root.
3083 */ 3084 */
3084 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); 3085 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
3086 memset(&args, 0, sizeof(args));
3085 args.tp = tp; 3087 args.tp = tp;
3086 args.mp = mp; 3088 args.mp = mp;
3087 args.firstblock = *firstblock; 3089 args.firstblock = *firstblock;
@@ -3237,6 +3239,7 @@ xfs_bmap_local_to_extents(
3237 xfs_buf_t *bp; /* buffer for extent block */ 3239 xfs_buf_t *bp; /* buffer for extent block */
3238 xfs_bmbt_rec_host_t *ep;/* extent record pointer */ 3240 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3239 3241
3242 memset(&args, 0, sizeof(args));
3240 args.tp = tp; 3243 args.tp = tp;
3241 args.mp = ip->i_mount; 3244 args.mp = ip->i_mount;
3242 args.firstblock = *firstblock; 3245 args.firstblock = *firstblock;
@@ -4616,12 +4619,11 @@ xfs_bmapi_delay(
4616 4619
4617 4620
4618STATIC int 4621STATIC int
4619xfs_bmapi_allocate( 4622__xfs_bmapi_allocate(
4620 struct xfs_bmalloca *bma, 4623 struct xfs_bmalloca *bma)
4621 int flags)
4622{ 4624{
4623 struct xfs_mount *mp = bma->ip->i_mount; 4625 struct xfs_mount *mp = bma->ip->i_mount;
4624 int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 4626 int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
4625 XFS_ATTR_FORK : XFS_DATA_FORK; 4627 XFS_ATTR_FORK : XFS_DATA_FORK;
4626 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); 4628 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4627 int tmp_logflags = 0; 4629 int tmp_logflags = 0;
@@ -4654,24 +4656,27 @@ xfs_bmapi_allocate(
4654 * Indicate if this is the first user data in the file, or just any 4656 * Indicate if this is the first user data in the file, or just any
4655 * user data. 4657 * user data.
4656 */ 4658 */
4657 if (!(flags & XFS_BMAPI_METADATA)) { 4659 if (!(bma->flags & XFS_BMAPI_METADATA)) {
4658 bma->userdata = (bma->offset == 0) ? 4660 bma->userdata = (bma->offset == 0) ?
4659 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; 4661 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
4660 } 4662 }
4661 4663
4662 bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1; 4664 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4663 4665
4664 /* 4666 /*
4665 * Only want to do the alignment at the eof if it is userdata and 4667 * Only want to do the alignment at the eof if it is userdata and
4666 * allocation length is larger than a stripe unit. 4668 * allocation length is larger than a stripe unit.
4667 */ 4669 */
4668 if (mp->m_dalign && bma->length >= mp->m_dalign && 4670 if (mp->m_dalign && bma->length >= mp->m_dalign &&
4669 !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { 4671 !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4670 error = xfs_bmap_isaeof(bma, whichfork); 4672 error = xfs_bmap_isaeof(bma, whichfork);
4671 if (error) 4673 if (error)
4672 return error; 4674 return error;
4673 } 4675 }
4674 4676
4677 if (bma->flags & XFS_BMAPI_STACK_SWITCH)
4678 bma->stack_switch = 1;
4679
4675 error = xfs_bmap_alloc(bma); 4680 error = xfs_bmap_alloc(bma);
4676 if (error) 4681 if (error)
4677 return error; 4682 return error;
@@ -4706,7 +4711,7 @@ xfs_bmapi_allocate(
4706 * A wasdelay extent has been initialized, so shouldn't be flagged 4711 * A wasdelay extent has been initialized, so shouldn't be flagged
4707 * as unwritten. 4712 * as unwritten.
4708 */ 4713 */
4709 if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) && 4714 if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
4710 xfs_sb_version_hasextflgbit(&mp->m_sb)) 4715 xfs_sb_version_hasextflgbit(&mp->m_sb))
4711 bma->got.br_state = XFS_EXT_UNWRITTEN; 4716 bma->got.br_state = XFS_EXT_UNWRITTEN;
4712 4717
@@ -4734,6 +4739,45 @@ xfs_bmapi_allocate(
4734 return 0; 4739 return 0;
4735} 4740}
4736 4741
4742static void
4743xfs_bmapi_allocate_worker(
4744 struct work_struct *work)
4745{
4746 struct xfs_bmalloca *args = container_of(work,
4747 struct xfs_bmalloca, work);
4748 unsigned long pflags;
4749
4750 /* we are in a transaction context here */
4751 current_set_flags_nested(&pflags, PF_FSTRANS);
4752
4753 args->result = __xfs_bmapi_allocate(args);
4754 complete(args->done);
4755
4756 current_restore_flags_nested(&pflags, PF_FSTRANS);
4757}
4758
4759/*
4760 * Some allocation requests often come in with little stack to work on. Push
4761 * them off to a worker thread so there is lots of stack to use. Otherwise just
4762 * call directly to avoid the context switch overhead here.
4763 */
4764int
4765xfs_bmapi_allocate(
4766 struct xfs_bmalloca *args)
4767{
4768 DECLARE_COMPLETION_ONSTACK(done);
4769
4770 if (!args->stack_switch)
4771 return __xfs_bmapi_allocate(args);
4772
4773
4774 args->done = &done;
4775 INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
4776 queue_work(xfs_alloc_wq, &args->work);
4777 wait_for_completion(&done);
4778 return args->result;
4779}
4780
4737STATIC int 4781STATIC int
4738xfs_bmapi_convert_unwritten( 4782xfs_bmapi_convert_unwritten(
4739 struct xfs_bmalloca *bma, 4783 struct xfs_bmalloca *bma,
@@ -4919,6 +4963,7 @@ xfs_bmapi_write(
4919 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4963 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4920 bma.wasdel = wasdelay; 4964 bma.wasdel = wasdelay;
4921 bma.offset = bno; 4965 bma.offset = bno;
4966 bma.flags = flags;
4922 4967
4923 /* 4968 /*
4924 * There's a 32/64 bit type mismatch between the 4969 * There's a 32/64 bit type mismatch between the
@@ -4934,7 +4979,7 @@ xfs_bmapi_write(
4934 4979
4935 ASSERT(len > 0); 4980 ASSERT(len > 0);
4936 ASSERT(bma.length > 0); 4981 ASSERT(bma.length > 0);
4937 error = xfs_bmapi_allocate(&bma, flags); 4982 error = xfs_bmapi_allocate(&bma);
4938 if (error) 4983 if (error)
4939 goto error0; 4984 goto error0;
4940 if (bma.blkno == NULLFSBLOCK) 4985 if (bma.blkno == NULLFSBLOCK)
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 803b56d7ce16..5f469c3516eb 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,6 +77,7 @@ typedef struct xfs_bmap_free
77 * from written to unwritten, otherwise convert from unwritten to written. 77 * from written to unwritten, otherwise convert from unwritten to written.
78 */ 78 */
79#define XFS_BMAPI_CONVERT 0x040 79#define XFS_BMAPI_CONVERT 0x040
80#define XFS_BMAPI_STACK_SWITCH 0x080
80 81
81#define XFS_BMAPI_FLAGS \ 82#define XFS_BMAPI_FLAGS \
82 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 83 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -85,7 +86,8 @@ typedef struct xfs_bmap_free
85 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 86 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
86 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 87 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
87 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 88 { XFS_BMAPI_CONTIG, "CONTIG" }, \
88 { XFS_BMAPI_CONVERT, "CONVERT" } 89 { XFS_BMAPI_CONVERT, "CONVERT" }, \
90 { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
89 91
90 92
91static inline int xfs_bmapi_aflag(int w) 93static inline int xfs_bmapi_aflag(int w)
@@ -133,6 +135,11 @@ typedef struct xfs_bmalloca {
133 char userdata;/* set if is user data */ 135 char userdata;/* set if is user data */
134 char aeof; /* allocated space at eof */ 136 char aeof; /* allocated space at eof */
135 char conv; /* overwriting unwritten extents */ 137 char conv; /* overwriting unwritten extents */
138 char stack_switch;
139 int flags;
140 struct completion *done;
141 struct work_struct work;
142 int result;
136} xfs_bmalloca_t; 143} xfs_bmalloca_t;
137 144
138/* 145/*
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a8d0ed911196..becf4a97efc6 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -526,7 +526,25 @@ xfs_buf_item_unpin(
526 } 526 }
527 xfs_buf_relse(bp); 527 xfs_buf_relse(bp);
528 } else if (freed && remove) { 528 } else if (freed && remove) {
529 /*
530 * There are currently two references to the buffer - the active
531 * LRU reference and the buf log item. What we are about to do
532 * here - simulate a failed IO completion - requires 3
533 * references.
534 *
535 * The LRU reference is removed by the xfs_buf_stale() call. The
536 * buf item reference is removed by the xfs_buf_iodone()
537 * callback that is run by xfs_buf_do_callbacks() during ioend
538 * processing (via the bp->b_iodone callback), and then finally
539 * the ioend processing will drop the IO reference if the buffer
540 * is marked XBF_ASYNC.
541 *
542 * Hence we need to take an additional reference here so that IO
543 * completion processing doesn't free the buffer prematurely.
544 */
529 xfs_buf_lock(bp); 545 xfs_buf_lock(bp);
546 xfs_buf_hold(bp);
547 bp->b_flags |= XBF_ASYNC;
530 xfs_buf_ioerror(bp, EIO); 548 xfs_buf_ioerror(bp, EIO);
531 XFS_BUF_UNDONE(bp); 549 XFS_BUF_UNDONE(bp);
532 xfs_buf_stale(bp); 550 xfs_buf_stale(bp);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index c25b094efbf7..4beaede43277 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -399,9 +399,26 @@ xfs_growfs_data_private(
399 399
400 /* update secondary superblocks. */ 400 /* update secondary superblocks. */
401 for (agno = 1; agno < nagcount; agno++) { 401 for (agno = 1; agno < nagcount; agno++) {
402 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 402 error = 0;
403 /*
404 * new secondary superblocks need to be zeroed, not read from
405 * disk as the contents of the new area we are growing into is
406 * completely unknown.
407 */
408 if (agno < oagcount) {
409 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
403 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 410 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
404 XFS_FSS_TO_BB(mp, 1), 0, &bp); 411 XFS_FSS_TO_BB(mp, 1), 0, &bp);
412 } else {
413 bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
414 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
415 XFS_FSS_TO_BB(mp, 1), 0);
416 if (bp)
417 xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
418 else
419 error = ENOMEM;
420 }
421
405 if (error) { 422 if (error) {
406 xfs_warn(mp, 423 xfs_warn(mp,
407 "error %d reading secondary superblock for ag %d", 424 "error %d reading secondary superblock for ag %d",
@@ -423,7 +440,7 @@ xfs_growfs_data_private(
423 break; /* no point in continuing */ 440 break; /* no point in continuing */
424 } 441 }
425 } 442 }
426 return 0; 443 return error;
427 444
428 error0: 445 error0:
429 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 446 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 445bf1aef31c..c5c4ef4f2bdb 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -250,6 +250,7 @@ xfs_ialloc_ag_alloc(
250 /* boundary */ 250 /* boundary */
251 struct xfs_perag *pag; 251 struct xfs_perag *pag;
252 252
253 memset(&args, 0, sizeof(args));
253 args.tp = tp; 254 args.tp = tp;
254 args.mp = tp->t_mountp; 255 args.mp = tp->t_mountp;
255 256
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2778258fcfa2..1938b41ee9f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1509,7 +1509,8 @@ xfs_ifree_cluster(
1509 * to mark all the active inodes on the buffer stale. 1509 * to mark all the active inodes on the buffer stale.
1510 */ 1510 */
1511 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 1511 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
1512 mp->m_bsize * blks_per_cluster, 0); 1512 mp->m_bsize * blks_per_cluster,
1513 XBF_UNMAPPED);
1513 1514
1514 if (!bp) 1515 if (!bp)
1515 return ENOMEM; 1516 return ENOMEM;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8305f2ac6773..c1df3c623de2 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,7 +70,7 @@ xfs_find_handle(
70 int hsize; 70 int hsize;
71 xfs_handle_t handle; 71 xfs_handle_t handle;
72 struct inode *inode; 72 struct inode *inode;
73 struct fd f; 73 struct fd f = {0};
74 struct path path; 74 struct path path;
75 int error; 75 int error;
76 struct xfs_inode *ip; 76 struct xfs_inode *ip;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 973dff6ad935..7f537663365b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -584,7 +584,9 @@ xfs_iomap_write_allocate(
584 * pointer that the caller gave to us. 584 * pointer that the caller gave to us.
585 */ 585 */
586 error = xfs_bmapi_write(tp, ip, map_start_fsb, 586 error = xfs_bmapi_write(tp, ip, map_start_fsb,
587 count_fsb, 0, &first_block, 1, 587 count_fsb,
588 XFS_BMAPI_STACK_SWITCH,
589 &first_block, 1,
588 imap, &nimaps, &free_list); 590 imap, &nimaps, &free_list);
589 if (error) 591 if (error)
590 goto trans_cancel; 592 goto trans_cancel;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7f4f9370d0e7..4dad756962d0 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2387,14 +2387,27 @@ xlog_state_do_callback(
2387 2387
2388 2388
2389 /* 2389 /*
2390 * update the last_sync_lsn before we drop the 2390 * Completion of a iclog IO does not imply that
2391 * a transaction has completed, as transactions
2392 * can be large enough to span many iclogs. We
2393 * cannot change the tail of the log half way
2394 * through a transaction as this may be the only
2395 * transaction in the log and moving th etail to
2396 * point to the middle of it will prevent
2397 * recovery from finding the start of the
2398 * transaction. Hence we should only update the
2399 * last_sync_lsn if this iclog contains
2400 * transaction completion callbacks on it.
2401 *
2402 * We have to do this before we drop the
2391 * icloglock to ensure we are the only one that 2403 * icloglock to ensure we are the only one that
2392 * can update it. 2404 * can update it.
2393 */ 2405 */
2394 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), 2406 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2395 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); 2407 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
2396 atomic64_set(&log->l_last_sync_lsn, 2408 if (iclog->ic_callback)
2397 be64_to_cpu(iclog->ic_header.h_lsn)); 2409 atomic64_set(&log->l_last_sync_lsn,
2410 be64_to_cpu(iclog->ic_header.h_lsn));
2398 2411
2399 } else 2412 } else
2400 ioerrors++; 2413 ioerrors++;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5da3ace352bf..d308749fabf1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3541,7 +3541,7 @@ xlog_do_recovery_pass(
3541 * - order is important. 3541 * - order is important.
3542 */ 3542 */
3543 error = xlog_bread_offset(log, 0, 3543 error = xlog_bread_offset(log, 0,
3544 bblks - split_bblks, hbp, 3544 bblks - split_bblks, dbp,
3545 offset + BBTOB(split_bblks)); 3545 offset + BBTOB(split_bblks));
3546 if (error) 3546 if (error)
3547 goto bread_err2; 3547 goto bread_err2;