author		Linus Torvalds <torvalds@linux-foundation.org>	2012-01-08 16:05:29 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-08 16:05:29 -0500
commit		29ad0de279002f9b6a63df5ba85328f5b633b842 (patch)
tree		8284c1a50ff2ecf9defca2a47f12947977c04df1 /fs/xfs
parent		972b2c719990f91eb3b2310d44ef8a2d38955a14 (diff)
parent		40d344ec5ee440596b1f3ae87556e20c7197757a (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (22 commits)
  xfs: mark the xfssyncd workqueue as non-reentrant
  xfs: simplify xfs_qm_detach_gdquots
  xfs: fix acl count validation in xfs_acl_from_disk()
  xfs: remove unused XBT_FORCE_SLEEP bit
  xfs: remove XFS_QMOPT_DQSUSER
  xfs: kill xfs_qm_idtodq
  xfs: merge xfs_qm_dqinit_core into the only caller
  xfs: add a xfs_dqhold helper
  xfs: simplify xfs_qm_dqattach_grouphint
  xfs: nest qm_dqfrlist_lock inside the dquot qlock
  xfs: flatten the dquot lock ordering
  xfs: implement lazy removal for the dquot freelist
  xfs: remove XFS_DQ_INACTIVE
  xfs: cleanup xfs_qm_dqlookup
  xfs: cleanup dquot locking helpers
  xfs: remove the sync_mode argument to xfs_qm_dqflush_all
  xfs: remove xfs_qm_sync
  xfs: make sure to really flush all dquots in xfs_qm_quotacheck
  xfs: untangle SYNC_WAIT and SYNC_TRYLOCK meanings for xfs_qm_dqflush
  xfs: remove the lid_size field in struct log_item_desc
  ...

Fix up trivial conflict in fs/xfs/xfs_sync.c
Diffstat (limited to 'fs/xfs')
-rw-r--r--	fs/xfs/xfs_acl.c	2
-rw-r--r--	fs/xfs/xfs_buf.c	6
-rw-r--r--	fs/xfs/xfs_buf.h	3
-rw-r--r--	fs/xfs/xfs_dquot.c	500
-rw-r--r--	fs/xfs/xfs_dquot.h	39
-rw-r--r--	fs/xfs/xfs_dquot_item.c	5
-rw-r--r--	fs/xfs/xfs_inode_item.c	2
-rw-r--r--	fs/xfs/xfs_log.c	79
-rw-r--r--	fs/xfs/xfs_log.h	8
-rw-r--r--	fs/xfs/xfs_log_cil.c	96
-rw-r--r--	fs/xfs/xfs_mount.h	1
-rw-r--r--	fs/xfs/xfs_qm.c	464
-rw-r--r--	fs/xfs/xfs_qm.h	6
-rw-r--r--	fs/xfs/xfs_quota.h	12
-rw-r--r--	fs/xfs/xfs_super.c	36
-rw-r--r--	fs/xfs/xfs_sync.c	6
-rw-r--r--	fs/xfs/xfs_trace.h	2
-rw-r--r--	fs/xfs/xfs_trans.c	475
-rw-r--r--	fs/xfs/xfs_trans.h	3
19 files changed, 412 insertions, 1333 deletions
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 76e4266d2e7e..ac702a6eab9b 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -39,7 +39,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
 	struct posix_acl_entry *acl_e;
 	struct posix_acl *acl;
 	struct xfs_acl_entry *ace;
-	int count, i;
+	unsigned int count, i;
 
 	count = be32_to_cpu(aclp->acl_cnt);
 	if (count > XFS_ACL_MAX_ENTRIES)
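The one-word change above is the entire fix named in the merge summary ("xfs: fix acl count validation in xfs_acl_from_disk()"): acl_cnt is read straight off disk, and with a signed count a crafted negative value sails past the count > XFS_ACL_MAX_ENTRIES bound check before being used as a large positive size. A minimal standalone sketch of the failure mode; the demo_* names are hypothetical, only the signed/unsigned comparison is the point:

#include <stdio.h>
#include <stdint.h>

#define DEMO_ACL_MAX_ENTRIES 25	/* stand-in for XFS_ACL_MAX_ENTRIES */

/* Hypothetical validator: 'raw' is a 32-bit count read from disk. */
static int demo_count_ok_signed(uint32_t raw)
{
	int count = raw;	/* 0xffffffff becomes -1 on two's-complement */
	return count <= DEMO_ACL_MAX_ENTRIES;	/* -1 wrongly passes */
}

static int demo_count_ok_unsigned(uint32_t raw)
{
	unsigned int count = raw;	/* stays 4294967295 */
	return count <= DEMO_ACL_MAX_ENTRIES;	/* correctly rejected */
}

int main(void)
{
	uint32_t evil = 0xffffffffu;	/* attacker-controlled on-disk value */

	printf("signed:   %s\n", demo_count_ok_signed(evil) ? "accepted" : "rejected");
	printf("unsigned: %s\n", demo_count_ok_unsigned(evil) ? "accepted" : "rejected");
	return 0;
}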
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cf0ac056815f..2277bcae395f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1701,12 +1701,8 @@ xfsbufd(
 		struct list_head tmp;
 		struct blk_plug plug;
 
-		if (unlikely(freezing(current))) {
-			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+		if (unlikely(freezing(current)))
 			refrigerator();
-		} else {
-			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-		}
 
 		/* sleep for a long time if there is nothing to do. */
 		if (list_empty(&target->bt_delwri_queue))
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5bab046e859f..df7ffb0affe7 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -90,8 +90,7 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
 
 typedef enum {
-	XBT_FORCE_SLEEP = 0,
-	XBT_FORCE_FLUSH = 1,
+	XBT_FORCE_FLUSH = 0,
 } xfs_buftarg_flags_t;
 
 typedef struct xfs_buftarg {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 25d7280e9f6b..b4ff40b5f918 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -39,20 +39,19 @@
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 
-
 /*
- LOCK ORDER
-
- inode lock		(ilock)
- dquot hash-chain lock	(hashlock)
- xqm dquot freelist lock (freelistlock
- mount's dquot list lock (mplistlock)
- user dquot lock - lock ordering among dquots is based on the uid or gid
- group dquot lock - similar to udquots. Between the two dquots, the udquot
-		    has to be locked first.
- pin lock - the dquot lock must be held to take this lock.
- flush lock - ditto.
-*/
+ * Lock order:
+ *
+ * ip->i_lock
+ *   qh->qh_lock
+ *     qi->qi_dqlist_lock
+ *       dquot->q_qlock (xfs_dqlock() and friends)
+ *         dquot->q_flush (xfs_dqflock() and friends)
+ *           xfs_Gqm->qm_dqfrlist_lock
+ *
+ * If two dquots need to be locked the order is user before group/project,
+ * otherwise by the lowest id first, see xfs_dqlock2.
+ */
 
 #ifdef DEBUG
 xfs_buftarg_t *xfs_dqerror_target;
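The rewritten comment encodes the flattened hierarchy from the "xfs: flatten the dquot lock ordering" patch in this series, and its last sentence is the rule that makes two-dquot locking deadlock-free. A hedged sketch of the id-ordered half of that rule (the real xfs_dqlock2() also uses a nested lock class for the second mutex; this only shows the ordering decision):

/*
 * Sketch only: always lock the lower-id dquot first so two tasks
 * locking the same pair can never deadlock.  Not the real xfs_dqlock2().
 */
static void demo_dqlock2(struct xfs_dquot *d1, struct xfs_dquot *d2)
{
	if (d1 && d2 && be32_to_cpu(d1->q_core.d_id) >
			be32_to_cpu(d2->q_core.d_id)) {
		struct xfs_dquot *tmp = d1;	/* swap so d1 has the lower id */

		d1 = d2;
		d2 = tmp;
	}
	if (d1)
		xfs_dqlock(d1);
	if (d2)
		xfs_dqlock(d2);	/* nested lock class in the real code */
}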
@@ -155,24 +154,6 @@ xfs_qm_dqdestroy(
 }
 
 /*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
-	xfs_dqid_t	id,
-	uint		type,
-	xfs_dqblk_t	*d)
-{
-	/*
-	 * Caller has zero'd the entire dquot 'chunk' already.
-	 */
-	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-	d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-	d->dd_diskdq.d_id = cpu_to_be32(id);
-	d->dd_diskdq.d_flags = type;
-}
-
-/*
  * If default limits are in force, push them into the dquot now.
  * We overwrite the dquot limits only if they are zero and this
  * is not the root dquot.
@@ -328,8 +309,13 @@ xfs_qm_init_dquot_blk(
 	curid = id - (id % q->qi_dqperchunk);
 	ASSERT(curid >= 0);
 	memset(d, 0, BBTOB(q->qi_dqchunklen));
-	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
-		xfs_qm_dqinit_core(curid, type, d);
+	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
+		d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+		d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+		d->dd_diskdq.d_id = cpu_to_be32(curid);
+		d->dd_diskdq.d_flags = type;
+	}
+
 	xfs_trans_dquot_buf(tp, bp,
 			    (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
 			    ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
@@ -564,36 +550,62 @@ xfs_qm_dqtobp(
  * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
  * and release the buffer immediately.
  *
+ * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed.
  */
-/* ARGSUSED */
-STATIC int
+int
 xfs_qm_dqread(
-	xfs_trans_t	**tpp,
-	xfs_dqid_t	id,
-	xfs_dquot_t	*dqp,	/* dquot to get filled in */
-	uint		flags)
+	struct xfs_mount	*mp,
+	xfs_dqid_t		id,
+	uint			type,
+	uint			flags,
+	struct xfs_dquot	**O_dqpp)
 {
-	xfs_disk_dquot_t *ddqp;
-	xfs_buf_t	*bp;
-	int		error;
-	xfs_trans_t	*tp;
+	struct xfs_dquot	*dqp;
+	struct xfs_disk_dquot	*ddqp;
+	struct xfs_buf		*bp;
+	struct xfs_trans	*tp = NULL;
+	int			error;
+	int			cancelflags = 0;
 
-	ASSERT(tpp);
+	dqp = xfs_qm_dqinit(mp, id, type);
 
 	trace_xfs_dqread(dqp);
 
+	if (flags & XFS_QMOPT_DQALLOC) {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+				XFS_WRITE_LOG_RES(mp) +
+				/*
+				 * Round the chunklen up to the next multiple
+				 * of 128 (buf log item chunk size)).
+				 */
+				BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
+				0,
+				XFS_TRANS_PERM_LOG_RES,
+				XFS_WRITE_LOG_COUNT);
+		if (error)
+			goto error1;
+		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+	}
+
 	/*
 	 * get a pointer to the on-disk dquot and the buffer containing it
 	 * dqp already knows its own type (GROUP/USER).
 	 */
-	if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
-		return (error);
+	error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
+	if (error) {
+		/*
+		 * This can happen if quotas got turned off (ESRCH),
+		 * or if the dquot didn't exist on disk and we ask to
+		 * allocate (ENOENT).
+		 */
+		trace_xfs_dqread_fail(dqp);
+		cancelflags |= XFS_TRANS_ABORT;
+		goto error1;
 	}
-	tp = *tpp;
 
 	/* copy everything from disk dquot to the incore dquot */
 	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-	ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
 	xfs_qm_dquot_logitem_init(dqp);
 
 	/*
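The reservation arithmetic in the hunk above is worth unpacking: BBTOB(qi_dqchunklen) - 1 + 128 over-reserves by one chunk minus one byte, which is exactly enough to cover the chunk length rounded up to the next multiple of 128, the buf log item chunk size named in the comment. A small self-checking example of that bound (plain C, DEMO_CHUNK standing in for the 128-byte constant):

#include <assert.h>

#define DEMO_CHUNK 128	/* buf log item chunk size from the comment above */

/* round len up to the next multiple of DEMO_CHUNK */
static unsigned int demo_roundup(unsigned int len)
{
	return (len + DEMO_CHUNK - 1) & ~(DEMO_CHUNK - 1u);
}

int main(void)
{
	unsigned int len;

	/* len - 1 + 128 always covers roundup(len, 128): e.g. 4096 -> 4223 */
	for (len = 1; len <= 8192; len++)
		assert(len - 1 + DEMO_CHUNK >= demo_roundup(len));
	return 0;
}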
@@ -622,77 +634,22 @@ xfs_qm_dqread(
 	ASSERT(xfs_buf_islocked(bp));
 	xfs_trans_brelse(tp, bp);
 
-	return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	id,	 /* gid or uid, depending on type */
-	uint		type,	 /* UDQUOT or GDQUOT */
-	uint		flags,	 /* DQALLOC, DQREPAIR */
-	xfs_dquot_t	**O_dqpp)/* OUT : incore dquot, not locked */
-{
-	xfs_dquot_t	*dqp;
-	int		error;
-	xfs_trans_t	*tp;
-	int		cancelflags=0;
-
-	dqp = xfs_qm_dqinit(mp, id, type);
-	tp = NULL;
-	if (flags & XFS_QMOPT_DQALLOC) {
-		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
-				XFS_WRITE_LOG_RES(mp) +
-				BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
-				128,
-				0,
-				XFS_TRANS_PERM_LOG_RES,
-				XFS_WRITE_LOG_COUNT);
-		if (error) {
-			cancelflags = 0;
-			goto error0;
-		}
-		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
-	}
-
-	/*
-	 * Read it from disk; xfs_dqread() takes care of
-	 * all the necessary initialization of dquot's fields (locks, etc)
-	 */
-	if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
-		/*
-		 * This can happen if quotas got turned off (ESRCH),
-		 * or if the dquot didn't exist on disk and we ask to
-		 * allocate (ENOENT).
-		 */
-		trace_xfs_dqread_fail(dqp);
-		cancelflags |= XFS_TRANS_ABORT;
-		goto error0;
-	}
 	if (tp) {
-		if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
-			goto error1;
+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+		if (error)
+			goto error0;
 	}
 
 	*O_dqpp = dqp;
-	return (0);
+	return error;
 
- error0:
-	ASSERT(error);
+error1:
 	if (tp)
 		xfs_trans_cancel(tp, cancelflags);
- error1:
+error0:
 	xfs_qm_dqdestroy(dqp);
 	*O_dqpp = NULL;
-	return (error);
+	return error;
 }
 
 /*
@@ -710,12 +667,9 @@ xfs_qm_dqlookup(
 	xfs_dquot_t	**O_dqpp)
 {
 	xfs_dquot_t	*dqp;
-	uint		flist_locked;
 
 	ASSERT(mutex_is_locked(&qh->qh_lock));
 
-	flist_locked = B_FALSE;
-
 	/*
 	 * Traverse the hashchain looking for a match
 	 */
@@ -725,70 +679,31 @@ xfs_qm_dqlookup(
 		 * dqlock to look at the id field of the dquot, since the
 		 * id can't be modified without the hashlock anyway.
 		 */
-		if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
-			trace_xfs_dqlookup_found(dqp);
+		if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
+			continue;
 
-			/*
-			 * All in core dquots must be on the dqlist of mp
-			 */
-			ASSERT(!list_empty(&dqp->q_mplist));
-
-			xfs_dqlock(dqp);
-			if (dqp->q_nrefs == 0) {
-				ASSERT(!list_empty(&dqp->q_freelist));
-				if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-					trace_xfs_dqlookup_want(dqp);
-
-					/*
-					 * We may have raced with dqreclaim_one()
-					 * (and lost). So, flag that we don't
-					 * want the dquot to be reclaimed.
-					 */
-					dqp->dq_flags |= XFS_DQ_WANT;
-					xfs_dqunlock(dqp);
-					mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-					xfs_dqlock(dqp);
-					dqp->dq_flags &= ~(XFS_DQ_WANT);
-				}
-				flist_locked = B_TRUE;
-			}
+		trace_xfs_dqlookup_found(dqp);
 
-			/*
-			 * id couldn't have changed; we had the hashlock all
-			 * along
-			 */
-			ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
-			if (flist_locked) {
-				if (dqp->q_nrefs != 0) {
-					mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-					flist_locked = B_FALSE;
-				} else {
-					/* take it off the freelist */
-					trace_xfs_dqlookup_freelist(dqp);
-					list_del_init(&dqp->q_freelist);
-					xfs_Gqm->qm_dqfrlist_cnt--;
-				}
-			}
+		xfs_dqlock(dqp);
+		if (dqp->dq_flags & XFS_DQ_FREEING) {
+			*O_dqpp = NULL;
+			xfs_dqunlock(dqp);
+			return -1;
+		}
 
-			XFS_DQHOLD(dqp);
+		dqp->q_nrefs++;
 
-			if (flist_locked)
-				mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-			/*
-			 * move the dquot to the front of the hashchain
-			 */
-			ASSERT(mutex_is_locked(&qh->qh_lock));
-			list_move(&dqp->q_hashlist, &qh->qh_list);
-			trace_xfs_dqlookup_done(dqp);
-			*O_dqpp = dqp;
-			return 0;
-		}
+		/*
+		 * move the dquot to the front of the hashchain
+		 */
+		list_move(&dqp->q_hashlist, &qh->qh_list);
+		trace_xfs_dqlookup_done(dqp);
+		*O_dqpp = dqp;
+		return 0;
 	}
 
 	*O_dqpp = NULL;
-	ASSERT(mutex_is_locked(&qh->qh_lock));
-	return (1);
+	return 1;
 }
 
 /*
@@ -829,11 +744,7 @@ xfs_qm_dqget(
 			return (EIO);
 		}
 	}
-#endif
-
- again:
 
-#ifdef DEBUG
 	ASSERT(type == XFS_DQ_USER ||
 	       type == XFS_DQ_PROJ ||
 	       type == XFS_DQ_GROUP);
@@ -845,13 +756,21 @@ xfs_qm_dqget(
 		ASSERT(ip->i_gdquot == NULL);
 	}
 #endif
+
+restart:
 	mutex_lock(&h->qh_lock);
 
 	/*
 	 * Look in the cache (hashtable).
 	 * The chain is kept locked during lookup.
 	 */
-	if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+	switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
+	case -1:
+		XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+		mutex_unlock(&h->qh_lock);
+		delay(1);
+		goto restart;
+	case 0:
 		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
 		/*
 		 * The dquot was found, moved to the front of the chain,
@@ -862,9 +781,11 @@ xfs_qm_dqget(
 		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
 		mutex_unlock(&h->qh_lock);
 		trace_xfs_dqget_hit(*O_dqpp);
-		return (0);	/* success */
+		return 0;	/* success */
+	default:
+		XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+		break;
 	}
-	XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
 
 	/*
 	 * Dquot cache miss. We don't want to keep the inode lock across
@@ -882,41 +803,18 @@ xfs_qm_dqget(
 	version = h->qh_version;
 	mutex_unlock(&h->qh_lock);
 
-	/*
-	 * Allocate the dquot on the kernel heap, and read the ondisk
-	 * portion off the disk. Also, do all the necessary initialization
-	 * This can return ENOENT if dquot didn't exist on disk and we didn't
-	 * ask it to allocate; ESRCH if quotas got turned off suddenly.
-	 */
-	if ((error = xfs_qm_idtodq(mp, id, type,
-				  flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
-					   XFS_QMOPT_DOWARN),
-				  &dqp))) {
-		if (ip)
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
-		return (error);
-	}
+	error = xfs_qm_dqread(mp, id, type, flags, &dqp);
 
-	/*
-	 * See if this is mount code calling to look at the overall quota limits
-	 * which are stored in the id == 0 user or group's dquot.
-	 * Since we may not have done a quotacheck by this point, just return
-	 * the dquot without attaching it to any hashtables, lists, etc, or even
-	 * taking a reference.
-	 * The caller must dqdestroy this once done.
-	 */
-	if (flags & XFS_QMOPT_DQSUSER) {
-		ASSERT(id == 0);
-		ASSERT(! ip);
-		goto dqret;
-	}
+	if (ip)
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	if (error)
+		return error;
 
 	/*
 	 * Dquot lock comes after hashlock in the lock ordering
 	 */
 	if (ip) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-
 		/*
 		 * A dquot could be attached to this inode by now, since
 		 * we had dropped the ilock.
@@ -961,16 +859,21 @@ xfs_qm_dqget(
 		 * lock order between the two dquots here since dqp isn't
 		 * on any findable lists yet.
 		 */
-		if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+		switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
+		case 0:
+		case -1:
 			/*
-			 * Duplicate found. Just throw away the new dquot
-			 * and start over.
+			 * Duplicate found, either in cache or on its way out.
+			 * Just throw away the new dquot and start over.
 			 */
-			xfs_qm_dqput(tmpdqp);
+			if (tmpdqp)
+				xfs_qm_dqput(tmpdqp);
 			mutex_unlock(&h->qh_lock);
 			xfs_qm_dqdestroy(dqp);
 			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-			goto again;
+			goto restart;
+		default:
+			break;
 		}
 	}
 
@@ -1015,67 +918,49 @@ xfs_qm_dqget(
  */
 void
 xfs_qm_dqput(
-	xfs_dquot_t	*dqp)
+	struct xfs_dquot	*dqp)
 {
-	xfs_dquot_t	*gdqp;
+	struct xfs_dquot	*gdqp;
 
 	ASSERT(dqp->q_nrefs > 0);
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
 
 	trace_xfs_dqput(dqp);
 
-	if (dqp->q_nrefs != 1) {
-		dqp->q_nrefs--;
+recurse:
+	if (--dqp->q_nrefs > 0) {
 		xfs_dqunlock(dqp);
 		return;
 	}
 
+	trace_xfs_dqput_free(dqp);
+
+	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+	if (list_empty(&dqp->q_freelist)) {
+		list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+		xfs_Gqm->qm_dqfrlist_cnt++;
+	}
+	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+
 	/*
-	 * drop the dqlock and acquire the freelist and dqlock
-	 * in the right order; but try to get it out-of-order first
+	 * If we just added a udquot to the freelist, then we want to release
+	 * the gdquot reference that it (probably) has. Otherwise it'll keep
+	 * the gdquot from getting reclaimed.
 	 */
-	if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-		trace_xfs_dqput_wait(dqp);
-		xfs_dqunlock(dqp);
-		mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-		xfs_dqlock(dqp);
+	gdqp = dqp->q_gdquot;
+	if (gdqp) {
+		xfs_dqlock(gdqp);
+		dqp->q_gdquot = NULL;
 	}
+	xfs_dqunlock(dqp);
 
-	while (1) {
-		gdqp = NULL;
-
-		/* We can't depend on nrefs being == 1 here */
-		if (--dqp->q_nrefs == 0) {
-			trace_xfs_dqput_free(dqp);
-
-			list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-			xfs_Gqm->qm_dqfrlist_cnt++;
-
-			/*
-			 * If we just added a udquot to the freelist, then
-			 * we want to release the gdquot reference that
-			 * it (probably) has. Otherwise it'll keep the
-			 * gdquot from getting reclaimed.
-			 */
-			if ((gdqp = dqp->q_gdquot)) {
-				/*
-				 * Avoid a recursive dqput call
-				 */
-				xfs_dqlock(gdqp);
-				dqp->q_gdquot = NULL;
-			}
-		}
-		xfs_dqunlock(dqp);
-
-		/*
-		 * If we had a group quota inside the user quota as a hint,
-		 * release it now.
-		 */
-		if (! gdqp)
-			break;
+	/*
+	 * If we had a group quota hint, release it now.
+	 */
+	if (gdqp) {
 		dqp = gdqp;
+		goto recurse;
 	}
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
 }
 
 /*
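Two ideas land in the rewritten xfs_qm_dqput() above, both from the freelist patches in this series: the dquot is now placed on the freelist lazily (only added if not already there, and never pulled off here on reuse), and the old while (1) loop becomes a goto recurse tail call that walks the user-to-group hint chain without real recursion. The chain-release shape in isolation, as a generic sketch rather than the XFS code (no locking shown):

/*
 * Generic sketch of the dqput tail-call pattern: dropping the last
 * reference on one object may hand us a second object (the group
 * quota hint) whose reference must be dropped the same way.  The
 * goto keeps the stack flat instead of calling put() recursively.
 */
struct demo_obj {
	int		refcount;
	struct demo_obj	*hint;	/* reference we hold on another object */
};

static void demo_put(struct demo_obj *obj)
{
recurse:
	if (--obj->refcount > 0)
		return;

	/* last reference: detach the hint and release it the same way */
	if (obj->hint) {
		struct demo_obj *next = obj->hint;

		obj->hint = NULL;
		obj = next;
		goto recurse;
	}
}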
@@ -1169,7 +1054,7 @@ xfs_qm_dqflush(
 	 * If not dirty, or it's pinned and we are not supposed to block, nada.
 	 */
 	if (!XFS_DQ_IS_DIRTY(dqp) ||
-	    (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
+	    ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
 		xfs_dqfunlock(dqp);
 		return 0;
 	}
@@ -1257,40 +1142,17 @@ xfs_qm_dqflush(
 
 }
 
-int
-xfs_qm_dqlock_nowait(
-	xfs_dquot_t *dqp)
-{
-	return mutex_trylock(&dqp->q_qlock);
-}
-
-void
-xfs_dqlock(
-	xfs_dquot_t *dqp)
-{
-	mutex_lock(&dqp->q_qlock);
-}
-
 void
 xfs_dqunlock(
 	xfs_dquot_t *dqp)
 {
-	mutex_unlock(&(dqp->q_qlock));
+	xfs_dqunlock_nonotify(dqp);
 	if (dqp->q_logitem.qli_dquot == dqp) {
-		/* Once was dqp->q_mount, but might just have been cleared */
 		xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
-					(xfs_log_item_t*)&(dqp->q_logitem));
+					&dqp->q_logitem.qli_item);
 	}
 }
 
-
-void
-xfs_dqunlock_nonotify(
-	xfs_dquot_t *dqp)
-{
-	mutex_unlock(&(dqp->q_qlock));
-}
-
 /*
  * Lock two xfs_dquot structures.
  *
@@ -1319,43 +1181,18 @@ xfs_dqlock2(
 	}
 }
 
-
 /*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
+ * Take a dquot out of the mount's dqlist as well as the hashlist. This is
+ * called via unmount as well as quotaoff, and the purge will always succeed.
  */
-/* ARGSUSED */
-int
+void
 xfs_qm_dqpurge(
-	xfs_dquot_t	*dqp)
+	struct xfs_dquot	*dqp)
 {
-	xfs_dqhash_t	*qh = dqp->q_hash;
-	xfs_mount_t	*mp = dqp->q_mount;
-
-	ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
-	ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
+	struct xfs_mount	*mp = dqp->q_mount;
+	struct xfs_dqhash	*qh = dqp->q_hash;
 
 	xfs_dqlock(dqp);
-	/*
-	 * We really can't afford to purge a dquot that is
-	 * referenced, because these are hard refs.
-	 * It shouldn't happen in general because we went thru _all_ inodes in
-	 * dqrele_all_inodes before calling this and didn't let the mountlock go.
-	 * However it is possible that we have dquots with temporary
-	 * references that are not attached to an inode. e.g. see xfs_setattr().
-	 */
-	if (dqp->q_nrefs != 0) {
-		xfs_dqunlock(dqp);
-		mutex_unlock(&dqp->q_hash->qh_lock);
-		return (1);
-	}
-
-	ASSERT(!list_empty(&dqp->q_freelist));
 
 	/*
 	 * If we're turning off quotas, we have to make sure that, for
@@ -1370,23 +1207,18 @@ xfs_qm_dqpurge(
 		 * Block on the flush lock after nudging dquot buffer,
 		 * if it is incore.
 		 */
-		xfs_qm_dqflock_pushbuf_wait(dqp);
+		xfs_dqflock_pushbuf_wait(dqp);
 	}
 
 	/*
-	 * XXXIf we're turning this type of quotas off, we don't care
+	 * If we are turning this type of quotas off, we don't care
 	 * about the dirty metadata sitting in this dquot. OTOH, if
 	 * we're unmounting, we do care, so we flush it and wait.
 	 */
 	if (XFS_DQ_IS_DIRTY(dqp)) {
 		int	error;
 
-		/* dqflush unlocks dqflock */
 		/*
-		 * Given that dqpurge is a very rare occurrence, it is OK
-		 * that we're holding the hashlist and mplist locks
-		 * across the disk write. But, ... XXXsup
-		 *
 		 * We don't care about getting disk errors here. We need
 		 * to purge this dquot anyway, so we go ahead regardless.
 		 */
@@ -1396,38 +1228,44 @@ xfs_qm_dqpurge(
 				__func__, dqp);
 		xfs_dqflock(dqp);
 	}
+
 	ASSERT(atomic_read(&dqp->q_pincount) == 0);
 	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
 	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
 
+	xfs_dqfunlock(dqp);
+	xfs_dqunlock(dqp);
+
+	mutex_lock(&qh->qh_lock);
 	list_del_init(&dqp->q_hashlist);
 	qh->qh_version++;
+	mutex_unlock(&qh->qh_lock);
+
+	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
 	list_del_init(&dqp->q_mplist);
 	mp->m_quotainfo->qi_dqreclaims++;
 	mp->m_quotainfo->qi_dquots--;
+	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+
 	/*
-	 * XXX Move this to the front of the freelist, if we can get the
-	 * freelist lock.
+	 * We move dquots to the freelist as soon as their reference count
+	 * hits zero, so it really should be on the freelist here.
 	 */
+	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
 	ASSERT(!list_empty(&dqp->q_freelist));
+	list_del_init(&dqp->q_freelist);
+	xfs_Gqm->qm_dqfrlist_cnt--;
+	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
 
-	dqp->q_mount = NULL;
-	dqp->q_hash = NULL;
-	dqp->dq_flags = XFS_DQ_INACTIVE;
-	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-	xfs_dqfunlock(dqp);
-	xfs_dqunlock(dqp);
-	mutex_unlock(&qh->qh_lock);
-	return (0);
+	xfs_qm_dqdestroy(dqp);
 }
 
-
 /*
  * Give the buffer a little push if it is incore and
  * wait on the flush lock.
  */
 void
-xfs_qm_dqflock_pushbuf_wait(
+xfs_dqflock_pushbuf_wait(
 	xfs_dquot_t	*dqp)
 {
 	xfs_mount_t	*mp = dqp->q_mount;
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 34b7e945dbfa..a1d91d8f1802 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -80,8 +80,6 @@ enum {
 	XFS_QLOCK_NESTED,
 };
 
-#define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
-
 /*
  * Manage the q_flush completion queue embedded in the dquot. This completion
  * queue synchronizes processes attempting to flush the in-core dquot back to
@@ -102,6 +100,21 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
 	complete(&dqp->q_flush);
 }
 
+static inline int xfs_dqlock_nowait(struct xfs_dquot *dqp)
+{
+	return mutex_trylock(&dqp->q_qlock);
+}
+
+static inline void xfs_dqlock(struct xfs_dquot *dqp)
+{
+	mutex_lock(&dqp->q_qlock);
+}
+
+static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
+{
+	mutex_unlock(&dqp->q_qlock);
+}
+
 #define XFS_DQ_IS_LOCKED(dqp)	(mutex_is_locked(&((dqp)->q_qlock)))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
@@ -116,12 +129,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
 		     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
 		     (XFS_IS_OQUOTA_ON((d)->q_mount))))
 
+extern int		xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
+					uint, struct xfs_dquot **);
 extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int		xfs_qm_dqpurge(xfs_dquot_t *);
+extern void		xfs_qm_dqpurge(xfs_dquot_t *);
 extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int		xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern void		xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
 extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
 					xfs_disk_dquot_t *);
 extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
@@ -129,9 +142,17 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
 extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
 					xfs_dqid_t, uint, uint, xfs_dquot_t **);
 extern void		xfs_qm_dqput(xfs_dquot_t *);
-extern void		xfs_dqlock(xfs_dquot_t *);
-extern void		xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
-extern void		xfs_dqunlock(xfs_dquot_t *);
-extern void		xfs_dqunlock_nonotify(xfs_dquot_t *);
+
+extern void		xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
+extern void		xfs_dqunlock(struct xfs_dquot *);
+extern void		xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
+
+static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
+{
+	xfs_dqlock(dqp);
+	dqp->q_nrefs++;
+	xfs_dqunlock(dqp);
+	return dqp;
+}
 
 #endif	/* __XFS_DQUOT_H__ */
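xfs_qm_dqhold() is the helper the merge summary calls "xfs: add a xfs_dqhold helper": it replaces the bare XFS_DQHOLD() macro removed earlier in this header and packages the lock/bump/unlock sequence so a reference can be taken in one expression. An illustrative before/after fragment, modelled on the shape of the xfs_qm.c call sites later in this diff:

/* before: open-coded reference grab around the old macro */
xfs_dqlock(dqp);
XFS_DQHOLD(dqp);
*IO_idqpp = dqp;
xfs_dqunlock(dqp);

/* after: one expression, identical locking inside the helper */
*IO_idqpp = xfs_qm_dqhold(dqp);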
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 0dee0b71029d..34baeae45265 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -73,7 +73,6 @@ xfs_qm_dquot_logitem_format(
 	logvec->i_len  = sizeof(xfs_disk_dquot_t);
 	logvec->i_type = XLOG_REG_TYPE_DQUOT;
 
-	ASSERT(2 == lip->li_desc->lid_size);
 	qlip->qli_format.qlf_size = 2;
 
 }
@@ -134,7 +133,7 @@ xfs_qm_dquot_logitem_push(
 	 * lock without sleeping, then there must not have been
 	 * anyone in the process of flushing the dquot.
 	 */
-	error = xfs_qm_dqflush(dqp, 0);
+	error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
 	if (error)
 		xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
 			__func__, error, dqp);
@@ -237,7 +236,7 @@ xfs_qm_dquot_logitem_trylock(
 	if (atomic_read(&dqp->q_pincount) > 0)
 		return XFS_ITEM_PINNED;
 
-	if (!xfs_qm_dqlock_nowait(dqp))
+	if (!xfs_dqlock_nowait(dqp))
 		return XFS_ITEM_LOCKED;
 
 	if (!xfs_dqflock_nowait(dqp)) {
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index abaafdbb3e65..cfd6c7f8cc3c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -437,7 +437,6 @@ xfs_inode_item_format(
 	 * Assert that no attribute-related log flags are set.
 	 */
 	if (!XFS_IFORK_Q(ip)) {
-		ASSERT(nvecs == lip->li_desc->lid_size);
 		iip->ili_format.ilf_size = nvecs;
 		ASSERT(!(iip->ili_format.ilf_fields &
 			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
@@ -521,7 +520,6 @@ xfs_inode_item_format(
 		break;
 	}
 
-	ASSERT(nvecs == lip->li_desc->lid_size);
 	iip->ili_format.ilf_size = nvecs;
 }
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 34817adf4b9e..e2cc3568c299 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -760,38 +760,6 @@ xfs_log_item_init(
 	INIT_LIST_HEAD(&item->li_cil);
 }
 
-/*
- * Write region vectors to log. The write happens using the space reservation
- * of the ticket (tic). It is not a requirement that all writes for a given
- * transaction occur with one call to xfs_log_write(). However, it is important
- * to note that the transaction reservation code makes an assumption about the
- * number of log headers a transaction requires that may be violated if you
- * don't pass all the transaction vectors in one call....
- */
-int
-xfs_log_write(
-	struct xfs_mount	*mp,
-	struct xfs_log_iovec	reg[],
-	int			nentries,
-	struct xlog_ticket	*tic,
-	xfs_lsn_t		*start_lsn)
-{
-	struct log		*log = mp->m_log;
-	int			error;
-	struct xfs_log_vec	vec = {
-		.lv_niovecs = nentries,
-		.lv_iovecp = reg,
-	};
-
-	if (XLOG_FORCED_SHUTDOWN(log))
-		return XFS_ERROR(EIO);
-
-	error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
-	if (error)
-		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-	return error;
-}
-
 void
 xfs_log_move_tail(xfs_mount_t	*mp,
 		  xfs_lsn_t	tail_lsn)
@@ -1685,7 +1653,7 @@ xlog_print_tic_res(
 	};
 
 	xfs_warn(mp,
-		"xfs_log_write: reservation summary:\n"
+		"xlog_write: reservation summary:\n"
 		"  trans type  = %s (%u)\n"
 		"  unit res    = %d bytes\n"
 		"  current res = %d bytes\n"
@@ -1714,7 +1682,7 @@ xlog_print_tic_res(
 	}
 
 	xfs_alert_tag(mp, XFS_PTAG_LOGRES,
-		"xfs_log_write: reservation ran out. Need to up reservation");
+		"xlog_write: reservation ran out. Need to up reservation");
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 }
 
@@ -1968,23 +1936,21 @@ xlog_write(
 	*start_lsn = 0;
 
 	len = xlog_write_calc_vec_length(ticket, log_vector);
-	if (log->l_cilp) {
-		/*
-		 * Region headers and bytes are already accounted for.
-		 * We only need to take into account start records and
-		 * split regions in this function.
-		 */
-		if (ticket->t_flags & XLOG_TIC_INITED)
-			ticket->t_curr_res -= sizeof(xlog_op_header_t);
 
-		/*
-		 * Commit record headers need to be accounted for. These
-		 * come in as separate writes so are easy to detect.
-		 */
-		if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
-			ticket->t_curr_res -= sizeof(xlog_op_header_t);
-	} else
-		ticket->t_curr_res -= len;
+	/*
+	 * Region headers and bytes are already accounted for.
+	 * We only need to take into account start records and
+	 * split regions in this function.
+	 */
+	if (ticket->t_flags & XLOG_TIC_INITED)
+		ticket->t_curr_res -= sizeof(xlog_op_header_t);
+
+	/*
+	 * Commit record headers need to be accounted for. These
+	 * come in as separate writes so are easy to detect.
+	 */
+	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
+		ticket->t_curr_res -= sizeof(xlog_op_header_t);
 
 	if (ticket->t_curr_res < 0)
 		xlog_print_tic_res(log->l_mp, ticket);
@@ -2931,8 +2897,7 @@ _xfs_log_force(
 
 	XFS_STATS_INC(xs_log_force);
 
-	if (log->l_cilp)
-		xlog_cil_force(log);
+	xlog_cil_force(log);
 
 	spin_lock(&log->l_icloglock);
 
@@ -3081,11 +3046,9 @@ _xfs_log_force_lsn(
 
 	XFS_STATS_INC(xs_log_force);
 
-	if (log->l_cilp) {
-		lsn = xlog_cil_force_lsn(log, lsn);
-		if (lsn == NULLCOMMITLSN)
-			return 0;
-	}
+	lsn = xlog_cil_force_lsn(log, lsn);
+	if (lsn == NULLCOMMITLSN)
+		return 0;
 
 try_again:
 	spin_lock(&log->l_icloglock);
@@ -3653,7 +3616,7 @@ xfs_log_force_umount(
 	 * completed transactions are flushed to disk with the xfs_log_force()
 	 * call below.
 	 */
-	if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
+	if (!logerror)
 		xlog_cil_force(log);
 
 	/*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3f7bf451c034..2aee3b22d29c 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -174,11 +174,6 @@ int xfs_log_reserve(struct xfs_mount *mp,
 			  __uint8_t	   clientid,
 			  uint		   flags,
 			  uint		   t_type);
-int	  xfs_log_write(struct xfs_mount *mp,
-			xfs_log_iovec_t  region[],
-			int		 nentries,
-			struct xlog_ticket *ticket,
-			xfs_lsn_t	 *start_lsn);
 int	  xfs_log_unmount_write(struct xfs_mount *mp);
 void	  xfs_log_unmount(struct xfs_mount *mp);
 int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
@@ -189,8 +184,7 @@ void xlog_iodone(struct xfs_buf *);
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
 
-void	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
-				struct xfs_log_vec *log_vector,
+int	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
 				xfs_lsn_t *commit_lsn, int flags);
 bool	xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index c7755d5a5fbe..26db6b13f1f9 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -32,10 +32,7 @@
 #include "xfs_discard.h"
 
 /*
- * Perform initial CIL structure initialisation. If the CIL is not
- * enabled in this filesystem, ensure the log->l_cilp is null so
- * we can check this conditional to determine if we are doing delayed
- * logging or not.
+ * Perform initial CIL structure initialisation.
  */
 int
 xlog_cil_init(
@@ -44,10 +41,6 @@ xlog_cil_init(
 	struct xfs_cil	*cil;
 	struct xfs_cil_ctx *ctx;
 
-	log->l_cilp = NULL;
-	if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
-		return 0;
-
 	cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
 	if (!cil)
 		return ENOMEM;
@@ -80,9 +73,6 @@ void
 xlog_cil_destroy(
 	struct log	*log)
 {
-	if (!log->l_cilp)
-		return;
-
 	if (log->l_cilp->xc_ctx) {
 		if (log->l_cilp->xc_ctx->ticket)
 			xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
@@ -137,9 +127,6 @@ void
 xlog_cil_init_post_recovery(
 	struct log	*log)
 {
-	if (!log->l_cilp)
-		return;
-
 	log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
 	log->l_cilp->xc_ctx->sequence = 1;
 	log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
@@ -172,37 +159,73 @@ xlog_cil_init_post_recovery(
  * format the regions into the iclog as though they are being formatted
  * directly out of the objects themselves.
  */
-static void
-xlog_cil_format_items(
-	struct log		*log,
-	struct xfs_log_vec	*log_vector)
+static struct xfs_log_vec *
+xlog_cil_prepare_log_vecs(
+	struct xfs_trans	*tp)
 {
-	struct xfs_log_vec *lv;
+	struct xfs_log_item_desc *lidp;
+	struct xfs_log_vec	*lv = NULL;
+	struct xfs_log_vec	*ret_lv = NULL;
 
-	ASSERT(log_vector);
-	for (lv = log_vector; lv; lv = lv->lv_next) {
+
+	/* Bail out if we didn't find a log item.  */
+	if (list_empty(&tp->t_items)) {
+		ASSERT(0);
+		return NULL;
+	}
+
+	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+		struct xfs_log_vec *new_lv;
 		void	*ptr;
 		int	index;
 		int	len = 0;
+		uint	niovecs;
+
+		/* Skip items which aren't dirty in this transaction. */
+		if (!(lidp->lid_flags & XFS_LID_DIRTY))
+			continue;
+
+		/* Skip items that do not have any vectors for writing */
+		niovecs = IOP_SIZE(lidp->lid_item);
+		if (!niovecs)
+			continue;
+
+		new_lv = kmem_zalloc(sizeof(*new_lv) +
+				niovecs * sizeof(struct xfs_log_iovec),
+				KM_SLEEP);
+
+		/* The allocated iovec region lies beyond the log vector. */
+		new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
+		new_lv->lv_niovecs = niovecs;
+		new_lv->lv_item = lidp->lid_item;
 
 		/* build the vector array and calculate it's length */
-		IOP_FORMAT(lv->lv_item, lv->lv_iovecp);
-		for (index = 0; index < lv->lv_niovecs; index++)
-			len += lv->lv_iovecp[index].i_len;
+		IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
+		for (index = 0; index < new_lv->lv_niovecs; index++)
+			len += new_lv->lv_iovecp[index].i_len;
 
-		lv->lv_buf_len = len;
-		lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
-		ptr = lv->lv_buf;
+		new_lv->lv_buf_len = len;
+		new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
+				KM_SLEEP|KM_NOFS);
+		ptr = new_lv->lv_buf;
 
-		for (index = 0; index < lv->lv_niovecs; index++) {
-			struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
+		for (index = 0; index < new_lv->lv_niovecs; index++) {
+			struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
 
 			memcpy(ptr, vec->i_addr, vec->i_len);
 			vec->i_addr = ptr;
 			ptr += vec->i_len;
 		}
-		ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
+		ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
+
+		if (!ret_lv)
+			ret_lv = new_lv;
+		else
+			lv->lv_next = new_lv;
+		lv = new_lv;
 	}
+
+	return ret_lv;
 }
 
@@ -635,28 +658,30 @@ out_abort:
  * background commit, returns without it held once background commits are
  * allowed again.
  */
-void
+int
 xfs_log_commit_cil(
 	struct xfs_mount	*mp,
 	struct xfs_trans	*tp,
-	struct xfs_log_vec	*log_vector,
 	xfs_lsn_t		*commit_lsn,
 	int			flags)
 {
 	struct log		*log = mp->m_log;
 	int			log_flags = 0;
 	int			push = 0;
+	struct xfs_log_vec	*log_vector;
 
 	if (flags & XFS_TRANS_RELEASE_LOG_RES)
 		log_flags = XFS_LOG_REL_PERM_RESERV;
 
 	/*
-	 * do all the hard work of formatting items (including memory
+	 * Do all the hard work of formatting items (including memory
 	 * allocation) outside the CIL context lock. This prevents stalling CIL
 	 * pushes when we are low on memory and a transaction commit spends a
 	 * lot of time in memory reclaim.
 	 */
-	xlog_cil_format_items(log, log_vector);
+	log_vector = xlog_cil_prepare_log_vecs(tp);
+	if (!log_vector)
+		return ENOMEM;
 
 	/* lock out background commit */
 	down_read(&log->l_cilp->xc_ctx_lock);
@@ -709,6 +734,7 @@ xfs_log_commit_cil(
 	 */
 	if (push)
 		xlog_cil_push(log, 0);
+	return 0;
 }
 
 /*
@@ -786,8 +812,6 @@ xfs_log_item_in_current_chkpt(
 {
 	struct xfs_cil_ctx *ctx;
 
-	if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
-		return false;
 	if (list_empty(&lip->li_cil))
 		return false;
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bb24dac42a25..19f69e232509 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -219,7 +219,6 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
 						   must be synchronous except
 						   for space allocations */
-#define XFS_MOUNT_DELAYLOG	(1ULL << 1)	/* delayed logging is enabled */
 #define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
 						   operations, typically for
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 0bbb1a41998b..671f37eae1c7 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -154,12 +154,17 @@ STATIC void
 xfs_qm_destroy(
 	struct xfs_qm	*xqm)
 {
-	struct xfs_dquot *dqp, *n;
 	int		hsize, i;
 
 	ASSERT(xqm != NULL);
 	ASSERT(xqm->qm_nrefs == 0);
+
 	unregister_shrinker(&xfs_qm_shaker);
+
+	mutex_lock(&xqm->qm_dqfrlist_lock);
+	ASSERT(list_empty(&xqm->qm_dqfrlist));
+	mutex_unlock(&xqm->qm_dqfrlist_lock);
+
 	hsize = xqm->qm_dqhashmask + 1;
 	for (i = 0; i < hsize; i++) {
 		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -171,17 +176,6 @@ xfs_qm_destroy(
 	xqm->qm_grp_dqhtable = NULL;
 	xqm->qm_dqhashmask = 0;
 
-	/* frlist cleanup */
-	mutex_lock(&xqm->qm_dqfrlist_lock);
-	list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
-		xfs_dqlock(dqp);
-		list_del_init(&dqp->q_freelist);
-		xfs_Gqm->qm_dqfrlist_cnt--;
-		xfs_dqunlock(dqp);
-		xfs_qm_dqdestroy(dqp);
-	}
-	mutex_unlock(&xqm->qm_dqfrlist_lock);
-	mutex_destroy(&xqm->qm_dqfrlist_lock);
 	kmem_free(xqm);
 }
 
@@ -232,34 +226,10 @@ STATIC void
 xfs_qm_rele_quotafs_ref(
 	struct xfs_mount *mp)
 {
-	xfs_dquot_t	*dqp, *n;
-
 	ASSERT(xfs_Gqm);
 	ASSERT(xfs_Gqm->qm_nrefs > 0);
 
 	/*
-	 * Go thru the freelist and destroy all inactive dquots.
-	 */
-	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-	list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-		xfs_dqlock(dqp);
-		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-			ASSERT(dqp->q_mount == NULL);
-			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(list_empty(&dqp->q_hashlist));
-			ASSERT(list_empty(&dqp->q_mplist));
-			list_del_init(&dqp->q_freelist);
-			xfs_Gqm->qm_dqfrlist_cnt--;
-			xfs_dqunlock(dqp);
-			xfs_qm_dqdestroy(dqp);
-		} else {
-			xfs_dqunlock(dqp);
-		}
-	}
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-
-	/*
 	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
 	 * be restarted.
 	 */
@@ -415,8 +385,7 @@ xfs_qm_unmount_quotas(
  */
 STATIC int
 xfs_qm_dqflush_all(
-	struct xfs_mount	*mp,
-	int			sync_mode)
+	struct xfs_mount	*mp)
 {
 	struct xfs_quotainfo	*q = mp->m_quotainfo;
 	int			recl;
@@ -429,7 +398,8 @@ again:
 	mutex_lock(&q->qi_dqlist_lock);
 	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
 		xfs_dqlock(dqp);
-		if (! XFS_DQ_IS_DIRTY(dqp)) {
+		if ((dqp->dq_flags & XFS_DQ_FREEING) ||
+		    !XFS_DQ_IS_DIRTY(dqp)) {
 			xfs_dqunlock(dqp);
 			continue;
 		}
@@ -444,14 +414,14 @@ again:
 			 * out immediately.  We'll be able to acquire
 			 * the flush lock when the I/O completes.
 			 */
-			xfs_qm_dqflock_pushbuf_wait(dqp);
+			xfs_dqflock_pushbuf_wait(dqp);
 		}
 		/*
 		 * Let go of the mplist lock. We don't want to hold it
 		 * across a disk write.
 		 */
 		mutex_unlock(&q->qi_dqlist_lock);
-		error = xfs_qm_dqflush(dqp, sync_mode);
+		error = xfs_qm_dqflush(dqp, 0);
 		xfs_dqunlock(dqp);
 		if (error)
 			return error;
@@ -468,6 +438,7 @@ again:
 	/* return ! busy */
 	return 0;
 }
+
 /*
  * Release the group dquot pointers the user dquots may be
  * carrying around as a hint. mplist is locked on entry and exit.
@@ -478,31 +449,26 @@ xfs_qm_detach_gdquots(
 {
 	struct xfs_quotainfo	*q = mp->m_quotainfo;
 	struct xfs_dquot	*dqp, *gdqp;
-	int			nrecl;
 
  again:
 	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
 	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
 		xfs_dqlock(dqp);
-		if ((gdqp = dqp->q_gdquot)) {
-			xfs_dqlock(gdqp);
-			dqp->q_gdquot = NULL;
-		}
-		xfs_dqunlock(dqp);
-
-		if (gdqp) {
-			/*
-			 * Can't hold the mplist lock across a dqput.
-			 * XXXmust convert to marker based iterations here.
-			 */
-			nrecl = q->qi_dqreclaims;
+		if (dqp->dq_flags & XFS_DQ_FREEING) {
+			xfs_dqunlock(dqp);
 			mutex_unlock(&q->qi_dqlist_lock);
-			xfs_qm_dqput(gdqp);
-
+			delay(1);
 			mutex_lock(&q->qi_dqlist_lock);
-			if (nrecl != q->qi_dqreclaims)
-				goto again;
+			goto again;
 		}
+
+		gdqp = dqp->q_gdquot;
+		if (gdqp)
+			dqp->q_gdquot = NULL;
+		xfs_dqunlock(dqp);
+
+		if (gdqp)
+			xfs_qm_dqrele(gdqp);
 	}
 }
 
@@ -520,8 +486,8 @@ xfs_qm_dqpurge_int(
 	struct xfs_quotainfo	*q = mp->m_quotainfo;
 	struct xfs_dquot	*dqp, *n;
 	uint			dqtype;
-	int			nrecl;
-	int			nmisses;
+	int			nmisses = 0;
+	LIST_HEAD		(dispose_list);
 
 	if (!q)
 		return 0;
@@ -540,47 +506,26 @@ xfs_qm_dqpurge_int(
540 */ 506 */
541 xfs_qm_detach_gdquots(mp); 507 xfs_qm_detach_gdquots(mp);
542 508
543 again:
544 nmisses = 0;
545 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
546 /* 509 /*
547 * Try to get rid of all of the unwanted dquots. The idea is to 510 * Try to get rid of all of the unwanted dquots.
548 * get them off mplist and hashlist, but leave them on freelist.
549 */ 511 */
550 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { 512 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
551 /* 513 xfs_dqlock(dqp);
552 * It's OK to look at the type without taking dqlock here. 514 if ((dqp->dq_flags & dqtype) != 0 &&
553 * We're holding the mplist lock here, and that's needed for 515 !(dqp->dq_flags & XFS_DQ_FREEING)) {
554 * a dqreclaim. 516 if (dqp->q_nrefs == 0) {
555 */ 517 dqp->dq_flags |= XFS_DQ_FREEING;
556 if ((dqp->dq_flags & dqtype) == 0) 518 list_move_tail(&dqp->q_mplist, &dispose_list);
557 continue; 519 } else
558 520 nmisses++;
559 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
560 nrecl = q->qi_dqreclaims;
561 mutex_unlock(&q->qi_dqlist_lock);
562 mutex_lock(&dqp->q_hash->qh_lock);
563 mutex_lock(&q->qi_dqlist_lock);
564
565 /*
566 * XXXTheoretically, we can get into a very long
567 * ping pong game here.
568 * No one can be adding dquots to the mplist at
569 * this point, but somebody might be taking things off.
570 */
571 if (nrecl != q->qi_dqreclaims) {
572 mutex_unlock(&dqp->q_hash->qh_lock);
573 goto again;
574 }
575 } 521 }
576 522 xfs_dqunlock(dqp);
577 /*
578 * Take the dquot off the mplist and hashlist. It may remain on
579 * freelist in INACTIVE state.
580 */
581 nmisses += xfs_qm_dqpurge(dqp);
582 } 523 }
583 mutex_unlock(&q->qi_dqlist_lock); 524 mutex_unlock(&q->qi_dqlist_lock);
525
526 list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
527 xfs_qm_dqpurge(dqp);
528
584 return nmisses; 529 return nmisses;
585} 530}
586 531
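
The rewritten purge loop above is a textbook two-phase dispose list: victims are merely flagged and unlinked while the list lock is held, and the expensive teardown happens afterwards with no locks held. The same idiom in a self-contained userspace form (hypothetical names; a hand-rolled singly linked list instead of the kernel's list_head):

#include <pthread.h>
#include <stdlib.h>

struct item {
	struct item	*next;
	int		refs;
	int		freeing;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *all_items;

static int purge_all(void)
{
	struct item *dispose = NULL, *it, **pp;
	int nmisses = 0;

	/* Phase 1: under the lock, only flag and unlink the victims. */
	pthread_mutex_lock(&list_lock);
	pp = &all_items;
	while ((it = *pp) != NULL) {
		if (it->refs == 0) {
			it->freeing = 1;	/* lookups now stay away */
			*pp = it->next;		/* unlink from the shared list */
			it->next = dispose;	/* collect on the private list */
			dispose = it;
		} else {
			nmisses++;		/* still referenced: skip it */
			pp = &it->next;
		}
	}
	pthread_mutex_unlock(&list_lock);

	/* Phase 2: the expensive teardown runs with no locks held. */
	while ((it = dispose) != NULL) {
		dispose = it->next;
		free(it);
	}
	return nmisses;		/* like the kernel code, report the busy ones */
}
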
@@ -648,12 +593,9 @@ xfs_qm_dqattach_one(
648 */ 593 */
649 dqp = udqhint->q_gdquot; 594 dqp = udqhint->q_gdquot;
650 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { 595 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
651 xfs_dqlock(dqp);
652 XFS_DQHOLD(dqp);
653 ASSERT(*IO_idqpp == NULL); 596 ASSERT(*IO_idqpp == NULL);
654 *IO_idqpp = dqp;
655 597
656 xfs_dqunlock(dqp); 598 *IO_idqpp = xfs_qm_dqhold(dqp);
657 xfs_dqunlock(udqhint); 599 xfs_dqunlock(udqhint);
658 return 0; 600 return 0;
659 } 601 }
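
xfs_qm_dqhold() is introduced by this series and its body is not visible in this hunk. From its call sites it presumably wraps the old lock/XFS_DQHOLD/unlock triple and returns the dquot, so callers can take a reference in a single expression. A hedged reconstruction with stub types:

/* Stub type; only q_nrefs matters for this sketch. */
struct xfs_dquot {
	int	q_nrefs;
	/* ... */
};

static void xfs_dqlock(struct xfs_dquot *dqp)	{ (void)dqp; }
static void xfs_dqunlock(struct xfs_dquot *dqp)	{ (void)dqp; }

/*
 * Presumed shape of the new helper: take the dquot lock, bump the
 * reference count, and hand the same pointer back so call sites can
 * assign and hold in one expression.
 */
static struct xfs_dquot *
xfs_qm_dqhold(struct xfs_dquot *dqp)
{
	xfs_dqlock(dqp);
	dqp->q_nrefs++;
	xfs_dqunlock(dqp);
	return dqp;
}
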
@@ -693,11 +635,7 @@ xfs_qm_dqattach_one(
693 635
694/* 636/*
695 * Given a udquot and gdquot, attach a ptr to the group dquot in the 637 * Given a udquot and gdquot, attach a ptr to the group dquot in the
696 * udquot as a hint for future lookups. The idea sounds simple, but the 638 * udquot as a hint for future lookups.
697 * execution isn't, because the udquot might have a group dquot attached
698 * already and getting rid of that gets us into lock ordering constraints.
699 * The process is complicated more by the fact that the dquots may or may not
700 * be locked on entry.
701 */ 639 */
702STATIC void 640STATIC void
703xfs_qm_dqattach_grouphint( 641xfs_qm_dqattach_grouphint(
@@ -708,45 +646,17 @@ xfs_qm_dqattach_grouphint(
708 646
709 xfs_dqlock(udq); 647 xfs_dqlock(udq);
710 648
711 if ((tmp = udq->q_gdquot)) { 649 tmp = udq->q_gdquot;
712 if (tmp == gdq) { 650 if (tmp) {
713 xfs_dqunlock(udq); 651 if (tmp == gdq)
714 return; 652 goto done;
715 }
716 653
717 udq->q_gdquot = NULL; 654 udq->q_gdquot = NULL;
718 /*
719 * We can't keep any dqlocks when calling dqrele,
720 * because the freelist lock comes before dqlocks.
721 */
722 xfs_dqunlock(udq);
723 /*
724 * we took a hard reference once upon a time in dqget,
725 * so give it back when the udquot no longer points at it
726 * dqput() does the unlocking of the dquot.
727 */
728 xfs_qm_dqrele(tmp); 655 xfs_qm_dqrele(tmp);
729
730 xfs_dqlock(udq);
731 xfs_dqlock(gdq);
732
733 } else {
734 ASSERT(XFS_DQ_IS_LOCKED(udq));
735 xfs_dqlock(gdq);
736 }
737
738 ASSERT(XFS_DQ_IS_LOCKED(udq));
739 ASSERT(XFS_DQ_IS_LOCKED(gdq));
740 /*
741 * Somebody could have attached a gdquot here,
742 * when we dropped the uqlock. If so, just do nothing.
743 */
744 if (udq->q_gdquot == NULL) {
745 XFS_DQHOLD(gdq);
746 udq->q_gdquot = gdq;
747 } 656 }
748 657
749 xfs_dqunlock(gdq); 658 udq->q_gdquot = xfs_qm_dqhold(gdq);
659done:
750 xfs_dqunlock(udq); 660 xfs_dqunlock(udq);
751} 661}
752 662
@@ -813,17 +723,13 @@ xfs_qm_dqattach_locked(
813 ASSERT(ip->i_gdquot); 723 ASSERT(ip->i_gdquot);
814 724
815 /* 725 /*
816 * We may or may not have the i_udquot locked at this point, 726 * We do not have i_udquot locked at this point, but this check
817 * but this check is OK since we don't depend on the i_gdquot to 727 * is OK since we don't depend on the i_gdquot to be accurate
818 * be accurate 100% all the time. It is just a hint, and this 728 * 100% all the time. It is just a hint, and this will
819 * will succeed in general. 729 * succeed in general.
820 */
821 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
822 goto done;
823 /*
824 * Attach i_gdquot to the gdquot hint inside the i_udquot.
825 */ 730 */
826 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); 731 if (ip->i_udquot->q_gdquot != ip->i_gdquot)
732 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
827 } 733 }
828 734
829 done: 735 done:
@@ -879,100 +785,6 @@ xfs_qm_dqdetach(
879 } 785 }
880} 786}
881 787
882int
883xfs_qm_sync(
884 struct xfs_mount *mp,
885 int flags)
886{
887 struct xfs_quotainfo *q = mp->m_quotainfo;
888 int recl, restarts;
889 struct xfs_dquot *dqp;
890 int error;
891
892 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
893 return 0;
894
895 restarts = 0;
896
897 again:
898 mutex_lock(&q->qi_dqlist_lock);
899 /*
900 * dqpurge_all() also takes the mplist lock and iterate thru all dquots
901 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
902 * when we have the mplist lock, we know that dquots will be consistent
903 * as long as we have it locked.
904 */
905 if (!XFS_IS_QUOTA_ON(mp)) {
906 mutex_unlock(&q->qi_dqlist_lock);
907 return 0;
908 }
909 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
910 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
911 /*
912 * If this is vfs_sync calling, then skip the dquots that
913 * don't 'seem' to be dirty. ie. don't acquire dqlock.
914 * This is very similar to what xfs_sync does with inodes.
915 */
916 if (flags & SYNC_TRYLOCK) {
917 if (!XFS_DQ_IS_DIRTY(dqp))
918 continue;
919 if (!xfs_qm_dqlock_nowait(dqp))
920 continue;
921 } else {
922 xfs_dqlock(dqp);
923 }
924
925 /*
926 * Now, find out for sure if this dquot is dirty or not.
927 */
928 if (! XFS_DQ_IS_DIRTY(dqp)) {
929 xfs_dqunlock(dqp);
930 continue;
931 }
932
933 /* XXX a sentinel would be better */
934 recl = q->qi_dqreclaims;
935 if (!xfs_dqflock_nowait(dqp)) {
936 if (flags & SYNC_TRYLOCK) {
937 xfs_dqunlock(dqp);
938 continue;
939 }
940 /*
941 * If we can't grab the flush lock then if the caller
942 * really wanted us to give this our best shot, so
943 * see if we can give a push to the buffer before we wait
944 * on the flush lock. At this point, we know that
945 * even though the dquot is being flushed,
946 * it has (new) dirty data.
947 */
948 xfs_qm_dqflock_pushbuf_wait(dqp);
949 }
950 /*
951 * Let go of the mplist lock. We don't want to hold it
952 * across a disk write
953 */
954 mutex_unlock(&q->qi_dqlist_lock);
955 error = xfs_qm_dqflush(dqp, flags);
956 xfs_dqunlock(dqp);
957 if (error && XFS_FORCED_SHUTDOWN(mp))
958 return 0; /* Need to prevent umount failure */
959 else if (error)
960 return error;
961
962 mutex_lock(&q->qi_dqlist_lock);
963 if (recl != q->qi_dqreclaims) {
964 if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
965 break;
966
967 mutex_unlock(&q->qi_dqlist_lock);
968 goto again;
969 }
970 }
971
972 mutex_unlock(&q->qi_dqlist_lock);
973 return 0;
974}
975
976/* 788/*
977 * The hash chains and the mplist use the same xfs_dqhash structure as 789 * The hash chains and the mplist use the same xfs_dqhash structure as
978 * their list head, but we can take the mplist qh_lock and one of the 790 * their list head, but we can take the mplist qh_lock and one of the
@@ -1034,18 +846,21 @@ xfs_qm_init_quotainfo(
1034 /* 846 /*
1035 * We try to get the limits from the superuser's limits fields. 847 * We try to get the limits from the superuser's limits fields.
1036 * This is quite hacky, but it is standard quota practice. 848 * This is quite hacky, but it is standard quota practice.
849 *
1037 * We look at the USR dquot with id == 0 first, but if user quotas 850 * We look at the USR dquot with id == 0 first, but if user quotas
1038 * are not enabled we goto the GRP dquot with id == 0. 851 * are not enabled we goto the GRP dquot with id == 0.
1039 * We don't really care to keep separate default limits for user 852 * We don't really care to keep separate default limits for user
1040 * and group quotas, at least not at this point. 853 * and group quotas, at least not at this point.
854 *
855 * Since we may not have done a quotacheck by this point, just read
856 * the dquot without attaching it to any hashtables or lists.
1041 */ 857 */
1042 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, 858 error = xfs_qm_dqread(mp, 0,
1043 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 859 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
1044 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 860 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1045 XFS_DQ_PROJ), 861 XFS_DQ_PROJ),
1046 XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, 862 XFS_QMOPT_DOWARN, &dqp);
1047 &dqp); 863 if (!error) {
1048 if (! error) {
1049 xfs_disk_dquot_t *ddqp = &dqp->q_core; 864 xfs_disk_dquot_t *ddqp = &dqp->q_core;
1050 865
1051 /* 866 /*
@@ -1072,11 +887,6 @@ xfs_qm_init_quotainfo(
1072 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); 887 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1073 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); 888 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1074 889
1075 /*
1076 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1077 * we don't want this dquot cached. We haven't done a
1078 * quotacheck yet, and quotacheck doesn't like incore dquots.
1079 */
1080 xfs_qm_dqdestroy(dqp); 890 xfs_qm_dqdestroy(dqp);
1081 } else { 891 } else {
1082 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; 892 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
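
xfs_qm_dqread() replaces the old dqget-with-XFS_QMOPT_DQSUSER dance for sampling the default limits. Its definition is not part of this hunk; judging from the call above, it presumably looks something like:

/*
 * Assumed prototype, inferred from the call site above: read the dquot
 * straight from disk and hand it back without inserting it into any
 * hash chain or mount list.
 */
int xfs_qm_dqread(struct xfs_mount *mp, xfs_dqid_t id, uint type,
		  uint flags, struct xfs_dquot **dqpp);
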
@@ -1661,7 +1471,7 @@ xfs_qm_quotacheck(
1661 * successfully. 1471 * successfully.
1662 */ 1472 */
1663 if (!error) 1473 if (!error)
1664 error = xfs_qm_dqflush_all(mp, 0); 1474 error = xfs_qm_dqflush_all(mp);
1665 1475
1666 /* 1476 /*
1667 * We can get this error if we couldn't do a dquot allocation inside 1477 * We can get this error if we couldn't do a dquot allocation inside
@@ -1793,59 +1603,33 @@ xfs_qm_init_quotainos(
1793 1603
1794 1604
1795/* 1605/*
1796 * Just pop the least recently used dquot off the freelist and 1606 * Pop the least recently used dquot off the freelist and recycle it.
1797 * recycle it. The returned dquot is locked.
1798 */ 1607 */
1799STATIC xfs_dquot_t * 1608STATIC struct xfs_dquot *
1800xfs_qm_dqreclaim_one(void) 1609xfs_qm_dqreclaim_one(void)
1801{ 1610{
1802 xfs_dquot_t *dqpout; 1611 struct xfs_dquot *dqp;
1803 xfs_dquot_t *dqp; 1612 int restarts = 0;
1804 int restarts;
1805 int startagain;
1806
1807 restarts = 0;
1808 dqpout = NULL;
1809 1613
1810 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
1811again:
1812 startagain = 0;
1813 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1614 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1814 1615restart:
1815 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { 1616 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
1816 struct xfs_mount *mp = dqp->q_mount; 1617 struct xfs_mount *mp = dqp->q_mount;
1817 xfs_dqlock(dqp); 1618
1619 if (!xfs_dqlock_nowait(dqp))
1620 continue;
1818 1621
1819 /* 1622 /*
1820 * We are racing with dqlookup here. Naturally we don't 1623 * This dquot has already been grabbed by dqlookup.
1821 * want to reclaim a dquot that lookup wants. We release the 1624 * Remove it from the freelist and try again.
1822 * freelist lock and start over, so that lookup will grab
1823 * both the dquot and the freelistlock.
1824 */ 1625 */
1825 if (dqp->dq_flags & XFS_DQ_WANT) { 1626 if (dqp->q_nrefs) {
1826 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
1827
1828 trace_xfs_dqreclaim_want(dqp); 1627 trace_xfs_dqreclaim_want(dqp);
1829 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1628 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1830 restarts++;
1831 startagain = 1;
1832 goto dqunlock;
1833 }
1834 1629
1835 /*
1836 * If the dquot is inactive, we are assured that it is
1837 * not on the mplist or the hashlist, and that makes our
1838 * life easier.
1839 */
1840 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1841 ASSERT(mp == NULL);
1842 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1843 ASSERT(list_empty(&dqp->q_hashlist));
1844 ASSERT(list_empty(&dqp->q_mplist));
1845 list_del_init(&dqp->q_freelist); 1630 list_del_init(&dqp->q_freelist);
1846 xfs_Gqm->qm_dqfrlist_cnt--; 1631 xfs_Gqm->qm_dqfrlist_cnt--;
1847 dqpout = dqp; 1632 restarts++;
1848 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1849 goto dqunlock; 1633 goto dqunlock;
1850 } 1634 }
1851 1635
@@ -1874,64 +1658,49 @@ again:
1874 * We flush it delayed write, so don't bother 1658 * We flush it delayed write, so don't bother
1875 * releasing the freelist lock. 1659 * releasing the freelist lock.
1876 */ 1660 */
1877 error = xfs_qm_dqflush(dqp, 0); 1661 error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
1878 if (error) { 1662 if (error) {
1879 xfs_warn(mp, "%s: dquot %p flush failed", 1663 xfs_warn(mp, "%s: dquot %p flush failed",
1880 __func__, dqp); 1664 __func__, dqp);
1881 } 1665 }
1882 goto dqunlock; 1666 goto dqunlock;
1883 } 1667 }
1668 xfs_dqfunlock(dqp);
1884 1669
1885 /* 1670 /*
1886 * We're trying to get the hashlock out of order. This races 1671 * Prevent lookup now that we are going to reclaim the dquot.
1887 * with dqlookup; so, we giveup and goto the next dquot if 1672 * Once XFS_DQ_FREEING is set, lookups won't touch the dquot,
1888 * we couldn't get the hashlock. This way, we won't starve 1673 * so we can drop the lock now.
1889 * a dqlookup process that holds the hashlock that is
1890 * waiting for the freelist lock.
1891 */ 1674 */
1892 if (!mutex_trylock(&dqp->q_hash->qh_lock)) { 1675 dqp->dq_flags |= XFS_DQ_FREEING;
1893 restarts++; 1676 xfs_dqunlock(dqp);
1894 goto dqfunlock;
1895 }
1896 1677
1897 /* 1678 mutex_lock(&dqp->q_hash->qh_lock);
1898 * This races with dquot allocation code as well as dqflush_all 1679 list_del_init(&dqp->q_hashlist);
1899 * and reclaim code. So, if we failed to grab the mplist lock, 1680 dqp->q_hash->qh_version++;
1900 * giveup everything and start over. 1681 mutex_unlock(&dqp->q_hash->qh_lock);
1901 */
1902 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
1903 restarts++;
1904 startagain = 1;
1905 goto qhunlock;
1906 }
1907 1682
1908 ASSERT(dqp->q_nrefs == 0); 1683 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1909 list_del_init(&dqp->q_mplist); 1684 list_del_init(&dqp->q_mplist);
1910 mp->m_quotainfo->qi_dquots--; 1685 mp->m_quotainfo->qi_dquots--;
1911 mp->m_quotainfo->qi_dqreclaims++; 1686 mp->m_quotainfo->qi_dqreclaims++;
1912 list_del_init(&dqp->q_hashlist); 1687 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1913 dqp->q_hash->qh_version++; 1688
1689 ASSERT(dqp->q_nrefs == 0);
1914 list_del_init(&dqp->q_freelist); 1690 list_del_init(&dqp->q_freelist);
1915 xfs_Gqm->qm_dqfrlist_cnt--; 1691 xfs_Gqm->qm_dqfrlist_cnt--;
1916 dqpout = dqp; 1692
1917 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 1693 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1918qhunlock: 1694 return dqp;
1919 mutex_unlock(&dqp->q_hash->qh_lock);
1920dqfunlock:
1921 xfs_dqfunlock(dqp);
1922dqunlock: 1695dqunlock:
1923 xfs_dqunlock(dqp); 1696 xfs_dqunlock(dqp);
1924 if (dqpout)
1925 break;
1926 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1697 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1927 break; 1698 break;
1928 if (startagain) { 1699 goto restart;
1929 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1930 goto again;
1931 }
1932 } 1700 }
1701
1933 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1702 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1934 return dqpout; 1703 return NULL;
1935} 1704}
1936 1705
1937/* 1706/*
@@ -2151,10 +1920,7 @@ xfs_qm_vop_dqalloc(
2151 * this to caller 1920 * this to caller
2152 */ 1921 */
2153 ASSERT(ip->i_udquot); 1922 ASSERT(ip->i_udquot);
2154 uq = ip->i_udquot; 1923 uq = xfs_qm_dqhold(ip->i_udquot);
2155 xfs_dqlock(uq);
2156 XFS_DQHOLD(uq);
2157 xfs_dqunlock(uq);
2158 } 1924 }
2159 } 1925 }
2160 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { 1926 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
@@ -2175,10 +1941,7 @@ xfs_qm_vop_dqalloc(
2175 xfs_ilock(ip, lockflags); 1941 xfs_ilock(ip, lockflags);
2176 } else { 1942 } else {
2177 ASSERT(ip->i_gdquot); 1943 ASSERT(ip->i_gdquot);
2178 gq = ip->i_gdquot; 1944 gq = xfs_qm_dqhold(ip->i_gdquot);
2179 xfs_dqlock(gq);
2180 XFS_DQHOLD(gq);
2181 xfs_dqunlock(gq);
2182 } 1945 }
2183 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { 1946 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2184 if (xfs_get_projid(ip) != prid) { 1947 if (xfs_get_projid(ip) != prid) {
@@ -2198,10 +1961,7 @@ xfs_qm_vop_dqalloc(
2198 xfs_ilock(ip, lockflags); 1961 xfs_ilock(ip, lockflags);
2199 } else { 1962 } else {
2200 ASSERT(ip->i_gdquot); 1963 ASSERT(ip->i_gdquot);
2201 gq = ip->i_gdquot; 1964 gq = xfs_qm_dqhold(ip->i_gdquot);
2202 xfs_dqlock(gq);
2203 XFS_DQHOLD(gq);
2204 xfs_dqunlock(gq);
2205 } 1965 }
2206 } 1966 }
2207 if (uq) 1967 if (uq)
@@ -2251,14 +2011,10 @@ xfs_qm_vop_chown(
2251 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); 2011 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2252 2012
2253 /* 2013 /*
2254 * Take an extra reference, because the inode 2014 * Take an extra reference, because the inode is going to keep
2255 * is going to keep this dquot pointer even 2015 * this dquot pointer even after the trans_commit.
2256 * after the trans_commit.
2257 */ 2016 */
2258 xfs_dqlock(newdq); 2017 *IO_olddq = xfs_qm_dqhold(newdq);
2259 XFS_DQHOLD(newdq);
2260 xfs_dqunlock(newdq);
2261 *IO_olddq = newdq;
2262 2018
2263 return prevdq; 2019 return prevdq;
2264} 2020}
@@ -2390,25 +2146,21 @@ xfs_qm_vop_create_dqattach(
2390 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2146 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2391 2147
2392 if (udqp) { 2148 if (udqp) {
2393 xfs_dqlock(udqp);
2394 XFS_DQHOLD(udqp);
2395 xfs_dqunlock(udqp);
2396 ASSERT(ip->i_udquot == NULL); 2149 ASSERT(ip->i_udquot == NULL);
2397 ip->i_udquot = udqp;
2398 ASSERT(XFS_IS_UQUOTA_ON(mp)); 2150 ASSERT(XFS_IS_UQUOTA_ON(mp));
2399 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 2151 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2152
2153 ip->i_udquot = xfs_qm_dqhold(udqp);
2400 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 2154 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2401 } 2155 }
2402 if (gdqp) { 2156 if (gdqp) {
2403 xfs_dqlock(gdqp);
2404 XFS_DQHOLD(gdqp);
2405 xfs_dqunlock(gdqp);
2406 ASSERT(ip->i_gdquot == NULL); 2157 ASSERT(ip->i_gdquot == NULL);
2407 ip->i_gdquot = gdqp;
2408 ASSERT(XFS_IS_OQUOTA_ON(mp)); 2158 ASSERT(XFS_IS_OQUOTA_ON(mp));
2409 ASSERT((XFS_IS_GQUOTA_ON(mp) ? 2159 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2410 ip->i_d.di_gid : xfs_get_projid(ip)) == 2160 ip->i_d.di_gid : xfs_get_projid(ip)) ==
2411 be32_to_cpu(gdqp->q_core.d_id)); 2161 be32_to_cpu(gdqp->q_core.d_id));
2162
2163 ip->i_gdquot = xfs_qm_dqhold(gdqp);
2412 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2164 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2413 } 2165 }
2414} 2166}
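
Taken together, the purge and reclaim changes in this file amount to a small ownership protocol: only a dquot with q_nrefs == 0 may be claimed for teardown, the claimant sets XFS_DQ_FREEING under the dquot lock, and lookups presumably back off when they see the flag (the lookup side lives in xfs_dquot.c, outside this diff). An illustrative userspace rendering of both halves of that hand-off:

#include <pthread.h>

struct dq {
	pthread_mutex_t	lock;
	int		nrefs;
	int		freeing;
};

/* Lookup side: never hand out a reference to an object being torn down. */
static int dq_tryhold(struct dq *d)
{
	int ok = 0;

	pthread_mutex_lock(&d->lock);
	if (!d->freeing) {
		d->nrefs++;	/* a held object can no longer be reclaimed */
		ok = 1;
	}
	pthread_mutex_unlock(&d->lock);
	return ok;		/* on failure the caller retries the lookup */
}

/* Reclaim side: claim only unreferenced objects, then tear down unlocked. */
static int dq_tryclaim(struct dq *d)
{
	int ok = 0;

	pthread_mutex_lock(&d->lock);
	if (d->nrefs == 0 && !d->freeing) {
		d->freeing = 1;	/* from here on, lookups keep their hands off */
		ok = 1;
	}
	pthread_mutex_unlock(&d->lock);
	return ok;
}
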
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 43b9abe1052c..9b4f3adefbc5 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -33,12 +33,6 @@ extern kmem_zone_t *qm_dqzone;
33extern kmem_zone_t *qm_dqtrxzone; 33extern kmem_zone_t *qm_dqtrxzone;
34 34
35/* 35/*
36 * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
37 * iterate over the mountpt's dquot list in one call.
38 */
39#define XFS_QM_SYNC_MAX_RESTARTS 7
40
41/*
42 * Ditto, for xfs_qm_dqreclaim_one. 36 * Ditto, for xfs_qm_dqreclaim_one.
43 */ 37 */
44#define XFS_QM_RECLAIM_MAX_RESTARTS 4 38#define XFS_QM_RECLAIM_MAX_RESTARTS 4
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index a595f29567fe..8a0807e0f979 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,8 +87,7 @@ typedef struct xfs_dqblk {
87#define XFS_DQ_PROJ 0x0002 /* project quota */ 87#define XFS_DQ_PROJ 0x0002 /* project quota */
88#define XFS_DQ_GROUP 0x0004 /* a group quota */ 88#define XFS_DQ_GROUP 0x0004 /* a group quota */
89#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ 89#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
90#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */ 90#define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */
91#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */
92 91
93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 92#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
94 93
@@ -97,8 +96,7 @@ typedef struct xfs_dqblk {
97 { XFS_DQ_PROJ, "PROJ" }, \ 96 { XFS_DQ_PROJ, "PROJ" }, \
98 { XFS_DQ_GROUP, "GROUP" }, \ 97 { XFS_DQ_GROUP, "GROUP" }, \
99 { XFS_DQ_DIRTY, "DIRTY" }, \ 98 { XFS_DQ_DIRTY, "DIRTY" }, \
100 { XFS_DQ_WANT, "WANT" }, \ 99 { XFS_DQ_FREEING, "FREEING" }
101 { XFS_DQ_INACTIVE, "INACTIVE" }
102 100
103/* 101/*
104 * In the worst case, when both user and group quotas are on, 102 * In the worst case, when both user and group quotas are on,
@@ -199,7 +197,6 @@ typedef struct xfs_qoff_logformat {
199#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ 197#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */
200#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ 198#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
201#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 199#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
202#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */
203#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 200#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
204#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 201#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
205#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 202#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
@@ -326,7 +323,6 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
326extern void xfs_qm_dqdetach(struct xfs_inode *); 323extern void xfs_qm_dqdetach(struct xfs_inode *);
327extern void xfs_qm_dqrele(struct xfs_dquot *); 324extern void xfs_qm_dqrele(struct xfs_dquot *);
328extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); 325extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
329extern int xfs_qm_sync(struct xfs_mount *, int);
330extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); 326extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
331extern void xfs_qm_mount_quotas(struct xfs_mount *); 327extern void xfs_qm_mount_quotas(struct xfs_mount *);
332extern void xfs_qm_unmount(struct xfs_mount *); 328extern void xfs_qm_unmount(struct xfs_mount *);
@@ -366,10 +362,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
366#define xfs_qm_dqdetach(ip) 362#define xfs_qm_dqdetach(ip)
367#define xfs_qm_dqrele(d) 363#define xfs_qm_dqrele(d)
368#define xfs_qm_statvfs(ip, s) 364#define xfs_qm_statvfs(ip, s)
369static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
370{
371 return 0;
372}
373#define xfs_qm_newmount(mp, a, b) (0) 365#define xfs_qm_newmount(mp, a, b) (0)
374#define xfs_qm_mount_quotas(mp) 366#define xfs_qm_mount_quotas(mp)
375#define xfs_qm_unmount(mp) 367#define xfs_qm_unmount(mp)
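
The remaining dquot flags now fit in five bits, with 0x0010 reused for XFS_DQ_FREEING. A trivial userspace decoder over the same values (XFS_DQ_USER is 0x0001 in this header, just outside the context shown; the helper name is hypothetical):

#include <stdio.h>

#define XFS_DQ_USER	0x0001
#define XFS_DQ_PROJ	0x0002
#define XFS_DQ_GROUP	0x0004
#define XFS_DQ_DIRTY	0x0008
#define XFS_DQ_FREEING	0x0010

static void dq_flags_print(unsigned int f)
{
	printf("flags 0x%04x:%s%s%s%s%s\n", f,
	       f & XFS_DQ_USER    ? " USER"    : "",
	       f & XFS_DQ_PROJ    ? " PROJ"    : "",
	       f & XFS_DQ_GROUP   ? " GROUP"   : "",
	       f & XFS_DQ_DIRTY   ? " DIRTY"   : "",
	       f & XFS_DQ_FREEING ? " FREEING" : "");
}

int main(void)
{
	/* prints: flags 0x0019: USER DIRTY FREEING */
	dq_flags_print(XFS_DQ_USER | XFS_DQ_DIRTY | XFS_DQ_FREEING);
	return 0;
}
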
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 7b7669507ee3..281961c1d81a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -199,7 +199,6 @@ xfs_parseargs(
199 mp->m_flags |= XFS_MOUNT_BARRIER; 199 mp->m_flags |= XFS_MOUNT_BARRIER;
200 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 200 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
201 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 201 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
202 mp->m_flags |= XFS_MOUNT_DELAYLOG;
203 202
204 /* 203 /*
205 * These can be overridden by the mount option parsing. 204 * These can be overridden by the mount option parsing.
@@ -353,11 +352,11 @@ xfs_parseargs(
353 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); 352 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
354 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 353 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
355 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
356 mp->m_flags |= XFS_MOUNT_DELAYLOG; 355 xfs_warn(mp,
356 "delaylog is the default now, option is deprecated.");
357 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 357 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
358 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
359 xfs_warn(mp, 358 xfs_warn(mp,
360 "nodelaylog is deprecated and will be removed in Linux 3.3"); 359 "nodelaylog support has been removed, option is deprecated.");
361 } else if (!strcmp(this_char, MNTOPT_DISCARD)) { 360 } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
362 mp->m_flags |= XFS_MOUNT_DISCARD; 361 mp->m_flags |= XFS_MOUNT_DISCARD;
363 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { 362 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
@@ -395,13 +394,6 @@ xfs_parseargs(
395 return EINVAL; 394 return EINVAL;
396 } 395 }
397 396
398 if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
399 !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
400 xfs_warn(mp,
401 "the discard option is incompatible with the nodelaylog option");
402 return EINVAL;
403 }
404
405#ifndef CONFIG_XFS_QUOTA 397#ifndef CONFIG_XFS_QUOTA
406 if (XFS_IS_QUOTA_RUNNING(mp)) { 398 if (XFS_IS_QUOTA_RUNNING(mp)) {
407 xfs_warn(mp, "quota support not available in this kernel."); 399 xfs_warn(mp, "quota support not available in this kernel.");
@@ -501,7 +493,6 @@ xfs_showargs(
501 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, 493 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
502 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 494 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
503 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 495 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
504 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
505 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, 496 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
506 { 0, NULL } 497 { 0, NULL }
507 }; 498 };
@@ -1014,17 +1005,10 @@ xfs_fs_sync_fs(
1014 int error; 1005 int error;
1015 1006
1016 /* 1007 /*
1017 * Not much we can do for the first async pass. Writing out the 1008 * Doing anything during the async pass would be counterproductive.
1018 * superblock would be counter-productive as we are going to redirty
1019 * when writing out other data and metadata (and writing out a single
1020 * block is quite fast anyway).
1021 *
1022 * Try to asynchronously kick off quota syncing at least.
1023 */ 1009 */
1024 if (!wait) { 1010 if (!wait)
1025 xfs_qm_sync(mp, SYNC_TRYLOCK);
1026 return 0; 1011 return 0;
1027 }
1028 1012
1029 error = xfs_quiesce_data(mp); 1013 error = xfs_quiesce_data(mp);
1030 if (error) 1014 if (error)
@@ -1621,12 +1605,12 @@ STATIC int __init
1621xfs_init_workqueues(void) 1605xfs_init_workqueues(void)
1622{ 1606{
1623 /* 1607 /*
1624 * max_active is set to 8 to give enough concurency to allow 1608 * We never want the same work item to run twice; reclaiming inodes
1625 * multiple work operations on each CPU to run. This allows multiple 1609 * or idling the log is not going to get any faster with multiple CPUs
1626 * filesystems to be running sync work concurrently, and scales with 1610 * competing for resources. Use the default large max_active value
1627 * the number of CPUs in the system. 1611 * so that even lots of filesystems can perform these tasks in parallel.
1628 */ 1612 */
1629 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); 1613 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
1630 if (!xfs_syncd_wq) 1614 if (!xfs_syncd_wq)
1631 return -ENOMEM; 1615 return -ENOMEM;
1632 return 0; 1616 return 0;
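
The xfssyncd workqueue switch is worth a note: WQ_NON_REENTRANT ensures a given work item is never running on two CPUs at once, and passing 0 for max_active selects the default limit rather than capping concurrency at 8. A kernel-style sketch of the same call pattern (only meaningful inside a kernel build; names hypothetical):

#include <linux/init.h>
#include <linux/workqueue.h>

/* A non-reentrant queue guarantees this handler never races itself. */
static void example_worker(struct work_struct *work)
{
	/* ... sync/reclaim work here ... */
}

static DECLARE_WORK(example_work, example_worker);

static struct workqueue_struct *example_wq;

static int __init example_init(void)
{
	/* max_active == 0 selects the default (large) concurrency limit. */
	example_wq = alloc_workqueue("example", WQ_NON_REENTRANT, 0);
	if (!example_wq)
		return -ENOMEM;
	queue_work(example_wq, &example_work);
	return 0;
}
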
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index f0994aedcd15..72c01a1c16e7 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -395,10 +395,7 @@ xfs_quiesce_data(
395 */ 395 */
396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0); 396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
397 397
398 xfs_qm_sync(mp, SYNC_TRYLOCK); 398 /* force out the log */
399 xfs_qm_sync(mp, SYNC_WAIT);
400
401 /* force out the newly dirtied log buffers */
402 xfs_log_force(mp, XFS_LOG_SYNC); 399 xfs_log_force(mp, XFS_LOG_SYNC);
403 400
404 /* write superblock and hoover up shutdown errors */ 401 /* write superblock and hoover up shutdown errors */
@@ -506,7 +503,6 @@ xfs_sync_worker(
506 error = xfs_fs_log_dummy(mp); 503 error = xfs_fs_log_dummy(mp);
507 else 504 else
508 xfs_log_force(mp, 0); 505 xfs_log_force(mp, 0);
509 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
510 506
511 /* start pushing all the metadata that is currently dirty */ 507 /* start pushing all the metadata that is currently dirty */
512 xfs_ail_push_all(mp->m_ail); 508 xfs_ail_push_all(mp->m_ail);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 494035798873..a9d5b1e06efe 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -743,8 +743,6 @@ DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
743DEFINE_DQUOT_EVENT(xfs_dqread); 743DEFINE_DQUOT_EVENT(xfs_dqread);
744DEFINE_DQUOT_EVENT(xfs_dqread_fail); 744DEFINE_DQUOT_EVENT(xfs_dqread_fail);
745DEFINE_DQUOT_EVENT(xfs_dqlookup_found); 745DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
746DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
747DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
748DEFINE_DQUOT_EVENT(xfs_dqlookup_done); 746DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
749DEFINE_DQUOT_EVENT(xfs_dqget_hit); 747DEFINE_DQUOT_EVENT(xfs_dqget_hit);
750DEFINE_DQUOT_EVENT(xfs_dqget_miss); 748DEFINE_DQUOT_EVENT(xfs_dqget_miss);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 1f35b2feca97..329b06aba1c2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1158,7 +1158,6 @@ xfs_trans_add_item(
1158 1158
1159 lidp->lid_item = lip; 1159 lidp->lid_item = lip;
1160 lidp->lid_flags = 0; 1160 lidp->lid_flags = 0;
1161 lidp->lid_size = 0;
1162 list_add_tail(&lidp->lid_trans, &tp->t_items); 1161 list_add_tail(&lidp->lid_trans, &tp->t_items);
1163 1162
1164 lip->li_desc = lidp; 1163 lip->li_desc = lidp;
@@ -1210,219 +1209,6 @@ xfs_trans_free_items(
1210 } 1209 }
1211} 1210}
1212 1211
1213/*
1214 * Unlock the items associated with a transaction.
1215 *
1216 * Items which were not logged should be freed. Those which were logged must
1217 * still be tracked so they can be unpinned when the transaction commits.
1218 */
1219STATIC void
1220xfs_trans_unlock_items(
1221 struct xfs_trans *tp,
1222 xfs_lsn_t commit_lsn)
1223{
1224 struct xfs_log_item_desc *lidp, *next;
1225
1226 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1227 struct xfs_log_item *lip = lidp->lid_item;
1228
1229 lip->li_desc = NULL;
1230
1231 if (commit_lsn != NULLCOMMITLSN)
1232 IOP_COMMITTING(lip, commit_lsn);
1233 IOP_UNLOCK(lip);
1234
1235 /*
1236 * Free the descriptor if the item is not dirty
1237 * within this transaction.
1238 */
1239 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1240 xfs_trans_free_item_desc(lidp);
1241 }
1242}
1243
1244/*
1245 * Total up the number of log iovecs needed to commit this
1246 * transaction. The transaction itself needs one for the
1247 * transaction header. Ask each dirty item in turn how many
1248 * it needs to get the total.
1249 */
1250static uint
1251xfs_trans_count_vecs(
1252 struct xfs_trans *tp)
1253{
1254 int nvecs;
1255 struct xfs_log_item_desc *lidp;
1256
1257 nvecs = 1;
1258
1259 /* In the non-debug case we need to start bailing out if we
1260 * didn't find a log_item here, return zero and let trans_commit
1261 * deal with it.
1262 */
1263 if (list_empty(&tp->t_items)) {
1264 ASSERT(0);
1265 return 0;
1266 }
1267
1268 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1269 /*
1270 * Skip items which aren't dirty in this transaction.
1271 */
1272 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1273 continue;
1274 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1275 nvecs += lidp->lid_size;
1276 }
1277
1278 return nvecs;
1279}
1280
1281/*
1282 * Fill in the vector with pointers to data to be logged
1283 * by this transaction. The transaction header takes
1284 * the first vector, and then each dirty item takes the
1285 * number of vectors it indicated it needed in xfs_trans_count_vecs().
1286 *
1287 * As each item fills in the entries it needs, also pin the item
1288 * so that it cannot be flushed out until the log write completes.
1289 */
1290static void
1291xfs_trans_fill_vecs(
1292 struct xfs_trans *tp,
1293 struct xfs_log_iovec *log_vector)
1294{
1295 struct xfs_log_item_desc *lidp;
1296 struct xfs_log_iovec *vecp;
1297 uint nitems;
1298
1299 /*
1300 * Skip over the entry for the transaction header, we'll
1301 * fill that in at the end.
1302 */
1303 vecp = log_vector + 1;
1304
1305 nitems = 0;
1306 ASSERT(!list_empty(&tp->t_items));
1307 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1308 /* Skip items which aren't dirty in this transaction. */
1309 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1310 continue;
1311
1312 /*
1313 * The item may be marked dirty but not log anything. This can
1314 * be used to get called when a transaction is committed.
1315 */
1316 if (lidp->lid_size)
1317 nitems++;
1318 IOP_FORMAT(lidp->lid_item, vecp);
1319 vecp += lidp->lid_size;
1320 IOP_PIN(lidp->lid_item);
1321 }
1322
1323 /*
1324 * Now that we've counted the number of items in this transaction, fill
1325 * in the transaction header. Note that the transaction header does not
1326 * have a log item.
1327 */
1328 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
1329 tp->t_header.th_type = tp->t_type;
1330 tp->t_header.th_num_items = nitems;
1331 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
1332 log_vector->i_len = sizeof(xfs_trans_header_t);
1333 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
1334}
1335
1336/*
1337 * The committed item processing consists of calling the committed routine of
1338 * each logged item, updating the item's position in the AIL if necessary, and
1339 * unpinning each item. If the committed routine returns -1, then do nothing
1340 * further with the item because it may have been freed.
1341 *
1342 * Since items are unlocked when they are copied to the incore log, it is
1343 * possible for two transactions to be completing and manipulating the same
1344 * item simultaneously. The AIL lock will protect the lsn field of each item.
1345 * The value of this field can never go backwards.
1346 *
1347 * We unpin the items after repositioning them in the AIL, because otherwise
1348 * they could be immediately flushed and we'd have to race with the flusher
1349 * trying to pull the item from the AIL as we add it.
1350 */
1351static void
1352xfs_trans_item_committed(
1353 struct xfs_log_item *lip,
1354 xfs_lsn_t commit_lsn,
1355 int aborted)
1356{
1357 xfs_lsn_t item_lsn;
1358 struct xfs_ail *ailp;
1359
1360 if (aborted)
1361 lip->li_flags |= XFS_LI_ABORTED;
1362 item_lsn = IOP_COMMITTED(lip, commit_lsn);
1363
1364 /* item_lsn of -1 means the item needs no further processing */
1365 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
1366 return;
1367
1368 /*
1369 * If the returned lsn is greater than what it contained before, update
1370 * the location of the item in the AIL. If it is not, then do nothing.
1371 * Items can never move backwards in the AIL.
1372 *
1373 * While the new lsn should usually be greater, it is possible that a
1374 * later transaction completing simultaneously with an earlier one
1375 * using the same item could complete first with a higher lsn. This
1376 * would cause the earlier transaction to fail the test below.
1377 */
1378 ailp = lip->li_ailp;
1379 spin_lock(&ailp->xa_lock);
1380 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1381 /*
1382 * This will set the item's lsn to item_lsn and update the
1383 * position of the item in the AIL.
1384 *
1385 * xfs_trans_ail_update() drops the AIL lock.
1386 */
1387 xfs_trans_ail_update(ailp, lip, item_lsn);
1388 } else {
1389 spin_unlock(&ailp->xa_lock);
1390 }
1391
1392 /*
1393 * Now that we've repositioned the item in the AIL, unpin it so it can
1394 * be flushed. Pass information about buffer stale state down from the
1395 * log item flags, if anyone else stales the buffer we do not want to
1396 * pay any attention to it.
1397 */
1398 IOP_UNPIN(lip, 0);
1399}
1400
1401/*
1402 * This is typically called by the LM when a transaction has been fully
1403 * committed to disk. It needs to unpin the items which have
1404 * been logged by the transaction and update their positions
1405 * in the AIL if necessary.
1406 *
1407 * This also gets called when the transactions didn't get written out
1408 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
1409 */
1410STATIC void
1411xfs_trans_committed(
1412 void *arg,
1413 int abortflag)
1414{
1415 struct xfs_trans *tp = arg;
1416 struct xfs_log_item_desc *lidp, *next;
1417
1418 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1419 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
1420 xfs_trans_free_item_desc(lidp);
1421 }
1422
1423 xfs_trans_free(tp);
1424}
1425
1426static inline void 1212static inline void
1427xfs_log_item_batch_insert( 1213xfs_log_item_batch_insert(
1428 struct xfs_ail *ailp, 1214 struct xfs_ail *ailp,
@@ -1538,258 +1324,6 @@ xfs_trans_committed_bulk(
1538} 1324}
1539 1325
1540/* 1326/*
1541 * Called from the trans_commit code when we notice that the filesystem is in
1542 * the middle of a forced shutdown.
1543 *
1544 * When we are called here, we have already pinned all the items in the
1545 * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called
1546 * so we can simply walk the items in the transaction, unpin them with an abort
1547 * flag and then free the items. Note that unpinning the items can result in
1548 * them being freed immediately, so we need to use a safe list traversal method
1549 * here.
1550 */
1551STATIC void
1552xfs_trans_uncommit(
1553 struct xfs_trans *tp,
1554 uint flags)
1555{
1556 struct xfs_log_item_desc *lidp, *n;
1557
1558 list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
1559 if (lidp->lid_flags & XFS_LID_DIRTY)
1560 IOP_UNPIN(lidp->lid_item, 1);
1561 }
1562
1563 xfs_trans_unreserve_and_mod_sb(tp);
1564 xfs_trans_unreserve_and_mod_dquots(tp);
1565
1566 xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
1567 xfs_trans_free(tp);
1568}
1569
1570/*
1571 * Format the transaction direct to the iclog. This isolates the physical
1572 * transaction commit operation from the logical operation and hence allows
1573 * other methods to be introduced without affecting the existing commit path.
1574 */
1575static int
1576xfs_trans_commit_iclog(
1577 struct xfs_mount *mp,
1578 struct xfs_trans *tp,
1579 xfs_lsn_t *commit_lsn,
1580 int flags)
1581{
1582 int shutdown;
1583 int error;
1584 int log_flags = 0;
1585 struct xlog_in_core *commit_iclog;
1586#define XFS_TRANS_LOGVEC_COUNT 16
1587 struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
1588 struct xfs_log_iovec *log_vector;
1589 uint nvec;
1590
1591
1592 /*
1593 * Ask each log item how many log_vector entries it will
1594 * need so we can figure out how many to allocate.
1595 * Try to avoid the kmem_alloc() call in the common case
1596 * by using a vector from the stack when it fits.
1597 */
1598 nvec = xfs_trans_count_vecs(tp);
1599 if (nvec == 0) {
1600 return ENOMEM; /* triggers a shutdown! */
1601 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
1602 log_vector = log_vector_fast;
1603 } else {
1604 log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec *
1605 sizeof(xfs_log_iovec_t),
1606 KM_SLEEP);
1607 }
1608
1609 /*
1610 * Fill in the log_vector and pin the logged items, and
1611 * then write the transaction to the log.
1612 */
1613 xfs_trans_fill_vecs(tp, log_vector);
1614
1615 if (flags & XFS_TRANS_RELEASE_LOG_RES)
1616 log_flags = XFS_LOG_REL_PERM_RESERV;
1617
1618 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
1619
1620 /*
1621 * The transaction is committed incore here, and can go out to disk
1622 * at any time after this call. However, all the items associated
1623 * with the transaction are still locked and pinned in memory.
1624 */
1625 *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
1626
1627 tp->t_commit_lsn = *commit_lsn;
1628 trace_xfs_trans_commit_lsn(tp);
1629
1630 if (nvec > XFS_TRANS_LOGVEC_COUNT)
1631 kmem_free(log_vector);
1632
1633 /*
1634 * If we got a log write error. Unpin the logitems that we
1635 * had pinned, clean up, free trans structure, and return error.
1636 */
1637 if (error || *commit_lsn == -1) {
1638 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1639 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
1640 return XFS_ERROR(EIO);
1641 }
1642
1643 /*
1644 * Once the transaction has committed, unused
1645 * reservations need to be released and changes to
1646 * the superblock need to be reflected in the in-core
1647 * version. Do that now.
1648 */
1649 xfs_trans_unreserve_and_mod_sb(tp);
1650
1651 /*
1652 * Tell the LM to call the transaction completion routine
1653 * when the log write with LSN commit_lsn completes (e.g.
1654 * when the transaction commit really hits the on-disk log).
1655 * After this call we cannot reference tp, because the call
1656 * can happen at any time and the call will free the transaction
1657 * structure pointed to by tp. The only case where we call
1658 * the completion routine (xfs_trans_committed) directly is
1659 * if the log is turned off on a debug kernel or we're
1660 * running in simulation mode (the log is explicitly turned
1661 * off).
1662 */
1663 tp->t_logcb.cb_func = xfs_trans_committed;
1664 tp->t_logcb.cb_arg = tp;
1665
1666 /*
1667 * We need to pass the iclog buffer which was used for the
1668 * transaction commit record into this function, and attach
1669 * the callback to it. The callback must be attached before
1670 * the items are unlocked to avoid racing with other threads
1671 * waiting for an item to unlock.
1672 */
1673 shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));
1674
1675 /*
1676 * Mark this thread as no longer being in a transaction
1677 */
1678 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1679
1680 /*
1681 * Once all the items of the transaction have been copied
1682 * to the in core log and the callback is attached, the
1683 * items can be unlocked.
1684 *
1685 * This will free descriptors pointing to items which were
1686 * not logged since there is nothing more to do with them.
1687 * For items which were logged, we will keep pointers to them
1688 * so they can be unpinned after the transaction commits to disk.
1689 * This will also stamp each modified meta-data item with
1690 * the commit lsn of this transaction for dependency tracking
1691 * purposes.
1692 */
1693 xfs_trans_unlock_items(tp, *commit_lsn);
1694
1695 /*
1696 * If we detected a log error earlier, finish committing
1697 * the transaction now (unpin log items, etc).
1698 *
1699 * Order is critical here, to avoid using the transaction
1700 * pointer after its been freed (by xfs_trans_committed
1701 * either here now, or as a callback). We cannot do this
1702 * step inside xfs_log_notify as was done earlier because
1703 * of this issue.
1704 */
1705 if (shutdown)
1706 xfs_trans_committed(tp, XFS_LI_ABORTED);
1707
1708 /*
1709 * Now that the xfs_trans_committed callback has been attached,
1710 * and the items are released we can finally allow the iclog to
1711 * go to disk.
1712 */
1713 return xfs_log_release_iclog(mp, commit_iclog);
1714}
1715
1716/*
1717 * Walk the log items and allocate log vector structures for
1718 * each item large enough to fit all the vectors they require.
1719 * Note that this format differs from the old log vector format in
1720 * that there is no transaction header in these log vectors.
1721 */
1722STATIC struct xfs_log_vec *
1723xfs_trans_alloc_log_vecs(
1724 xfs_trans_t *tp)
1725{
1726 struct xfs_log_item_desc *lidp;
1727 struct xfs_log_vec *lv = NULL;
1728 struct xfs_log_vec *ret_lv = NULL;
1729
1730
1731 /* Bail out if we didn't find a log item. */
1732 if (list_empty(&tp->t_items)) {
1733 ASSERT(0);
1734 return NULL;
1735 }
1736
1737 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1738 struct xfs_log_vec *new_lv;
1739
1740 /* Skip items which aren't dirty in this transaction. */
1741 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1742 continue;
1743
1744 /* Skip items that do not have any vectors for writing */
1745 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1746 if (!lidp->lid_size)
1747 continue;
1748
1749 new_lv = kmem_zalloc(sizeof(*new_lv) +
1750 lidp->lid_size * sizeof(struct xfs_log_iovec),
1751 KM_SLEEP);
1752
1753 /* The allocated iovec region lies beyond the log vector. */
1754 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
1755 new_lv->lv_niovecs = lidp->lid_size;
1756 new_lv->lv_item = lidp->lid_item;
1757 if (!ret_lv)
1758 ret_lv = new_lv;
1759 else
1760 lv->lv_next = new_lv;
1761 lv = new_lv;
1762 }
1763
1764 return ret_lv;
1765}
1766
1767static int
1768xfs_trans_commit_cil(
1769 struct xfs_mount *mp,
1770 struct xfs_trans *tp,
1771 xfs_lsn_t *commit_lsn,
1772 int flags)
1773{
1774 struct xfs_log_vec *log_vector;
1775
1776 /*
1777 * Get each log item to allocate a vector structure for
1778 * the log item to to pass to the log write code. The
1779 * CIL commit code will format the vector and save it away.
1780 */
1781 log_vector = xfs_trans_alloc_log_vecs(tp);
1782 if (!log_vector)
1783 return ENOMEM;
1784
1785 xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
1786
1787 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1788 xfs_trans_free(tp);
1789 return 0;
1790}
1791
1792/*
1793 * Commit the given transaction to the log. 1327 * Commit the given transaction to the log.
1794 * 1328 *
1795 * XFS disk error handling mechanism is not based on a typical 1329 * XFS disk error handling mechanism is not based on a typical
@@ -1845,17 +1379,16 @@ xfs_trans_commit(
1845 xfs_trans_apply_sb_deltas(tp); 1379 xfs_trans_apply_sb_deltas(tp);
1846 xfs_trans_apply_dquot_deltas(tp); 1380 xfs_trans_apply_dquot_deltas(tp);
1847 1381
1848 if (mp->m_flags & XFS_MOUNT_DELAYLOG) 1382 error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
1849 error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
1850 else
1851 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
1852
1853 if (error == ENOMEM) { 1383 if (error == ENOMEM) {
1854 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1384 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1855 error = XFS_ERROR(EIO); 1385 error = XFS_ERROR(EIO);
1856 goto out_unreserve; 1386 goto out_unreserve;
1857 } 1387 }
1858 1388
1389 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1390 xfs_trans_free(tp);
1391
1859 /* 1392 /*
1860 * If the transaction needs to be synchronous, then force the 1393 * If the transaction needs to be synchronous, then force the
1861 * log out now and wait for it. 1394 * log out now and wait for it.
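
With xfs_trans_commit_iclog() and its helpers deleted, delayed logging stops being optional: every transaction commit funnels through the CIL. Stripped to its spine, the surviving path in xfs_trans_commit() reads roughly as follows (a paraphrase of the hunk above, not a verbatim quote; "sync" is assumed to be sampled from tp->t_flags before the transaction is freed):

	/* Paraphrased commit flow after this patch (error paths trimmed). */
	xfs_trans_apply_sb_deltas(tp);
	xfs_trans_apply_dquot_deltas(tp);

	error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
	if (error == ENOMEM) {
		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
		error = XFS_ERROR(EIO);
		goto out_unreserve;
	}

	/* The transaction may not be referenced past this point... */
	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
	xfs_trans_free(tp);

	/* ...so a synchronous commit just forces the log up to its LSN. */
	if (sync)
		error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
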
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3ae713c0abd9..f6118703f20d 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -163,9 +163,8 @@ typedef struct xfs_trans_header {
163 */ 163 */
164struct xfs_log_item_desc { 164struct xfs_log_item_desc {
165 struct xfs_log_item *lid_item; 165 struct xfs_log_item *lid_item;
166 ushort lid_size;
167 unsigned char lid_flags;
168 struct list_head lid_trans; 166 struct list_head lid_trans;
167 unsigned char lid_flags;
169}; 168};
170 169
171#define XFS_LID_DIRTY 0x1 170#define XFS_LID_DIRTY 0x1
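
Note that removing lid_size does not actually shrink the descriptor on 64-bit builds; lid_flags simply moves into the tail padding. The point of the removal is that only the just-deleted iclog commit path consumed the cached vector count; the CIL now asks each item for its size directly (via IOP_SIZE) when it builds log vectors. A quick userspace check of the two layouts (the kernel list_head stubbed as two pointers):

#include <stdio.h>

struct list_head {
	void *next, *prev;		/* stand-in for the kernel type */
};

struct old_desc {			/* layout before this patch */
	void			*lid_item;
	unsigned short		lid_size;
	unsigned char		lid_flags;
	struct list_head	lid_trans;
};

struct new_desc {			/* layout after this patch */
	void			*lid_item;
	struct list_head	lid_trans;
	unsigned char		lid_flags;
};

int main(void)
{
	/* On LP64 both print 32: the flags byte just changes padding. */
	printf("old %zu, new %zu\n",
	       sizeof(struct old_desc), sizeof(struct new_desc));
	return 0;
}
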