about | summary | refs | log | tree | commit | diff | stats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c 231
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 27
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_file.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_ioctl.c 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_ioctl32.c 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.c 5
-rw-r--r-- fs/xfs/linux-2.6/xfs_quotaops.c 8
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 15
-rw-r--r-- fs/xfs/linux-2.6/xfs_sync.c 203
-rw-r--r-- fs/xfs/linux-2.6/xfs_sync.h 7
-rw-r--r-- fs/xfs/linux-2.6/xfs_trace.c 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_trace.h 150
-rw-r--r-- fs/xfs/quota/xfs_dquot.c 193
-rw-r--r-- fs/xfs/quota/xfs_dquot.h 35
-rw-r--r-- fs/xfs/quota/xfs_dquot_item.c 30
-rw-r--r-- fs/xfs/quota/xfs_qm.c 609
-rw-r--r-- fs/xfs/quota/xfs_qm.h 23
-rw-r--r-- fs/xfs/quota/xfs_qm_stats.c 2
-rw-r--r-- fs/xfs/quota/xfs_qm_syscalls.c 165
-rw-r--r-- fs/xfs/quota/xfs_quota_priv.h 102
-rw-r--r-- fs/xfs/quota/xfs_trans_dquot.c 29
-rw-r--r-- fs/xfs/xfs_ag.h 1
-rw-r--r-- fs/xfs/xfs_bmap.c 2
-rw-r--r-- fs/xfs/xfs_buf_item.c 55
-rw-r--r-- fs/xfs/xfs_buf_item.h 2
-rw-r--r-- fs/xfs/xfs_error.c 30
-rw-r--r-- fs/xfs/xfs_error.h 9
-rw-r--r-- fs/xfs/xfs_extfree_item.c 18
-rw-r--r-- fs/xfs/xfs_inode.c 2
-rw-r--r-- fs/xfs/xfs_inode_item.c 21
-rw-r--r-- fs/xfs/xfs_iomap.c 123
-rw-r--r-- fs/xfs/xfs_iomap.h 47
-rw-r--r-- fs/xfs/xfs_log.c 702
-rw-r--r-- fs/xfs/xfs_log.h 13
-rw-r--r-- fs/xfs/xfs_log_priv.h 12
-rw-r--r-- fs/xfs/xfs_log_recover.c 311
-rw-r--r-- fs/xfs/xfs_mount.c 7
-rw-r--r-- fs/xfs/xfs_mount.h 1
-rw-r--r-- fs/xfs/xfs_quota.h 3
-rw-r--r-- fs/xfs/xfs_trans.c 760
-rw-r--r-- fs/xfs/xfs_trans.h 14
-rw-r--r-- fs/xfs/xfs_trans_buf.c 187
43 files changed, 2036 insertions, 2134 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 0f8b9968a803..089eaca860b4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -45,6 +45,15 @@
45#include <linux/pagevec.h> 45#include <linux/pagevec.h>
46#include <linux/writeback.h> 46#include <linux/writeback.h>
47 47
48/*
49 * Types of I/O for bmap clustering and I/O completion tracking.
50 */
51enum {
52 IO_READ, /* mapping for a read */
53 IO_DELAY, /* mapping covers delalloc region */
54 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
55 IO_NEW /* just allocated */
56};
48 57
49/* 58/*
50 * Prime number of hash buckets since address is used as the key. 59 * Prime number of hash buckets since address is used as the key.
@@ -103,8 +112,9 @@ xfs_count_page_state(
103 112
104STATIC struct block_device * 113STATIC struct block_device *
105xfs_find_bdev_for_inode( 114xfs_find_bdev_for_inode(
106 struct xfs_inode *ip) 115 struct inode *inode)
107{ 116{
117 struct xfs_inode *ip = XFS_I(inode);
108 struct xfs_mount *mp = ip->i_mount; 118 struct xfs_mount *mp = ip->i_mount;
109 119
110 if (XFS_IS_REALTIME_INODE(ip)) 120 if (XFS_IS_REALTIME_INODE(ip))
@@ -183,7 +193,7 @@ xfs_setfilesize(
183 xfs_fsize_t isize; 193 xfs_fsize_t isize;
184 194
185 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); 195 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
186 ASSERT(ioend->io_type != IOMAP_READ); 196 ASSERT(ioend->io_type != IO_READ);
187 197
188 if (unlikely(ioend->io_error)) 198 if (unlikely(ioend->io_error))
189 return 0; 199 return 0;
@@ -214,7 +224,7 @@ xfs_finish_ioend(
214 if (atomic_dec_and_test(&ioend->io_remaining)) { 224 if (atomic_dec_and_test(&ioend->io_remaining)) {
215 struct workqueue_struct *wq; 225 struct workqueue_struct *wq;
216 226
217 wq = (ioend->io_type == IOMAP_UNWRITTEN) ? 227 wq = (ioend->io_type == IO_UNWRITTEN) ?
218 xfsconvertd_workqueue : xfsdatad_workqueue; 228 xfsconvertd_workqueue : xfsdatad_workqueue;
219 queue_work(wq, &ioend->io_work); 229 queue_work(wq, &ioend->io_work);
220 if (wait) 230 if (wait)
@@ -237,7 +247,7 @@ xfs_end_io(
237 * For unwritten extents we need to issue transactions to convert a 247 * For unwritten extents we need to issue transactions to convert a
238 * range to normal written extens after the data I/O has finished. 248 * range to normal written extens after the data I/O has finished.
239 */ 249 */
240 if (ioend->io_type == IOMAP_UNWRITTEN && 250 if (ioend->io_type == IO_UNWRITTEN &&
241 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { 251 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
242 252
243 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 253 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
@@ -250,7 +260,7 @@ xfs_end_io(
250 * We might have to update the on-disk file size after extending 260 * We might have to update the on-disk file size after extending
251 * writes. 261 * writes.
252 */ 262 */
253 if (ioend->io_type != IOMAP_READ) { 263 if (ioend->io_type != IO_READ) {
254 error = xfs_setfilesize(ioend); 264 error = xfs_setfilesize(ioend);
255 ASSERT(!error || error == EAGAIN); 265 ASSERT(!error || error == EAGAIN);
256 } 266 }
@@ -309,21 +319,25 @@ xfs_map_blocks(
309 struct inode *inode, 319 struct inode *inode,
310 loff_t offset, 320 loff_t offset,
311 ssize_t count, 321 ssize_t count,
312 xfs_iomap_t *mapp, 322 struct xfs_bmbt_irec *imap,
313 int flags) 323 int flags)
314{ 324{
315 int nmaps = 1; 325 int nmaps = 1;
326 int new = 0;
316 327
317 return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); 328 return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
318} 329}
319 330
320STATIC int 331STATIC int
321xfs_iomap_valid( 332xfs_imap_valid(
322 xfs_iomap_t *iomapp, 333 struct inode *inode,
323 loff_t offset) 334 struct xfs_bmbt_irec *imap,
335 xfs_off_t offset)
324{ 336{
325 return offset >= iomapp->iomap_offset && 337 offset >>= inode->i_blkbits;
326 offset < iomapp->iomap_offset + iomapp->iomap_bsize; 338
339 return offset >= imap->br_startoff &&
340 offset < imap->br_startoff + imap->br_blockcount;
327} 341}
328 342
329/* 343/*
@@ -554,19 +568,23 @@ xfs_add_to_ioend(
554 568
555STATIC void 569STATIC void
556xfs_map_buffer( 570xfs_map_buffer(
571 struct inode *inode,
557 struct buffer_head *bh, 572 struct buffer_head *bh,
558 xfs_iomap_t *mp, 573 struct xfs_bmbt_irec *imap,
559 xfs_off_t offset, 574 xfs_off_t offset)
560 uint block_bits)
561{ 575{
562 sector_t bn; 576 sector_t bn;
577 struct xfs_mount *m = XFS_I(inode)->i_mount;
578 xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
579 xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
563 580
564 ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL); 581 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
582 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
565 583
566 bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) + 584 bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
567 ((offset - mp->iomap_offset) >> block_bits); 585 ((offset - iomap_offset) >> inode->i_blkbits);
568 586
569 ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME)); 587 ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
570 588
571 bh->b_blocknr = bn; 589 bh->b_blocknr = bn;
572 set_buffer_mapped(bh); 590 set_buffer_mapped(bh);
@@ -574,17 +592,17 @@ xfs_map_buffer(
574 592
575STATIC void 593STATIC void
576xfs_map_at_offset( 594xfs_map_at_offset(
595 struct inode *inode,
577 struct buffer_head *bh, 596 struct buffer_head *bh,
578 loff_t offset, 597 struct xfs_bmbt_irec *imap,
579 int block_bits, 598 xfs_off_t offset)
580 xfs_iomap_t *iomapp)
581{ 599{
582 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); 600 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
583 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); 601 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
584 602
585 lock_buffer(bh); 603 lock_buffer(bh);
586 xfs_map_buffer(bh, iomapp, offset, block_bits); 604 xfs_map_buffer(inode, bh, imap, offset);
587 bh->b_bdev = iomapp->iomap_target->bt_bdev; 605 bh->b_bdev = xfs_find_bdev_for_inode(inode);
588 set_buffer_mapped(bh); 606 set_buffer_mapped(bh);
589 clear_buffer_delay(bh); 607 clear_buffer_delay(bh);
590 clear_buffer_unwritten(bh); 608 clear_buffer_unwritten(bh);
@@ -713,11 +731,11 @@ xfs_is_delayed_page(
713 bh = head = page_buffers(page); 731 bh = head = page_buffers(page);
714 do { 732 do {
715 if (buffer_unwritten(bh)) 733 if (buffer_unwritten(bh))
716 acceptable = (type == IOMAP_UNWRITTEN); 734 acceptable = (type == IO_UNWRITTEN);
717 else if (buffer_delay(bh)) 735 else if (buffer_delay(bh))
718 acceptable = (type == IOMAP_DELAY); 736 acceptable = (type == IO_DELAY);
719 else if (buffer_dirty(bh) && buffer_mapped(bh)) 737 else if (buffer_dirty(bh) && buffer_mapped(bh))
720 acceptable = (type == IOMAP_NEW); 738 acceptable = (type == IO_NEW);
721 else 739 else
722 break; 740 break;
723 } while ((bh = bh->b_this_page) != head); 741 } while ((bh = bh->b_this_page) != head);
@@ -740,7 +758,7 @@ xfs_convert_page(
740 struct inode *inode, 758 struct inode *inode,
741 struct page *page, 759 struct page *page,
742 loff_t tindex, 760 loff_t tindex,
743 xfs_iomap_t *mp, 761 struct xfs_bmbt_irec *imap,
744 xfs_ioend_t **ioendp, 762 xfs_ioend_t **ioendp,
745 struct writeback_control *wbc, 763 struct writeback_control *wbc,
746 int startio, 764 int startio,
@@ -750,7 +768,6 @@ xfs_convert_page(
750 xfs_off_t end_offset; 768 xfs_off_t end_offset;
751 unsigned long p_offset; 769 unsigned long p_offset;
752 unsigned int type; 770 unsigned int type;
753 int bbits = inode->i_blkbits;
754 int len, page_dirty; 771 int len, page_dirty;
755 int count = 0, done = 0, uptodate = 1; 772 int count = 0, done = 0, uptodate = 1;
756 xfs_off_t offset = page_offset(page); 773 xfs_off_t offset = page_offset(page);
@@ -802,19 +819,19 @@ xfs_convert_page(
802 819
803 if (buffer_unwritten(bh) || buffer_delay(bh)) { 820 if (buffer_unwritten(bh) || buffer_delay(bh)) {
804 if (buffer_unwritten(bh)) 821 if (buffer_unwritten(bh))
805 type = IOMAP_UNWRITTEN; 822 type = IO_UNWRITTEN;
806 else 823 else
807 type = IOMAP_DELAY; 824 type = IO_DELAY;
808 825
809 if (!xfs_iomap_valid(mp, offset)) { 826 if (!xfs_imap_valid(inode, imap, offset)) {
810 done = 1; 827 done = 1;
811 continue; 828 continue;
812 } 829 }
813 830
814 ASSERT(!(mp->iomap_flags & IOMAP_HOLE)); 831 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
815 ASSERT(!(mp->iomap_flags & IOMAP_DELAY)); 832 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
816 833
817 xfs_map_at_offset(bh, offset, bbits, mp); 834 xfs_map_at_offset(inode, bh, imap, offset);
818 if (startio) { 835 if (startio) {
819 xfs_add_to_ioend(inode, bh, offset, 836 xfs_add_to_ioend(inode, bh, offset,
820 type, ioendp, done); 837 type, ioendp, done);
@@ -826,7 +843,7 @@ xfs_convert_page(
826 page_dirty--; 843 page_dirty--;
827 count++; 844 count++;
828 } else { 845 } else {
829 type = IOMAP_NEW; 846 type = IO_NEW;
830 if (buffer_mapped(bh) && all_bh && startio) { 847 if (buffer_mapped(bh) && all_bh && startio) {
831 lock_buffer(bh); 848 lock_buffer(bh);
832 xfs_add_to_ioend(inode, bh, offset, 849 xfs_add_to_ioend(inode, bh, offset,
@@ -866,7 +883,7 @@ STATIC void
866xfs_cluster_write( 883xfs_cluster_write(
867 struct inode *inode, 884 struct inode *inode,
868 pgoff_t tindex, 885 pgoff_t tindex,
869 xfs_iomap_t *iomapp, 886 struct xfs_bmbt_irec *imap,
870 xfs_ioend_t **ioendp, 887 xfs_ioend_t **ioendp,
871 struct writeback_control *wbc, 888 struct writeback_control *wbc,
872 int startio, 889 int startio,
@@ -885,7 +902,7 @@ xfs_cluster_write(
885 902
886 for (i = 0; i < pagevec_count(&pvec); i++) { 903 for (i = 0; i < pagevec_count(&pvec); i++) {
887 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 904 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
888 iomapp, ioendp, wbc, startio, all_bh); 905 imap, ioendp, wbc, startio, all_bh);
889 if (done) 906 if (done)
890 break; 907 break;
891 } 908 }
@@ -930,7 +947,7 @@ xfs_aops_discard_page(
930 loff_t offset = page_offset(page); 947 loff_t offset = page_offset(page);
931 ssize_t len = 1 << inode->i_blkbits; 948 ssize_t len = 1 << inode->i_blkbits;
932 949
933 if (!xfs_is_delayed_page(page, IOMAP_DELAY)) 950 if (!xfs_is_delayed_page(page, IO_DELAY))
934 goto out_invalidate; 951 goto out_invalidate;
935 952
936 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 953 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1042,15 +1059,15 @@ xfs_page_state_convert(
1042 int unmapped) /* also implies page uptodate */ 1059 int unmapped) /* also implies page uptodate */
1043{ 1060{
1044 struct buffer_head *bh, *head; 1061 struct buffer_head *bh, *head;
1045 xfs_iomap_t iomap; 1062 struct xfs_bmbt_irec imap;
1046 xfs_ioend_t *ioend = NULL, *iohead = NULL; 1063 xfs_ioend_t *ioend = NULL, *iohead = NULL;
1047 loff_t offset; 1064 loff_t offset;
1048 unsigned long p_offset = 0; 1065 unsigned long p_offset = 0;
1049 unsigned int type; 1066 unsigned int type;
1050 __uint64_t end_offset; 1067 __uint64_t end_offset;
1051 pgoff_t end_index, last_index, tlast; 1068 pgoff_t end_index, last_index;
1052 ssize_t size, len; 1069 ssize_t size, len;
1053 int flags, err, iomap_valid = 0, uptodate = 1; 1070 int flags, err, imap_valid = 0, uptodate = 1;
1054 int page_dirty, count = 0; 1071 int page_dirty, count = 0;
1055 int trylock = 0; 1072 int trylock = 0;
1056 int all_bh = unmapped; 1073 int all_bh = unmapped;
@@ -1097,7 +1114,7 @@ xfs_page_state_convert(
1097 bh = head = page_buffers(page); 1114 bh = head = page_buffers(page);
1098 offset = page_offset(page); 1115 offset = page_offset(page);
1099 flags = BMAPI_READ; 1116 flags = BMAPI_READ;
1100 type = IOMAP_NEW; 1117 type = IO_NEW;
1101 1118
1102 /* TODO: cleanup count and page_dirty */ 1119 /* TODO: cleanup count and page_dirty */
1103 1120
@@ -1111,12 +1128,12 @@ xfs_page_state_convert(
1111 * the iomap is actually still valid, but the ioend 1128 * the iomap is actually still valid, but the ioend
1112 * isn't. shouldn't happen too often. 1129 * isn't. shouldn't happen too often.
1113 */ 1130 */
1114 iomap_valid = 0; 1131 imap_valid = 0;
1115 continue; 1132 continue;
1116 } 1133 }
1117 1134
1118 if (iomap_valid) 1135 if (imap_valid)
1119 iomap_valid = xfs_iomap_valid(&iomap, offset); 1136 imap_valid = xfs_imap_valid(inode, &imap, offset);
1120 1137
1121 /* 1138 /*
1122 * First case, map an unwritten extent and prepare for 1139 * First case, map an unwritten extent and prepare for
@@ -1137,20 +1154,20 @@ xfs_page_state_convert(
1137 * Make sure we don't use a read-only iomap 1154 * Make sure we don't use a read-only iomap
1138 */ 1155 */
1139 if (flags == BMAPI_READ) 1156 if (flags == BMAPI_READ)
1140 iomap_valid = 0; 1157 imap_valid = 0;
1141 1158
1142 if (buffer_unwritten(bh)) { 1159 if (buffer_unwritten(bh)) {
1143 type = IOMAP_UNWRITTEN; 1160 type = IO_UNWRITTEN;
1144 flags = BMAPI_WRITE | BMAPI_IGNSTATE; 1161 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
1145 } else if (buffer_delay(bh)) { 1162 } else if (buffer_delay(bh)) {
1146 type = IOMAP_DELAY; 1163 type = IO_DELAY;
1147 flags = BMAPI_ALLOCATE | trylock; 1164 flags = BMAPI_ALLOCATE | trylock;
1148 } else { 1165 } else {
1149 type = IOMAP_NEW; 1166 type = IO_NEW;
1150 flags = BMAPI_WRITE | BMAPI_MMAP; 1167 flags = BMAPI_WRITE | BMAPI_MMAP;
1151 } 1168 }
1152 1169
1153 if (!iomap_valid) { 1170 if (!imap_valid) {
1154 /* 1171 /*
1155 * if we didn't have a valid mapping then we 1172 * if we didn't have a valid mapping then we
1156 * need to ensure that we put the new mapping 1173 * need to ensure that we put the new mapping
@@ -1160,7 +1177,7 @@ xfs_page_state_convert(
1160 * for unwritten extent conversion. 1177 * for unwritten extent conversion.
1161 */ 1178 */
1162 new_ioend = 1; 1179 new_ioend = 1;
1163 if (type == IOMAP_NEW) { 1180 if (type == IO_NEW) {
1164 size = xfs_probe_cluster(inode, 1181 size = xfs_probe_cluster(inode,
1165 page, bh, head, 0); 1182 page, bh, head, 0);
1166 } else { 1183 } else {
@@ -1168,14 +1185,14 @@ xfs_page_state_convert(
1168 } 1185 }
1169 1186
1170 err = xfs_map_blocks(inode, offset, size, 1187 err = xfs_map_blocks(inode, offset, size,
1171 &iomap, flags); 1188 &imap, flags);
1172 if (err) 1189 if (err)
1173 goto error; 1190 goto error;
1174 iomap_valid = xfs_iomap_valid(&iomap, offset); 1191 imap_valid = xfs_imap_valid(inode, &imap,
1192 offset);
1175 } 1193 }
1176 if (iomap_valid) { 1194 if (imap_valid) {
1177 xfs_map_at_offset(bh, offset, 1195 xfs_map_at_offset(inode, bh, &imap, offset);
1178 inode->i_blkbits, &iomap);
1179 if (startio) { 1196 if (startio) {
1180 xfs_add_to_ioend(inode, bh, offset, 1197 xfs_add_to_ioend(inode, bh, offset,
1181 type, &ioend, 1198 type, &ioend,
@@ -1194,40 +1211,41 @@ xfs_page_state_convert(
1194 * That means it must already have extents allocated 1211 * That means it must already have extents allocated
1195 * underneath it. Map the extent by reading it. 1212 * underneath it. Map the extent by reading it.
1196 */ 1213 */
1197 if (!iomap_valid || flags != BMAPI_READ) { 1214 if (!imap_valid || flags != BMAPI_READ) {
1198 flags = BMAPI_READ; 1215 flags = BMAPI_READ;
1199 size = xfs_probe_cluster(inode, page, bh, 1216 size = xfs_probe_cluster(inode, page, bh,
1200 head, 1); 1217 head, 1);
1201 err = xfs_map_blocks(inode, offset, size, 1218 err = xfs_map_blocks(inode, offset, size,
1202 &iomap, flags); 1219 &imap, flags);
1203 if (err) 1220 if (err)
1204 goto error; 1221 goto error;
1205 iomap_valid = xfs_iomap_valid(&iomap, offset); 1222 imap_valid = xfs_imap_valid(inode, &imap,
1223 offset);
1206 } 1224 }
1207 1225
1208 /* 1226 /*
1209 * We set the type to IOMAP_NEW in case we are doing a 1227 * We set the type to IO_NEW in case we are doing a
1210 * small write at EOF that is extending the file but 1228 * small write at EOF that is extending the file but
1211 * without needing an allocation. We need to update the 1229 * without needing an allocation. We need to update the
1212 * file size on I/O completion in this case so it is 1230 * file size on I/O completion in this case so it is
1213 * the same case as having just allocated a new extent 1231 * the same case as having just allocated a new extent
1214 * that we are writing into for the first time. 1232 * that we are writing into for the first time.
1215 */ 1233 */
1216 type = IOMAP_NEW; 1234 type = IO_NEW;
1217 if (trylock_buffer(bh)) { 1235 if (trylock_buffer(bh)) {
1218 ASSERT(buffer_mapped(bh)); 1236 ASSERT(buffer_mapped(bh));
1219 if (iomap_valid) 1237 if (imap_valid)
1220 all_bh = 1; 1238 all_bh = 1;
1221 xfs_add_to_ioend(inode, bh, offset, type, 1239 xfs_add_to_ioend(inode, bh, offset, type,
1222 &ioend, !iomap_valid); 1240 &ioend, !imap_valid);
1223 page_dirty--; 1241 page_dirty--;
1224 count++; 1242 count++;
1225 } else { 1243 } else {
1226 iomap_valid = 0; 1244 imap_valid = 0;
1227 } 1245 }
1228 } else if ((buffer_uptodate(bh) || PageUptodate(page)) && 1246 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
1229 (unmapped || startio)) { 1247 (unmapped || startio)) {
1230 iomap_valid = 0; 1248 imap_valid = 0;
1231 } 1249 }
1232 1250
1233 if (!iohead) 1251 if (!iohead)
@@ -1241,12 +1259,23 @@ xfs_page_state_convert(
1241 if (startio) 1259 if (startio)
1242 xfs_start_page_writeback(page, 1, count); 1260 xfs_start_page_writeback(page, 1, count);
1243 1261
1244 if (ioend && iomap_valid) { 1262 if (ioend && imap_valid) {
1245 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> 1263 xfs_off_t end_index;
1246 PAGE_CACHE_SHIFT; 1264
1247 tlast = min_t(pgoff_t, offset, last_index); 1265 end_index = imap.br_startoff + imap.br_blockcount;
1248 xfs_cluster_write(inode, page->index + 1, &iomap, &ioend, 1266
1249 wbc, startio, all_bh, tlast); 1267 /* to bytes */
1268 end_index <<= inode->i_blkbits;
1269
1270 /* to pages */
1271 end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
1272
1273 /* check against file size */
1274 if (end_index > last_index)
1275 end_index = last_index;
1276
1277 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1278 wbc, startio, all_bh, end_index);
1250 } 1279 }
1251 1280
1252 if (iohead) 1281 if (iohead)
@@ -1448,10 +1477,11 @@ __xfs_get_blocks(
1448 int direct, 1477 int direct,
1449 bmapi_flags_t flags) 1478 bmapi_flags_t flags)
1450{ 1479{
1451 xfs_iomap_t iomap; 1480 struct xfs_bmbt_irec imap;
1452 xfs_off_t offset; 1481 xfs_off_t offset;
1453 ssize_t size; 1482 ssize_t size;
1454 int niomap = 1; 1483 int nimap = 1;
1484 int new = 0;
1455 int error; 1485 int error;
1456 1486
1457 offset = (xfs_off_t)iblock << inode->i_blkbits; 1487 offset = (xfs_off_t)iblock << inode->i_blkbits;
@@ -1462,22 +1492,21 @@ __xfs_get_blocks(
1462 return 0; 1492 return 0;
1463 1493
1464 error = xfs_iomap(XFS_I(inode), offset, size, 1494 error = xfs_iomap(XFS_I(inode), offset, size,
1465 create ? flags : BMAPI_READ, &iomap, &niomap); 1495 create ? flags : BMAPI_READ, &imap, &nimap, &new);
1466 if (error) 1496 if (error)
1467 return -error; 1497 return -error;
1468 if (niomap == 0) 1498 if (nimap == 0)
1469 return 0; 1499 return 0;
1470 1500
1471 if (iomap.iomap_bn != IOMAP_DADDR_NULL) { 1501 if (imap.br_startblock != HOLESTARTBLOCK &&
1502 imap.br_startblock != DELAYSTARTBLOCK) {
1472 /* 1503 /*
1473 * For unwritten extents do not report a disk address on 1504 * For unwritten extents do not report a disk address on
1474 * the read case (treat as if we're reading into a hole). 1505 * the read case (treat as if we're reading into a hole).
1475 */ 1506 */
1476 if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { 1507 if (create || !ISUNWRITTEN(&imap))
1477 xfs_map_buffer(bh_result, &iomap, offset, 1508 xfs_map_buffer(inode, bh_result, &imap, offset);
1478 inode->i_blkbits); 1509 if (create && ISUNWRITTEN(&imap)) {
1479 }
1480 if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
1481 if (direct) 1510 if (direct)
1482 bh_result->b_private = inode; 1511 bh_result->b_private = inode;
1483 set_buffer_unwritten(bh_result); 1512 set_buffer_unwritten(bh_result);
@@ -1488,7 +1517,7 @@ __xfs_get_blocks(
1488 * If this is a realtime file, data may be on a different device. 1517 * If this is a realtime file, data may be on a different device.
1489 * to that pointed to from the buffer_head b_bdev currently. 1518 * to that pointed to from the buffer_head b_bdev currently.
1490 */ 1519 */
1491 bh_result->b_bdev = iomap.iomap_target->bt_bdev; 1520 bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1492 1521
1493 /* 1522 /*
1494 * If we previously allocated a block out beyond eof and we are now 1523 * If we previously allocated a block out beyond eof and we are now
@@ -1502,10 +1531,10 @@ __xfs_get_blocks(
1502 if (create && 1531 if (create &&
1503 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || 1532 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
1504 (offset >= i_size_read(inode)) || 1533 (offset >= i_size_read(inode)) ||
1505 (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN)))) 1534 (new || ISUNWRITTEN(&imap))))
1506 set_buffer_new(bh_result); 1535 set_buffer_new(bh_result);
1507 1536
1508 if (iomap.iomap_flags & IOMAP_DELAY) { 1537 if (imap.br_startblock == DELAYSTARTBLOCK) {
1509 BUG_ON(direct); 1538 BUG_ON(direct);
1510 if (create) { 1539 if (create) {
1511 set_buffer_uptodate(bh_result); 1540 set_buffer_uptodate(bh_result);
@@ -1514,11 +1543,23 @@ __xfs_get_blocks(
1514 } 1543 }
1515 } 1544 }
1516 1545
1546 /*
1547 * If this is O_DIRECT or the mpage code calling tell them how large
1548 * the mapping is, so that we can avoid repeated get_blocks calls.
1549 */
1517 if (direct || size > (1 << inode->i_blkbits)) { 1550 if (direct || size > (1 << inode->i_blkbits)) {
1518 ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0); 1551 xfs_off_t mapping_size;
1519 offset = min_t(xfs_off_t, 1552
1520 iomap.iomap_bsize - iomap.iomap_delta, size); 1553 mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
1521 bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset); 1554 mapping_size <<= inode->i_blkbits;
1555
1556 ASSERT(mapping_size > 0);
1557 if (mapping_size > size)
1558 mapping_size = size;
1559 if (mapping_size > LONG_MAX)
1560 mapping_size = LONG_MAX;
1561
1562 bh_result->b_size = mapping_size;
1522 } 1563 }
1523 1564
1524 return 0; 1565 return 0;
@@ -1576,7 +1617,7 @@ xfs_end_io_direct(
1576 */ 1617 */
1577 ioend->io_offset = offset; 1618 ioend->io_offset = offset;
1578 ioend->io_size = size; 1619 ioend->io_size = size;
1579 if (ioend->io_type == IOMAP_READ) { 1620 if (ioend->io_type == IO_READ) {
1580 xfs_finish_ioend(ioend, 0); 1621 xfs_finish_ioend(ioend, 0);
1581 } else if (private && size > 0) { 1622 } else if (private && size > 0) {
1582 xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); 1623 xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
@@ -1587,7 +1628,7 @@ xfs_end_io_direct(
1587 * didn't map an unwritten extent so switch it's completion 1628 * didn't map an unwritten extent so switch it's completion
1588 * handler. 1629 * handler.
1589 */ 1630 */
1590 ioend->io_type = IOMAP_NEW; 1631 ioend->io_type = IO_NEW;
1591 xfs_finish_ioend(ioend, 0); 1632 xfs_finish_ioend(ioend, 0);
1592 } 1633 }
1593 1634
@@ -1612,10 +1653,10 @@ xfs_vm_direct_IO(
1612 struct block_device *bdev; 1653 struct block_device *bdev;
1613 ssize_t ret; 1654 ssize_t ret;
1614 1655
1615 bdev = xfs_find_bdev_for_inode(XFS_I(inode)); 1656 bdev = xfs_find_bdev_for_inode(inode);
1616 1657
1617 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? 1658 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
1618 IOMAP_UNWRITTEN : IOMAP_READ); 1659 IO_UNWRITTEN : IO_READ);
1619 1660
1620 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, 1661 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
1621 offset, nr_segs, 1662 offset, nr_segs,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 44c2b0ef9a41..f01de3c55c43 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1007,25 +1007,20 @@ xfs_bwrite(
1007 struct xfs_mount *mp, 1007 struct xfs_mount *mp,
1008 struct xfs_buf *bp) 1008 struct xfs_buf *bp)
1009{ 1009{
1010 int iowait = (bp->b_flags & XBF_ASYNC) == 0; 1010 int error;
1011 int error = 0;
1012 1011
1013 bp->b_strat = xfs_bdstrat_cb; 1012 bp->b_strat = xfs_bdstrat_cb;
1014 bp->b_mount = mp; 1013 bp->b_mount = mp;
1015 bp->b_flags |= XBF_WRITE; 1014 bp->b_flags |= XBF_WRITE;
1016 if (!iowait) 1015 bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
1017 bp->b_flags |= _XBF_RUN_QUEUES;
1018 1016
1019 xfs_buf_delwri_dequeue(bp); 1017 xfs_buf_delwri_dequeue(bp);
1020 xfs_buf_iostrategy(bp); 1018 xfs_buf_iostrategy(bp);
1021 1019
1022 if (iowait) { 1020 error = xfs_buf_iowait(bp);
1023 error = xfs_buf_iowait(bp); 1021 if (error)
1024 if (error) 1022 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1025 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1023 xfs_buf_relse(bp);
1026 xfs_buf_relse(bp);
1027 }
1028
1029 return error; 1024 return error;
1030} 1025}
1031 1026
@@ -1614,7 +1609,8 @@ xfs_mapping_buftarg(
1614 1609
1615STATIC int 1610STATIC int
1616xfs_alloc_delwrite_queue( 1611xfs_alloc_delwrite_queue(
1617 xfs_buftarg_t *btp) 1612 xfs_buftarg_t *btp,
1613 const char *fsname)
1618{ 1614{
1619 int error = 0; 1615 int error = 0;
1620 1616
@@ -1622,7 +1618,7 @@ xfs_alloc_delwrite_queue(
1622 INIT_LIST_HEAD(&btp->bt_delwrite_queue); 1618 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1623 spin_lock_init(&btp->bt_delwrite_lock); 1619 spin_lock_init(&btp->bt_delwrite_lock);
1624 btp->bt_flags = 0; 1620 btp->bt_flags = 0;
1625 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); 1621 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
1626 if (IS_ERR(btp->bt_task)) { 1622 if (IS_ERR(btp->bt_task)) {
1627 error = PTR_ERR(btp->bt_task); 1623 error = PTR_ERR(btp->bt_task);
1628 goto out_error; 1624 goto out_error;
@@ -1635,7 +1631,8 @@ out_error:
1635xfs_buftarg_t * 1631xfs_buftarg_t *
1636xfs_alloc_buftarg( 1632xfs_alloc_buftarg(
1637 struct block_device *bdev, 1633 struct block_device *bdev,
1638 int external) 1634 int external,
1635 const char *fsname)
1639{ 1636{
1640 xfs_buftarg_t *btp; 1637 xfs_buftarg_t *btp;
1641 1638
@@ -1647,7 +1644,7 @@ xfs_alloc_buftarg(
1647 goto error; 1644 goto error;
1648 if (xfs_mapping_buftarg(btp, bdev)) 1645 if (xfs_mapping_buftarg(btp, bdev))
1649 goto error; 1646 goto error;
1650 if (xfs_alloc_delwrite_queue(btp)) 1647 if (xfs_alloc_delwrite_queue(btp, fsname))
1651 goto error; 1648 goto error;
1652 xfs_alloc_bufhash(btp, external); 1649 xfs_alloc_bufhash(btp, external);
1653 return btp; 1650 return btp;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 386e7361e50e..5fbecefa5dfd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -390,7 +390,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
390/* 390/*
391 * Handling of buftargs. 391 * Handling of buftargs.
392 */ 392 */
393extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 393extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
394extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); 394extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
395extern void xfs_wait_buftarg(xfs_buftarg_t *); 395extern void xfs_wait_buftarg(xfs_buftarg_t *);
396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 42dd3bcfba6b..d8fb1b5d6cb5 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -115,6 +115,8 @@ xfs_file_fsync(
115 115
116 xfs_iflags_clear(ip, XFS_ITRUNCATED); 116 xfs_iflags_clear(ip, XFS_ITRUNCATED);
117 117
118 xfs_ioend_wait(ip);
119
118 /* 120 /*
119 * We always need to make sure that the required inode state is safe on 121 * We always need to make sure that the required inode state is safe on
120 * disk. The inode might be clean but we still might need to force the 122 * disk. The inode might be clean but we still might need to force the
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 7b26cc2fd284..699b60cbab9c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -527,6 +527,10 @@ xfs_attrmulti_by_handle(
527 if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) 527 if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
528 return -XFS_ERROR(EFAULT); 528 return -XFS_ERROR(EFAULT);
529 529
530 /* overflow check */
531 if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
532 return -E2BIG;
533
530 dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); 534 dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
531 if (IS_ERR(dentry)) 535 if (IS_ERR(dentry))
532 return PTR_ERR(dentry); 536 return PTR_ERR(dentry);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 593c05b4df8d..9287135e9bfc 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -420,6 +420,10 @@ xfs_compat_attrmulti_by_handle(
420 sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) 420 sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
421 return -XFS_ERROR(EFAULT); 421 return -XFS_ERROR(EFAULT);
422 422
423 /* overflow check */
424 if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
425 return -E2BIG;
426
423 dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq); 427 dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
424 if (IS_ERR(dentry)) 428 if (IS_ERR(dentry))
425 return PTR_ERR(dentry); 429 return PTR_ERR(dentry);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index e65a7937f3a4..9c8019c78c92 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,7 +673,10 @@ xfs_vn_fiemap(
673 bm.bmv_length = BTOBB(length); 673 bm.bmv_length = BTOBB(length);
674 674
675 /* We add one because in getbmap world count includes the header */ 675 /* We add one because in getbmap world count includes the header */
676 bm.bmv_count = fieinfo->fi_extents_max + 1; 676 bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
677 fieinfo->fi_extents_max + 1;
678 bm.bmv_count = min_t(__s32, bm.bmv_count,
679 (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
677 bm.bmv_iflags = BMV_IF_PREALLOC; 680 bm.bmv_iflags = BMV_IF_PREALLOC;
678 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) 681 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
679 bm.bmv_iflags |= BMV_IF_ATTRFORK; 682 bm.bmv_iflags |= BMV_IF_ATTRFORK;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 1947514ce1ad..e31bf21fe5d3 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -97,7 +97,7 @@ xfs_fs_set_xstate(
97} 97}
98 98
99STATIC int 99STATIC int
100xfs_fs_get_xquota( 100xfs_fs_get_dqblk(
101 struct super_block *sb, 101 struct super_block *sb,
102 int type, 102 int type,
103 qid_t id, 103 qid_t id,
@@ -114,7 +114,7 @@ xfs_fs_get_xquota(
114} 114}
115 115
116STATIC int 116STATIC int
117xfs_fs_set_xquota( 117xfs_fs_set_dqblk(
118 struct super_block *sb, 118 struct super_block *sb,
119 int type, 119 int type,
120 qid_t id, 120 qid_t id,
@@ -135,6 +135,6 @@ xfs_fs_set_xquota(
135const struct quotactl_ops xfs_quotactl_operations = { 135const struct quotactl_ops xfs_quotactl_operations = {
136 .get_xstate = xfs_fs_get_xstate, 136 .get_xstate = xfs_fs_get_xstate,
137 .set_xstate = xfs_fs_set_xstate, 137 .set_xstate = xfs_fs_set_xstate,
138 .get_xquota = xfs_fs_get_xquota, 138 .get_dqblk = xfs_fs_get_dqblk,
139 .set_xquota = xfs_fs_set_xquota, 139 .set_dqblk = xfs_fs_set_dqblk,
140}; 140};
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 2b177c778ba7..f24dbe5efde3 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -790,18 +790,18 @@ xfs_open_devices(
790 * Setup xfs_mount buffer target pointers 790 * Setup xfs_mount buffer target pointers
791 */ 791 */
792 error = ENOMEM; 792 error = ENOMEM;
793 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0); 793 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
794 if (!mp->m_ddev_targp) 794 if (!mp->m_ddev_targp)
795 goto out_close_rtdev; 795 goto out_close_rtdev;
796 796
797 if (rtdev) { 797 if (rtdev) {
798 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1); 798 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
799 if (!mp->m_rtdev_targp) 799 if (!mp->m_rtdev_targp)
800 goto out_free_ddev_targ; 800 goto out_free_ddev_targ;
801 } 801 }
802 802
803 if (logdev && logdev != ddev) { 803 if (logdev && logdev != ddev) {
804 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1); 804 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
805 if (!mp->m_logdev_targp) 805 if (!mp->m_logdev_targp)
806 goto out_free_rtdev_targ; 806 goto out_free_rtdev_targ;
807 } else { 807 } else {
@@ -903,7 +903,8 @@ xfsaild_start(
903 struct xfs_ail *ailp) 903 struct xfs_ail *ailp)
904{ 904{
905 ailp->xa_target = 0; 905 ailp->xa_target = 0;
906 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild"); 906 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
907 ailp->xa_mount->m_fsname);
907 if (IS_ERR(ailp->xa_task)) 908 if (IS_ERR(ailp->xa_task))
908 return -PTR_ERR(ailp->xa_task); 909 return -PTR_ERR(ailp->xa_task);
909 return 0; 910 return 0;
@@ -1093,6 +1094,7 @@ xfs_fs_write_inode(
1093 * the code will only flush the inode if it isn't already 1094 * the code will only flush the inode if it isn't already
1094 * being flushed. 1095 * being flushed.
1095 */ 1096 */
1097 xfs_ioend_wait(ip);
1096 xfs_ilock(ip, XFS_ILOCK_SHARED); 1098 xfs_ilock(ip, XFS_ILOCK_SHARED);
1097 if (ip->i_update_core) { 1099 if (ip->i_update_core) {
1098 error = xfs_log_inode(ip); 1100 error = xfs_log_inode(ip);
@@ -1210,6 +1212,7 @@ xfs_fs_put_super(
1210 1212
1211 xfs_unmountfs(mp); 1213 xfs_unmountfs(mp);
1212 xfs_freesb(mp); 1214 xfs_freesb(mp);
1215 xfs_inode_shrinker_unregister(mp);
1213 xfs_icsb_destroy_counters(mp); 1216 xfs_icsb_destroy_counters(mp);
1214 xfs_close_devices(mp); 1217 xfs_close_devices(mp);
1215 xfs_dmops_put(mp); 1218 xfs_dmops_put(mp);
@@ -1623,6 +1626,8 @@ xfs_fs_fill_super(
1623 if (error) 1626 if (error)
1624 goto fail_vnrele; 1627 goto fail_vnrele;
1625 1628
1629 xfs_inode_shrinker_register(mp);
1630
1626 kfree(mtpt); 1631 kfree(mtpt);
1627 return 0; 1632 return 0;
1628 1633
@@ -1868,6 +1873,7 @@ init_xfs_fs(void)
1868 goto out_cleanup_procfs; 1873 goto out_cleanup_procfs;
1869 1874
1870 vfs_initquota(); 1875 vfs_initquota();
1876 xfs_inode_shrinker_init();
1871 1877
1872 error = register_filesystem(&xfs_fs_type); 1878 error = register_filesystem(&xfs_fs_type);
1873 if (error) 1879 if (error)
@@ -1895,6 +1901,7 @@ exit_xfs_fs(void)
1895{ 1901{
1896 vfs_exitquota(); 1902 vfs_exitquota();
1897 unregister_filesystem(&xfs_fs_type); 1903 unregister_filesystem(&xfs_fs_type);
1904 xfs_inode_shrinker_destroy();
1898 xfs_sysctl_unregister(); 1905 xfs_sysctl_unregister();
1899 xfs_cleanup_procfs(); 1906 xfs_cleanup_procfs();
1900 xfs_buf_terminate(); 1907 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index fd9698215759..3884e20bc14e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
95 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
96 int flags, 96 int flags,
97 int tag, 97 int tag,
98 int exclusive) 98 int exclusive,
99 int *nr_to_scan)
99{ 100{
100 uint32_t first_index; 101 uint32_t first_index;
101 int last_error = 0; 102 int last_error = 0;
@@ -134,7 +135,7 @@ restart:
134 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
135 break; 136 break;
136 137
137 } while (1); 138 } while ((*nr_to_scan)--);
138 139
139 if (skipped) { 140 if (skipped) {
140 delay(1); 141 delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
150 struct xfs_perag *pag, int flags), 151 struct xfs_perag *pag, int flags),
151 int flags, 152 int flags,
152 int tag, 153 int tag,
153 int exclusive) 154 int exclusive,
155 int *nr_to_scan)
154{ 156{
155 int error = 0; 157 int error = 0;
156 int last_error = 0; 158 int last_error = 0;
157 xfs_agnumber_t ag; 159 xfs_agnumber_t ag;
160 int nr;
158 161
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
159 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
160 struct xfs_perag *pag; 164 struct xfs_perag *pag;
161 165
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
165 continue; 169 continue;
166 } 170 }
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
168 exclusive); 172 exclusive, &nr);
169 xfs_perag_put(pag); 173 xfs_perag_put(pag);
170 if (error) { 174 if (error) {
171 last_error = error; 175 last_error = error;
172 if (error == EFSCORRUPTED) 176 if (error == EFSCORRUPTED)
173 break; 177 break;
174 } 178 }
179 if (nr <= 0)
180 break;
175 } 181 }
182 if (nr_to_scan)
183 *nr_to_scan = nr;
176 return XFS_ERROR(last_error); 184 return XFS_ERROR(last_error);
177} 185}
178 186
@@ -291,7 +299,7 @@ xfs_sync_data(
291 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 299 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
292 300
293 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 301 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
294 XFS_ICI_NO_TAG, 0); 302 XFS_ICI_NO_TAG, 0, NULL);
295 if (error) 303 if (error)
296 return XFS_ERROR(error); 304 return XFS_ERROR(error);
297 305
@@ -310,7 +318,7 @@ xfs_sync_attr(
310 ASSERT((flags & ~SYNC_WAIT) == 0); 318 ASSERT((flags & ~SYNC_WAIT) == 0);
311 319
312 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 320 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
313 XFS_ICI_NO_TAG, 0); 321 XFS_ICI_NO_TAG, 0, NULL);
314} 322}
315 323
316STATIC int 324STATIC int
@@ -348,68 +356,23 @@ xfs_commit_dummy_trans(
348 356
349STATIC int 357STATIC int
350xfs_sync_fsdata( 358xfs_sync_fsdata(
351 struct xfs_mount *mp, 359 struct xfs_mount *mp)
352 int flags)
353{ 360{
354 struct xfs_buf *bp; 361 struct xfs_buf *bp;
355 struct xfs_buf_log_item *bip;
356 int error = 0;
357
358 /*
359 * If this is xfssyncd() then only sync the superblock if we can
360 * lock it without sleeping and it is not pinned.
361 */
362 if (flags & SYNC_TRYLOCK) {
363 ASSERT(!(flags & SYNC_WAIT));
364
365 bp = xfs_getsb(mp, XBF_TRYLOCK);
366 if (!bp)
367 goto out;
368
369 bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
370 if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
371 goto out_brelse;
372 } else {
373 bp = xfs_getsb(mp, 0);
374
375 /*
376 * If the buffer is pinned then push on the log so we won't
377 * get stuck waiting in the write for someone, maybe
378 * ourselves, to flush the log.
379 *
380 * Even though we just pushed the log above, we did not have
381 * the superblock buffer locked at that point so it can
382 * become pinned in between there and here.
383 */
384 if (XFS_BUF_ISPINNED(bp))
385 xfs_log_force(mp, 0);
386 }
387
388
389 if (flags & SYNC_WAIT)
390 XFS_BUF_UNASYNC(bp);
391 else
392 XFS_BUF_ASYNC(bp);
393
394 error = xfs_bwrite(mp, bp);
395 if (error)
396 return error;
397 362
398 /* 363 /*
399 * If this is a data integrity sync make sure all pending buffers 364 * If the buffer is pinned then push on the log so we won't get stuck
400 * are flushed out for the log coverage check below. 365 * waiting in the write for someone, maybe ourselves, to flush the log.
366 *
367 * Even though we just pushed the log above, we did not have the
368 * superblock buffer locked at that point so it can become pinned in
369 * between there and here.
401 */ 370 */
402 if (flags & SYNC_WAIT) 371 bp = xfs_getsb(mp, 0);
403 xfs_flush_buftarg(mp->m_ddev_targp, 1); 372 if (XFS_BUF_ISPINNED(bp))
404 373 xfs_log_force(mp, 0);
405 if (xfs_log_need_covered(mp))
406 error = xfs_commit_dummy_trans(mp, flags);
407 return error;
408 374
409 out_brelse: 375 return xfs_bwrite(mp, bp);
410 xfs_buf_relse(bp);
411 out:
412 return error;
413} 376}
414 377
415/* 378/*
@@ -433,7 +396,7 @@ int
433xfs_quiesce_data( 396xfs_quiesce_data(
434 struct xfs_mount *mp) 397 struct xfs_mount *mp)
435{ 398{
436 int error; 399 int error, error2 = 0;
437 400
438 /* push non-blocking */ 401 /* push non-blocking */
439 xfs_sync_data(mp, 0); 402 xfs_sync_data(mp, 0);
@@ -444,13 +407,20 @@ xfs_quiesce_data(
444 xfs_qm_sync(mp, SYNC_WAIT); 407 xfs_qm_sync(mp, SYNC_WAIT);
445 408
446 /* write superblock and hoover up shutdown errors */ 409 /* write superblock and hoover up shutdown errors */
447 error = xfs_sync_fsdata(mp, SYNC_WAIT); 410 error = xfs_sync_fsdata(mp);
411
412 /* make sure all delwri buffers are written out */
413 xfs_flush_buftarg(mp->m_ddev_targp, 1);
414
415 /* mark the log as covered if needed */
416 if (xfs_log_need_covered(mp))
417 error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
448 418
449 /* flush data-only devices */ 419 /* flush data-only devices */
450 if (mp->m_rtdev_targp) 420 if (mp->m_rtdev_targp)
451 XFS_bflush(mp->m_rtdev_targp); 421 XFS_bflush(mp->m_rtdev_targp);
452 422
453 return error; 423 return error ? error : error2;
454} 424}
455 425
456STATIC void 426STATIC void
@@ -573,9 +543,9 @@ xfs_flush_inodes(
573} 543}
574 544
575/* 545/*
576 * Every sync period we need to unpin all items, reclaim inodes, sync 546 * Every sync period we need to unpin all items, reclaim inodes and sync
577 * quota and write out the superblock. We might need to cover the log 547 * disk quotas. We might need to cover the log to indicate that the
578 * to indicate it is idle. 548 * filesystem is idle.
579 */ 549 */
580STATIC void 550STATIC void
581xfs_sync_worker( 551xfs_sync_worker(
@@ -589,7 +559,8 @@ xfs_sync_worker(
589 xfs_reclaim_inodes(mp, 0); 559 xfs_reclaim_inodes(mp, 0);
590 /* dgc: errors ignored here */ 560 /* dgc: errors ignored here */
591 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 561 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
592 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); 562 if (xfs_log_need_covered(mp))
563 error = xfs_commit_dummy_trans(mp, 0);
593 } 564 }
594 mp->m_sync_seq++; 565 mp->m_sync_seq++;
595 wake_up(&mp->m_wait_single_sync_task); 566 wake_up(&mp->m_wait_single_sync_task);
@@ -652,7 +623,7 @@ xfs_syncd_init(
652 mp->m_sync_work.w_syncer = xfs_sync_worker; 623 mp->m_sync_work.w_syncer = xfs_sync_worker;
653 mp->m_sync_work.w_mount = mp; 624 mp->m_sync_work.w_mount = mp;
654 mp->m_sync_work.w_completion = NULL; 625 mp->m_sync_work.w_completion = NULL;
655 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); 626 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
656 if (IS_ERR(mp->m_sync_task)) 627 if (IS_ERR(mp->m_sync_task))
657 return -PTR_ERR(mp->m_sync_task); 628 return -PTR_ERR(mp->m_sync_task);
658 return 0; 629 return 0;
@@ -673,6 +644,7 @@ __xfs_inode_set_reclaim_tag(
673 radix_tree_tag_set(&pag->pag_ici_root, 644 radix_tree_tag_set(&pag->pag_ici_root,
674 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 645 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
675 XFS_ICI_RECLAIM_TAG); 646 XFS_ICI_RECLAIM_TAG);
647 pag->pag_ici_reclaimable++;
676} 648}
677 649
678/* 650/*
@@ -705,6 +677,7 @@ __xfs_inode_clear_reclaim_tag(
705{ 677{
706 radix_tree_tag_clear(&pag->pag_ici_root, 678 radix_tree_tag_clear(&pag->pag_ici_root,
707 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 679 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
680 pag->pag_ici_reclaimable--;
708} 681}
709 682
710/* 683/*
@@ -854,5 +827,93 @@ xfs_reclaim_inodes(
854 int mode) 827 int mode)
855{ 828{
856 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, 829 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
857 XFS_ICI_RECLAIM_TAG, 1); 830 XFS_ICI_RECLAIM_TAG, 1, NULL);
831}
832
833/*
834 * Shrinker infrastructure.
835 *
836 * This is all far more complex than it needs to be. It adds a global list of
837 * mounts because the shrinkers can only call a global context. We need to make
838 * the shrinkers pass a context to avoid the need for global state.
839 */
840static LIST_HEAD(xfs_mount_list);
841static struct rw_semaphore xfs_mount_list_lock;
842
843static int
844xfs_reclaim_inode_shrink(
845 int nr_to_scan,
846 gfp_t gfp_mask)
847{
848 struct xfs_mount *mp;
849 struct xfs_perag *pag;
850 xfs_agnumber_t ag;
851 int reclaimable = 0;
852
853 if (nr_to_scan) {
854 if (!(gfp_mask & __GFP_FS))
855 return -1;
856
857 down_read(&xfs_mount_list_lock);
858 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
859 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
860 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
861 if (nr_to_scan <= 0)
862 break;
863 }
864 up_read(&xfs_mount_list_lock);
865 }
866
867 down_read(&xfs_mount_list_lock);
868 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
869 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
870
871 pag = xfs_perag_get(mp, ag);
872 if (!pag->pag_ici_init) {
873 xfs_perag_put(pag);
874 continue;
875 }
876 reclaimable += pag->pag_ici_reclaimable;
877 xfs_perag_put(pag);
878 }
879 }
880 up_read(&xfs_mount_list_lock);
881 return reclaimable;
882}
883
884static struct shrinker xfs_inode_shrinker = {
885 .shrink = xfs_reclaim_inode_shrink,
886 .seeks = DEFAULT_SEEKS,
887};
888
889void __init
890xfs_inode_shrinker_init(void)
891{
892 init_rwsem(&xfs_mount_list_lock);
893 register_shrinker(&xfs_inode_shrinker);
894}
895
896void
897xfs_inode_shrinker_destroy(void)
898{
899 ASSERT(list_empty(&xfs_mount_list));
900 unregister_shrinker(&xfs_inode_shrinker);
901}
902
903void
904xfs_inode_shrinker_register(
905 struct xfs_mount *mp)
906{
907 down_write(&xfs_mount_list_lock);
908 list_add_tail(&mp->m_mplist, &xfs_mount_list);
909 up_write(&xfs_mount_list_lock);
910}
911
912void
913xfs_inode_shrinker_unregister(
914 struct xfs_mount *mp)
915{
916 down_write(&xfs_mount_list_lock);
917 list_del(&mp->m_mplist);
918 up_write(&xfs_mount_list_lock);
858} 919}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c346cabb..cdcbaaca9880 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
54int xfs_inode_ag_iterator(struct xfs_mount *mp, 54int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
57 62
58#endif 63#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 5a107601e969..207fa77f63ae 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -41,7 +41,6 @@
41#include "xfs_alloc.h" 41#include "xfs_alloc.h"
42#include "xfs_bmap.h" 42#include "xfs_bmap.h"
43#include "xfs_attr.h" 43#include "xfs_attr.h"
44#include "xfs_attr_sf.h"
45#include "xfs_attr_leaf.h" 44#include "xfs_attr_leaf.h"
46#include "xfs_log_priv.h" 45#include "xfs_log_priv.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
@@ -50,6 +49,9 @@
50#include "xfs_aops.h" 49#include "xfs_aops.h"
51#include "quota/xfs_dquot_item.h" 50#include "quota/xfs_dquot_item.h"
52#include "quota/xfs_dquot.h" 51#include "quota/xfs_dquot.h"
52#include "xfs_log_recover.h"
53#include "xfs_buf_item.h"
54#include "xfs_inode_item.h"
53 55
54/* 56/*
55 * We include this last to have the helpers above available for the trace 57 * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index fcaa62f0799e..8a319cfd2901 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -32,6 +32,10 @@ struct xfs_da_node_entry;
32struct xfs_dquot; 32struct xfs_dquot;
33struct xlog_ticket; 33struct xlog_ticket;
34struct log; 34struct log;
35struct xlog_recover;
36struct xlog_recover_item;
37struct xfs_buf_log_format;
38struct xfs_inode_log_format;
35 39
36DECLARE_EVENT_CLASS(xfs_attr_list_class, 40DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx), 41 TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -562,18 +566,21 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
562 __field(dev_t, dev) 566 __field(dev_t, dev)
563 __field(xfs_ino_t, ino) 567 __field(xfs_ino_t, ino)
564 __field(int, count) 568 __field(int, count)
569 __field(int, pincount)
565 __field(unsigned long, caller_ip) 570 __field(unsigned long, caller_ip)
566 ), 571 ),
567 TP_fast_assign( 572 TP_fast_assign(
568 __entry->dev = VFS_I(ip)->i_sb->s_dev; 573 __entry->dev = VFS_I(ip)->i_sb->s_dev;
569 __entry->ino = ip->i_ino; 574 __entry->ino = ip->i_ino;
570 __entry->count = atomic_read(&VFS_I(ip)->i_count); 575 __entry->count = atomic_read(&VFS_I(ip)->i_count);
576 __entry->pincount = atomic_read(&ip->i_pincount);
571 __entry->caller_ip = caller_ip; 577 __entry->caller_ip = caller_ip;
572 ), 578 ),
573 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", 579 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
574 MAJOR(__entry->dev), MINOR(__entry->dev), 580 MAJOR(__entry->dev), MINOR(__entry->dev),
575 __entry->ino, 581 __entry->ino,
576 __entry->count, 582 __entry->count,
583 __entry->pincount,
577 (char *)__entry->caller_ip) 584 (char *)__entry->caller_ip)
578) 585)
579 586
@@ -583,6 +590,10 @@ DEFINE_EVENT(xfs_inode_class, name, \
583 TP_ARGS(ip, caller_ip)) 590 TP_ARGS(ip, caller_ip))
584DEFINE_INODE_EVENT(xfs_ihold); 591DEFINE_INODE_EVENT(xfs_ihold);
585DEFINE_INODE_EVENT(xfs_irele); 592DEFINE_INODE_EVENT(xfs_irele);
593DEFINE_INODE_EVENT(xfs_inode_pin);
594DEFINE_INODE_EVENT(xfs_inode_unpin);
595DEFINE_INODE_EVENT(xfs_inode_unpin_nowait);
596
586/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ 597/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
587DEFINE_INODE_EVENT(xfs_inode); 598DEFINE_INODE_EVENT(xfs_inode);
588#define xfs_itrace_entry(ip) \ 599#define xfs_itrace_entry(ip) \
@@ -642,8 +653,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \
642 TP_PROTO(struct xfs_dquot *dqp), \ 653 TP_PROTO(struct xfs_dquot *dqp), \
643 TP_ARGS(dqp)) 654 TP_ARGS(dqp))
644DEFINE_DQUOT_EVENT(xfs_dqadjust); 655DEFINE_DQUOT_EVENT(xfs_dqadjust);
645DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
646DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
647DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); 656DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
648DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); 657DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
649DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); 658DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
@@ -658,7 +667,6 @@ DEFINE_DQUOT_EVENT(xfs_dqread_fail);
658DEFINE_DQUOT_EVENT(xfs_dqlookup_found); 667DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
659DEFINE_DQUOT_EVENT(xfs_dqlookup_want); 668DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
660DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); 669DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
661DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
662DEFINE_DQUOT_EVENT(xfs_dqlookup_done); 670DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
663DEFINE_DQUOT_EVENT(xfs_dqget_hit); 671DEFINE_DQUOT_EVENT(xfs_dqget_hit);
664DEFINE_DQUOT_EVENT(xfs_dqget_miss); 672DEFINE_DQUOT_EVENT(xfs_dqget_miss);
@@ -1495,6 +1503,140 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \
1495DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); 1503DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
1496DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); 1504DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
1497 1505
1506DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
1507 TP_PROTO(struct log *log, struct xlog_recover *trans,
1508 struct xlog_recover_item *item, int pass),
1509 TP_ARGS(log, trans, item, pass),
1510 TP_STRUCT__entry(
1511 __field(dev_t, dev)
1512 __field(unsigned long, item)
1513 __field(xlog_tid_t, tid)
1514 __field(int, type)
1515 __field(int, pass)
1516 __field(int, count)
1517 __field(int, total)
1518 ),
1519 TP_fast_assign(
1520 __entry->dev = log->l_mp->m_super->s_dev;
1521 __entry->item = (unsigned long)item;
1522 __entry->tid = trans->r_log_tid;
1523 __entry->type = ITEM_TYPE(item);
1524 __entry->pass = pass;
1525 __entry->count = item->ri_cnt;
1526 __entry->total = item->ri_total;
1527 ),
1528 TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
1529 "item region count/total %d/%d",
1530 MAJOR(__entry->dev), MINOR(__entry->dev),
1531 __entry->tid,
1532 __entry->pass,
1533 (void *)__entry->item,
1534 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
1535 __entry->count,
1536 __entry->total)
1537)
1538
1539#define DEFINE_LOG_RECOVER_ITEM(name) \
1540DEFINE_EVENT(xfs_log_recover_item_class, name, \
1541 TP_PROTO(struct log *log, struct xlog_recover *trans, \
1542 struct xlog_recover_item *item, int pass), \
1543 TP_ARGS(log, trans, item, pass))
1544
1545DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
1546DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
1547DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
1548DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
1549DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
1550
1551DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
1552 TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
1553 TP_ARGS(log, buf_f),
1554 TP_STRUCT__entry(
1555 __field(dev_t, dev)
1556 __field(__int64_t, blkno)
1557 __field(unsigned short, len)
1558 __field(unsigned short, flags)
1559 __field(unsigned short, size)
1560 __field(unsigned int, map_size)
1561 ),
1562 TP_fast_assign(
1563 __entry->dev = log->l_mp->m_super->s_dev;
1564 __entry->blkno = buf_f->blf_blkno;
1565 __entry->len = buf_f->blf_len;
1566 __entry->flags = buf_f->blf_flags;
1567 __entry->size = buf_f->blf_size;
1568 __entry->map_size = buf_f->blf_map_size;
1569 ),
1570 TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
1571 "map_size %d",
1572 MAJOR(__entry->dev), MINOR(__entry->dev),
1573 __entry->blkno,
1574 __entry->len,
1575 __entry->flags,
1576 __entry->size,
1577 __entry->map_size)
1578)
1579
1580#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
1581DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
1582 TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
1583 TP_ARGS(log, buf_f))
1584
1585DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
1586DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
1587DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
1588DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
1589DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
1590DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
1591DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
1592DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
1593
1594DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
1595 TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
1596 TP_ARGS(log, in_f),
1597 TP_STRUCT__entry(
1598 __field(dev_t, dev)
1599 __field(xfs_ino_t, ino)
1600 __field(unsigned short, size)
1601 __field(int, fields)
1602 __field(unsigned short, asize)
1603 __field(unsigned short, dsize)
1604 __field(__int64_t, blkno)
1605 __field(int, len)
1606 __field(int, boffset)
1607 ),
1608 TP_fast_assign(
1609 __entry->dev = log->l_mp->m_super->s_dev;
1610 __entry->ino = in_f->ilf_ino;
1611 __entry->size = in_f->ilf_size;
1612 __entry->fields = in_f->ilf_fields;
1613 __entry->asize = in_f->ilf_asize;
1614 __entry->dsize = in_f->ilf_dsize;
1615 __entry->blkno = in_f->ilf_blkno;
1616 __entry->len = in_f->ilf_len;
1617 __entry->boffset = in_f->ilf_boffset;
1618 ),
1619 TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
1620 "dsize %d, blkno 0x%llx, len %d, boffset %d",
1621 MAJOR(__entry->dev), MINOR(__entry->dev),
1622 __entry->ino,
1623 __entry->size,
1624 __entry->fields,
1625 __entry->asize,
1626 __entry->dsize,
1627 __entry->blkno,
1628 __entry->len,
1629 __entry->boffset)
1630)
1631#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
1632DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
1633 TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
1634 TP_ARGS(log, in_f))
1635
1636DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
1637DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
1638DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
1639
1498#endif /* _TRACE_XFS_H */ 1640#endif /* _TRACE_XFS_H */
1499 1641
1500#undef TRACE_INCLUDE_PATH 1642#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 5f79dd78626b..b89ec5df0129 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
101 * No need to re-initialize these if this is a reclaimed dquot. 101 * No need to re-initialize these if this is a reclaimed dquot.
102 */ 102 */
103 if (brandnewdquot) { 103 if (brandnewdquot) {
104 dqp->dq_flnext = dqp->dq_flprev = dqp; 104 INIT_LIST_HEAD(&dqp->q_freelist);
105 mutex_init(&dqp->q_qlock); 105 mutex_init(&dqp->q_qlock);
106 init_waitqueue_head(&dqp->q_pinwait); 106 init_waitqueue_head(&dqp->q_pinwait);
107 107
@@ -119,20 +119,20 @@ xfs_qm_dqinit(
119 * Only the q_core portion was zeroed in dqreclaim_one(). 119 * Only the q_core portion was zeroed in dqreclaim_one().
120 * So, we need to reset others. 120 * So, we need to reset others.
121 */ 121 */
122 dqp->q_nrefs = 0; 122 dqp->q_nrefs = 0;
123 dqp->q_blkno = 0; 123 dqp->q_blkno = 0;
124 dqp->MPL_NEXT = dqp->HL_NEXT = NULL; 124 INIT_LIST_HEAD(&dqp->q_mplist);
125 dqp->HL_PREVP = dqp->MPL_PREVP = NULL; 125 INIT_LIST_HEAD(&dqp->q_hashlist);
126 dqp->q_bufoffset = 0; 126 dqp->q_bufoffset = 0;
127 dqp->q_fileoffset = 0; 127 dqp->q_fileoffset = 0;
128 dqp->q_transp = NULL; 128 dqp->q_transp = NULL;
129 dqp->q_gdquot = NULL; 129 dqp->q_gdquot = NULL;
130 dqp->q_res_bcount = 0; 130 dqp->q_res_bcount = 0;
131 dqp->q_res_icount = 0; 131 dqp->q_res_icount = 0;
132 dqp->q_res_rtbcount = 0; 132 dqp->q_res_rtbcount = 0;
133 atomic_set(&dqp->q_pincount, 0); 133 atomic_set(&dqp->q_pincount, 0);
134 dqp->q_hash = NULL; 134 dqp->q_hash = NULL;
135 ASSERT(dqp->dq_flnext == dqp->dq_flprev); 135 ASSERT(list_empty(&dqp->q_freelist));
136 136
137 trace_xfs_dqreuse(dqp); 137 trace_xfs_dqreuse(dqp);
138 } 138 }
@@ -158,7 +158,7 @@ void
158xfs_qm_dqdestroy( 158xfs_qm_dqdestroy(
159 xfs_dquot_t *dqp) 159 xfs_dquot_t *dqp)
160{ 160{
161 ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); 161 ASSERT(list_empty(&dqp->q_freelist));
162 162
163 mutex_destroy(&dqp->q_qlock); 163 mutex_destroy(&dqp->q_qlock);
164 sv_destroy(&dqp->q_pinwait); 164 sv_destroy(&dqp->q_pinwait);
@@ -252,7 +252,7 @@ xfs_qm_adjust_dqtimers(
252 (be64_to_cpu(d->d_bcount) >= 252 (be64_to_cpu(d->d_bcount) >=
253 be64_to_cpu(d->d_blk_hardlimit)))) { 253 be64_to_cpu(d->d_blk_hardlimit)))) {
254 d->d_btimer = cpu_to_be32(get_seconds() + 254 d->d_btimer = cpu_to_be32(get_seconds() +
255 XFS_QI_BTIMELIMIT(mp)); 255 mp->m_quotainfo->qi_btimelimit);
256 } else { 256 } else {
257 d->d_bwarns = 0; 257 d->d_bwarns = 0;
258 } 258 }
@@ -275,7 +275,7 @@ xfs_qm_adjust_dqtimers(
275 (be64_to_cpu(d->d_icount) >= 275 (be64_to_cpu(d->d_icount) >=
276 be64_to_cpu(d->d_ino_hardlimit)))) { 276 be64_to_cpu(d->d_ino_hardlimit)))) {
277 d->d_itimer = cpu_to_be32(get_seconds() + 277 d->d_itimer = cpu_to_be32(get_seconds() +
278 XFS_QI_ITIMELIMIT(mp)); 278 mp->m_quotainfo->qi_itimelimit);
279 } else { 279 } else {
280 d->d_iwarns = 0; 280 d->d_iwarns = 0;
281 } 281 }
@@ -298,7 +298,7 @@ xfs_qm_adjust_dqtimers(
298 (be64_to_cpu(d->d_rtbcount) >= 298 (be64_to_cpu(d->d_rtbcount) >=
299 be64_to_cpu(d->d_rtb_hardlimit)))) { 299 be64_to_cpu(d->d_rtb_hardlimit)))) {
300 d->d_rtbtimer = cpu_to_be32(get_seconds() + 300 d->d_rtbtimer = cpu_to_be32(get_seconds() +
301 XFS_QI_RTBTIMELIMIT(mp)); 301 mp->m_quotainfo->qi_rtbtimelimit);
302 } else { 302 } else {
303 d->d_rtbwarns = 0; 303 d->d_rtbwarns = 0;
304 } 304 }
@@ -325,6 +325,7 @@ xfs_qm_init_dquot_blk(
325 uint type, 325 uint type,
326 xfs_buf_t *bp) 326 xfs_buf_t *bp)
327{ 327{
328 struct xfs_quotainfo *q = mp->m_quotainfo;
328 xfs_dqblk_t *d; 329 xfs_dqblk_t *d;
329 int curid, i; 330 int curid, i;
330 331
@@ -337,16 +338,16 @@ xfs_qm_init_dquot_blk(
337 /* 338 /*
338 * ID of the first dquot in the block - id's are zero based. 339 * ID of the first dquot in the block - id's are zero based.
339 */ 340 */
340 curid = id - (id % XFS_QM_DQPERBLK(mp)); 341 curid = id - (id % q->qi_dqperchunk);
341 ASSERT(curid >= 0); 342 ASSERT(curid >= 0);
342 memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp))); 343 memset(d, 0, BBTOB(q->qi_dqchunklen));
343 for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++) 344 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
344 xfs_qm_dqinit_core(curid, type, d); 345 xfs_qm_dqinit_core(curid, type, d);
345 xfs_trans_dquot_buf(tp, bp, 346 xfs_trans_dquot_buf(tp, bp,
346 (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF : 347 (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
347 ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF : 348 ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
348 XFS_BLI_GDQUOT_BUF))); 349 XFS_BLI_GDQUOT_BUF)));
349 xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1); 350 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
350} 351}
351 352
352 353
@@ -419,7 +420,7 @@ xfs_qm_dqalloc(
419 /* now we can just get the buffer (there's nothing to read yet) */ 420 /* now we can just get the buffer (there's nothing to read yet) */
420 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, 421 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
421 dqp->q_blkno, 422 dqp->q_blkno,
422 XFS_QI_DQCHUNKLEN(mp), 423 mp->m_quotainfo->qi_dqchunklen,
423 0); 424 0);
424 if (!bp || (error = XFS_BUF_GETERROR(bp))) 425 if (!bp || (error = XFS_BUF_GETERROR(bp)))
425 goto error1; 426 goto error1;
@@ -500,7 +501,8 @@ xfs_qm_dqtobp(
500 */ 501 */
501 if (dqp->q_blkno == (xfs_daddr_t) 0) { 502 if (dqp->q_blkno == (xfs_daddr_t) 0) {
502 /* We use the id as an index */ 503 /* We use the id as an index */
503 dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp); 504 dqp->q_fileoffset = (xfs_fileoff_t)id /
505 mp->m_quotainfo->qi_dqperchunk;
504 nmaps = 1; 506 nmaps = 1;
505 quotip = XFS_DQ_TO_QIP(dqp); 507 quotip = XFS_DQ_TO_QIP(dqp);
506 xfs_ilock(quotip, XFS_ILOCK_SHARED); 508 xfs_ilock(quotip, XFS_ILOCK_SHARED);
@@ -529,7 +531,7 @@ xfs_qm_dqtobp(
529 /* 531 /*
530 * offset of dquot in the (fixed sized) dquot chunk. 532 * offset of dquot in the (fixed sized) dquot chunk.
531 */ 533 */
532 dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) * 534 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
533 sizeof(xfs_dqblk_t); 535 sizeof(xfs_dqblk_t);
534 if (map.br_startblock == HOLESTARTBLOCK) { 536 if (map.br_startblock == HOLESTARTBLOCK) {
535 /* 537 /*
@@ -559,15 +561,13 @@ xfs_qm_dqtobp(
559 * Read in the buffer, unless we've just done the allocation 561 * Read in the buffer, unless we've just done the allocation
560 * (in which case we already have the buf). 562 * (in which case we already have the buf).
561 */ 563 */
562 if (! newdquot) { 564 if (!newdquot) {
563 trace_xfs_dqtobp_read(dqp); 565 trace_xfs_dqtobp_read(dqp);
564 566
565 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 567 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
566 dqp->q_blkno, 568 dqp->q_blkno,
567 XFS_QI_DQCHUNKLEN(mp), 569 mp->m_quotainfo->qi_dqchunklen,
568 0, &bp))) { 570 0, &bp);
569 return (error);
570 }
571 if (error || !bp) 571 if (error || !bp)
572 return XFS_ERROR(error); 572 return XFS_ERROR(error);
573 } 573 }
@@ -689,14 +689,14 @@ xfs_qm_idtodq(
689 tp = NULL; 689 tp = NULL;
690 if (flags & XFS_QMOPT_DQALLOC) { 690 if (flags & XFS_QMOPT_DQALLOC) {
691 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 691 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
692 if ((error = xfs_trans_reserve(tp, 692 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
693 XFS_QM_DQALLOC_SPACE_RES(mp), 693 XFS_WRITE_LOG_RES(mp) +
694 XFS_WRITE_LOG_RES(mp) + 694 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
695 BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 + 695 128,
696 128, 696 0,
697 0, 697 XFS_TRANS_PERM_LOG_RES,
698 XFS_TRANS_PERM_LOG_RES, 698 XFS_WRITE_LOG_COUNT);
699 XFS_WRITE_LOG_COUNT))) { 699 if (error) {
700 cancelflags = 0; 700 cancelflags = 0;
701 goto error0; 701 goto error0;
702 } 702 }
@@ -751,7 +751,6 @@ xfs_qm_dqlookup(
751{ 751{
752 xfs_dquot_t *dqp; 752 xfs_dquot_t *dqp;
753 uint flist_locked; 753 uint flist_locked;
754 xfs_dquot_t *d;
755 754
756 ASSERT(mutex_is_locked(&qh->qh_lock)); 755 ASSERT(mutex_is_locked(&qh->qh_lock));
757 756
@@ -760,7 +759,7 @@ xfs_qm_dqlookup(
760 /* 759 /*
761 * Traverse the hashchain looking for a match 760 * Traverse the hashchain looking for a match
762 */ 761 */
763 for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) { 762 list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
764 /* 763 /*
765 * We already have the hashlock. We don't need the 764 * We already have the hashlock. We don't need the
766 * dqlock to look at the id field of the dquot, since the 765 * dqlock to look at the id field of the dquot, since the
@@ -772,12 +771,12 @@ xfs_qm_dqlookup(
772 /* 771 /*
773 * All in core dquots must be on the dqlist of mp 772 * All in core dquots must be on the dqlist of mp
774 */ 773 */
775 ASSERT(dqp->MPL_PREVP != NULL); 774 ASSERT(!list_empty(&dqp->q_mplist));
776 775
777 xfs_dqlock(dqp); 776 xfs_dqlock(dqp);
778 if (dqp->q_nrefs == 0) { 777 if (dqp->q_nrefs == 0) {
779 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp)); 778 ASSERT(!list_empty(&dqp->q_freelist));
780 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 779 if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
781 trace_xfs_dqlookup_want(dqp); 780 trace_xfs_dqlookup_want(dqp);
782 781
783 /* 782 /*
@@ -787,7 +786,7 @@ xfs_qm_dqlookup(
787 */ 786 */
788 dqp->dq_flags |= XFS_DQ_WANT; 787 dqp->dq_flags |= XFS_DQ_WANT;
789 xfs_dqunlock(dqp); 788 xfs_dqunlock(dqp);
790 xfs_qm_freelist_lock(xfs_Gqm); 789 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
791 xfs_dqlock(dqp); 790 xfs_dqlock(dqp);
792 dqp->dq_flags &= ~(XFS_DQ_WANT); 791 dqp->dq_flags &= ~(XFS_DQ_WANT);
793 } 792 }
@@ -802,46 +801,28 @@ xfs_qm_dqlookup(
802 801
803 if (flist_locked) { 802 if (flist_locked) {
804 if (dqp->q_nrefs != 0) { 803 if (dqp->q_nrefs != 0) {
805 xfs_qm_freelist_unlock(xfs_Gqm); 804 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
806 flist_locked = B_FALSE; 805 flist_locked = B_FALSE;
807 } else { 806 } else {
808 /* 807 /* take it off the freelist */
809 * take it off the freelist
810 */
811 trace_xfs_dqlookup_freelist(dqp); 808 trace_xfs_dqlookup_freelist(dqp);
812 XQM_FREELIST_REMOVE(dqp); 809 list_del_init(&dqp->q_freelist);
813 /* xfs_qm_freelist_print(&(xfs_Gqm-> 810 xfs_Gqm->qm_dqfrlist_cnt--;
814 qm_dqfreelist),
815 "after removal"); */
816 } 811 }
817 } 812 }
818 813
819 /*
820 * grab a reference
821 */
822 XFS_DQHOLD(dqp); 814 XFS_DQHOLD(dqp);
823 815
824 if (flist_locked) 816 if (flist_locked)
825 xfs_qm_freelist_unlock(xfs_Gqm); 817 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
826 /* 818 /*
827 * move the dquot to the front of the hashchain 819 * move the dquot to the front of the hashchain
828 */ 820 */
829 ASSERT(mutex_is_locked(&qh->qh_lock)); 821 ASSERT(mutex_is_locked(&qh->qh_lock));
830 if (dqp->HL_PREVP != &qh->qh_next) { 822 list_move(&dqp->q_hashlist, &qh->qh_list);
831 trace_xfs_dqlookup_move(dqp);
832 if ((d = dqp->HL_NEXT))
833 d->HL_PREVP = dqp->HL_PREVP;
834 *(dqp->HL_PREVP) = d;
835 d = qh->qh_next;
836 d->HL_PREVP = &dqp->HL_NEXT;
837 dqp->HL_NEXT = d;
838 dqp->HL_PREVP = &qh->qh_next;
839 qh->qh_next = dqp;
840 }
841 trace_xfs_dqlookup_done(dqp); 823 trace_xfs_dqlookup_done(dqp);
842 *O_dqpp = dqp; 824 *O_dqpp = dqp;
843 ASSERT(mutex_is_locked(&qh->qh_lock)); 825 return 0;
844 return (0);
845 } 826 }
846 } 827 }
847 828
@@ -975,16 +956,17 @@ xfs_qm_dqget(
975 */ 956 */
976 if (ip) { 957 if (ip) {
977 xfs_ilock(ip, XFS_ILOCK_EXCL); 958 xfs_ilock(ip, XFS_ILOCK_EXCL);
978 if (! XFS_IS_DQTYPE_ON(mp, type)) { 959
979 /* inode stays locked on return */
980 xfs_qm_dqdestroy(dqp);
981 return XFS_ERROR(ESRCH);
982 }
983 /* 960 /*
984 * A dquot could be attached to this inode by now, since 961 * A dquot could be attached to this inode by now, since
985 * we had dropped the ilock. 962 * we had dropped the ilock.
986 */ 963 */
987 if (type == XFS_DQ_USER) { 964 if (type == XFS_DQ_USER) {
965 if (!XFS_IS_UQUOTA_ON(mp)) {
966 /* inode stays locked on return */
967 xfs_qm_dqdestroy(dqp);
968 return XFS_ERROR(ESRCH);
969 }
988 if (ip->i_udquot) { 970 if (ip->i_udquot) {
989 xfs_qm_dqdestroy(dqp); 971 xfs_qm_dqdestroy(dqp);
990 dqp = ip->i_udquot; 972 dqp = ip->i_udquot;
@@ -992,6 +974,11 @@ xfs_qm_dqget(
992 goto dqret; 974 goto dqret;
993 } 975 }
994 } else { 976 } else {
977 if (!XFS_IS_OQUOTA_ON(mp)) {
978 /* inode stays locked on return */
979 xfs_qm_dqdestroy(dqp);
980 return XFS_ERROR(ESRCH);
981 }
995 if (ip->i_gdquot) { 982 if (ip->i_gdquot) {
996 xfs_qm_dqdestroy(dqp); 983 xfs_qm_dqdestroy(dqp);
997 dqp = ip->i_gdquot; 984 dqp = ip->i_gdquot;
@@ -1033,13 +1020,14 @@ xfs_qm_dqget(
1033 */ 1020 */
1034 ASSERT(mutex_is_locked(&h->qh_lock)); 1021 ASSERT(mutex_is_locked(&h->qh_lock));
1035 dqp->q_hash = h; 1022 dqp->q_hash = h;
1036 XQM_HASHLIST_INSERT(h, dqp); 1023 list_add(&dqp->q_hashlist, &h->qh_list);
1024 h->qh_version++;
1037 1025
1038 /* 1026 /*
1039 * Attach this dquot to this filesystem's list of all dquots, 1027 * Attach this dquot to this filesystem's list of all dquots,
1040 * kept inside the mount structure in m_quotainfo field 1028 * kept inside the mount structure in m_quotainfo field
1041 */ 1029 */
1042 xfs_qm_mplist_lock(mp); 1030 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1043 1031
1044 /* 1032 /*
1045 * We return a locked dquot to the caller, with a reference taken 1033 * We return a locked dquot to the caller, with a reference taken
@@ -1047,9 +1035,9 @@ xfs_qm_dqget(
1047 xfs_dqlock(dqp); 1035 xfs_dqlock(dqp);
1048 dqp->q_nrefs = 1; 1036 dqp->q_nrefs = 1;
1049 1037
1050 XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp); 1038 list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
1051 1039 mp->m_quotainfo->qi_dquots++;
1052 xfs_qm_mplist_unlock(mp); 1040 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1053 mutex_unlock(&h->qh_lock); 1041 mutex_unlock(&h->qh_lock);
1054 dqret: 1042 dqret:
1055 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1043 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -1086,10 +1074,10 @@ xfs_qm_dqput(
1086 * drop the dqlock and acquire the freelist and dqlock 1074 * drop the dqlock and acquire the freelist and dqlock
1087 * in the right order; but try to get it out-of-order first 1075 * in the right order; but try to get it out-of-order first
1088 */ 1076 */
1089 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 1077 if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
1090 trace_xfs_dqput_wait(dqp); 1078 trace_xfs_dqput_wait(dqp);
1091 xfs_dqunlock(dqp); 1079 xfs_dqunlock(dqp);
1092 xfs_qm_freelist_lock(xfs_Gqm); 1080 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1093 xfs_dqlock(dqp); 1081 xfs_dqlock(dqp);
1094 } 1082 }
1095 1083
@@ -1100,10 +1088,8 @@ xfs_qm_dqput(
1100 if (--dqp->q_nrefs == 0) { 1088 if (--dqp->q_nrefs == 0) {
1101 trace_xfs_dqput_free(dqp); 1089 trace_xfs_dqput_free(dqp);
1102 1090
1103 /* 1091 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
1104 * insert at end of the freelist. 1092 xfs_Gqm->qm_dqfrlist_cnt++;
1105 */
1106 XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
1107 1093
1108 /* 1094 /*
1109 * If we just added a udquot to the freelist, then 1095 * If we just added a udquot to the freelist, then
@@ -1118,10 +1104,6 @@ xfs_qm_dqput(
1118 xfs_dqlock(gdqp); 1104 xfs_dqlock(gdqp);
1119 dqp->q_gdquot = NULL; 1105 dqp->q_gdquot = NULL;
1120 } 1106 }
1121
1122 /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
1123 "@@@@@++ Free list (after append) @@@@@+");
1124 */
1125 } 1107 }
1126 xfs_dqunlock(dqp); 1108 xfs_dqunlock(dqp);
1127 1109
@@ -1133,7 +1115,7 @@ xfs_qm_dqput(
1133 break; 1115 break;
1134 dqp = gdqp; 1116 dqp = gdqp;
1135 } 1117 }
1136 xfs_qm_freelist_unlock(xfs_Gqm); 1118 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1137} 1119}
1138 1120
1139/* 1121/*
@@ -1386,10 +1368,10 @@ int
1386xfs_qm_dqpurge( 1368xfs_qm_dqpurge(
1387 xfs_dquot_t *dqp) 1369 xfs_dquot_t *dqp)
1388{ 1370{
1389 xfs_dqhash_t *thishash; 1371 xfs_dqhash_t *qh = dqp->q_hash;
1390 xfs_mount_t *mp = dqp->q_mount; 1372 xfs_mount_t *mp = dqp->q_mount;
1391 1373
1392 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 1374 ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
1393 ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); 1375 ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
1394 1376
1395 xfs_dqlock(dqp); 1377 xfs_dqlock(dqp);
@@ -1407,7 +1389,7 @@ xfs_qm_dqpurge(
1407 return (1); 1389 return (1);
1408 } 1390 }
1409 1391
1410 ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); 1392 ASSERT(!list_empty(&dqp->q_freelist));
1411 1393
1412 /* 1394 /*
1413 * If we're turning off quotas, we have to make sure that, for 1395 * If we're turning off quotas, we have to make sure that, for
@@ -1452,14 +1434,16 @@ xfs_qm_dqpurge(
1452 ASSERT(XFS_FORCED_SHUTDOWN(mp) || 1434 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1453 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); 1435 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1454 1436
1455 thishash = dqp->q_hash; 1437 list_del_init(&dqp->q_hashlist);
1456 XQM_HASHLIST_REMOVE(thishash, dqp); 1438 qh->qh_version++;
1457 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp); 1439 list_del_init(&dqp->q_mplist);
1440 mp->m_quotainfo->qi_dqreclaims++;
1441 mp->m_quotainfo->qi_dquots--;
1458 /* 1442 /*
1459 * XXX Move this to the front of the freelist, if we can get the 1443 * XXX Move this to the front of the freelist, if we can get the
1460 * freelist lock. 1444 * freelist lock.
1461 */ 1445 */
1462 ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); 1446 ASSERT(!list_empty(&dqp->q_freelist));
1463 1447
1464 dqp->q_mount = NULL; 1448 dqp->q_mount = NULL;
1465 dqp->q_hash = NULL; 1449 dqp->q_hash = NULL;
@@ -1467,7 +1451,7 @@ xfs_qm_dqpurge(
1467 memset(&dqp->q_core, 0, sizeof(dqp->q_core)); 1451 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1468 xfs_dqfunlock(dqp); 1452 xfs_dqfunlock(dqp);
1469 xfs_dqunlock(dqp); 1453 xfs_dqunlock(dqp);
1470 mutex_unlock(&thishash->qh_lock); 1454 mutex_unlock(&qh->qh_lock);
1471 return (0); 1455 return (0);
1472} 1456}
1473 1457
@@ -1517,6 +1501,7 @@ void
1517xfs_qm_dqflock_pushbuf_wait( 1501xfs_qm_dqflock_pushbuf_wait(
1518 xfs_dquot_t *dqp) 1502 xfs_dquot_t *dqp)
1519{ 1503{
1504 xfs_mount_t *mp = dqp->q_mount;
1520 xfs_buf_t *bp; 1505 xfs_buf_t *bp;
1521 1506
1522 /* 1507 /*
@@ -1525,14 +1510,14 @@ xfs_qm_dqflock_pushbuf_wait(
1525 * out immediately. We'll be able to acquire 1510 * out immediately. We'll be able to acquire
1526 * the flush lock when the I/O completes. 1511 * the flush lock when the I/O completes.
1527 */ 1512 */
1528 bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, 1513 bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
1529 XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK); 1514 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
1530 if (!bp) 1515 if (!bp)
1531 goto out_lock; 1516 goto out_lock;
1532 1517
1533 if (XFS_BUF_ISDELAYWRITE(bp)) { 1518 if (XFS_BUF_ISDELAYWRITE(bp)) {
1534 if (XFS_BUF_ISPINNED(bp)) 1519 if (XFS_BUF_ISPINNED(bp))
1535 xfs_log_force(dqp->q_mount, 0); 1520 xfs_log_force(mp, 0);
1536 xfs_buf_delwri_promote(bp); 1521 xfs_buf_delwri_promote(bp);
1537 wake_up_process(bp->b_target->bt_task); 1522 wake_up_process(bp->b_target->bt_task);
1538 } 1523 }
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index a0f7da586d1b..5da3a23b820d 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -33,40 +33,23 @@
33 * The hash chain headers (hash buckets) 33 * The hash chain headers (hash buckets)
34 */ 34 */
35typedef struct xfs_dqhash { 35typedef struct xfs_dqhash {
36 struct xfs_dquot *qh_next; 36 struct list_head qh_list;
37 struct mutex qh_lock; 37 struct mutex qh_lock;
38 uint qh_version; /* ever increasing version */ 38 uint qh_version; /* ever increasing version */
39 uint qh_nelems; /* number of dquots on the list */ 39 uint qh_nelems; /* number of dquots on the list */
40} xfs_dqhash_t; 40} xfs_dqhash_t;
41 41
42typedef struct xfs_dqlink {
43 struct xfs_dquot *ql_next; /* forward link */
44 struct xfs_dquot **ql_prevp; /* pointer to prev ql_next */
45} xfs_dqlink_t;
46
47struct xfs_mount; 42struct xfs_mount;
48struct xfs_trans; 43struct xfs_trans;
49 44
50/* 45/*
51 * This is the marker which is designed to occupy the first few
52 * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers
53 * must come first.
54 * This serves as the marker ("sentinel") when we have to restart list
55 * iterations because of locking considerations.
56 */
57typedef struct xfs_dqmarker {
58 struct xfs_dquot*dqm_flnext; /* link to freelist: must be first */
59 struct xfs_dquot*dqm_flprev;
60 xfs_dqlink_t dqm_mplist; /* link to mount's list of dquots */
61 xfs_dqlink_t dqm_hashlist; /* link to the hash chain */
62 uint dqm_flags; /* various flags (XFS_DQ_*) */
63} xfs_dqmarker_t;
64
65/*
66 * The incore dquot structure 46 * The incore dquot structure
67 */ 47 */
68typedef struct xfs_dquot { 48typedef struct xfs_dquot {
69 xfs_dqmarker_t q_lists; /* list ptrs, q_flags (marker) */ 49 uint dq_flags; /* various flags (XFS_DQ_*) */
50 struct list_head q_freelist; /* global free list of dquots */
51 struct list_head q_mplist; /* mount's list of dquots */
52 struct list_head q_hashlist; /* global hash list of dquots */
70 xfs_dqhash_t *q_hash; /* the hashchain header */ 53 xfs_dqhash_t *q_hash; /* the hashchain header */
71 struct xfs_mount*q_mount; /* filesystem this relates to */ 54 struct xfs_mount*q_mount; /* filesystem this relates to */
72 struct xfs_trans*q_transp; /* trans this belongs to currently */ 55 struct xfs_trans*q_transp; /* trans this belongs to currently */
@@ -87,13 +70,6 @@ typedef struct xfs_dquot {
87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ 70 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
88} xfs_dquot_t; 71} xfs_dquot_t;
89 72
90
91#define dq_flnext q_lists.dqm_flnext
92#define dq_flprev q_lists.dqm_flprev
93#define dq_mplist q_lists.dqm_mplist
94#define dq_hashlist q_lists.dqm_hashlist
95#define dq_flags q_lists.dqm_flags
96
97/* 73/*
98 * Lock hierarchy for q_qlock: 74 * Lock hierarchy for q_qlock:
99 * XFS_QLOCK_NORMAL is the implicit default, 75 * XFS_QLOCK_NORMAL is the implicit default,
@@ -127,7 +103,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
127} 103}
128 104
129#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 105#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
130#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
131#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 106#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
132#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 107#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
133#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) 108#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 4e4ee9a57194..8d89a24ae324 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -107,8 +107,7 @@ xfs_qm_dquot_logitem_pin(
107/* ARGSUSED */ 107/* ARGSUSED */
108STATIC void 108STATIC void
109xfs_qm_dquot_logitem_unpin( 109xfs_qm_dquot_logitem_unpin(
110 xfs_dq_logitem_t *logitem, 110 xfs_dq_logitem_t *logitem)
111 int stale)
112{ 111{
113 xfs_dquot_t *dqp = logitem->qli_dquot; 112 xfs_dquot_t *dqp = logitem->qli_dquot;
114 113
@@ -123,7 +122,7 @@ xfs_qm_dquot_logitem_unpin_remove(
123 xfs_dq_logitem_t *logitem, 122 xfs_dq_logitem_t *logitem,
124 xfs_trans_t *tp) 123 xfs_trans_t *tp)
125{ 124{
126 xfs_qm_dquot_logitem_unpin(logitem, 0); 125 xfs_qm_dquot_logitem_unpin(logitem);
127} 126}
128 127
129/* 128/*
@@ -228,7 +227,7 @@ xfs_qm_dquot_logitem_pushbuf(
228 } 227 }
229 mp = dqp->q_mount; 228 mp = dqp->q_mount;
230 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, 229 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
231 XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK); 230 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
232 xfs_dqunlock(dqp); 231 xfs_dqunlock(dqp);
233 if (!bp) 232 if (!bp)
234 return; 233 return;
@@ -329,8 +328,7 @@ static struct xfs_item_ops xfs_dquot_item_ops = {
329 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 328 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
330 xfs_qm_dquot_logitem_format, 329 xfs_qm_dquot_logitem_format,
331 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, 330 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin,
332 .iop_unpin = (void(*)(xfs_log_item_t*, int)) 331 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin,
333 xfs_qm_dquot_logitem_unpin,
334 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 332 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
335 xfs_qm_dquot_logitem_unpin_remove, 333 xfs_qm_dquot_logitem_unpin_remove,
336 .iop_trylock = (uint(*)(xfs_log_item_t*)) 334 .iop_trylock = (uint(*)(xfs_log_item_t*))
@@ -357,9 +355,8 @@ xfs_qm_dquot_logitem_init(
357 xfs_dq_logitem_t *lp; 355 xfs_dq_logitem_t *lp;
358 lp = &dqp->q_logitem; 356 lp = &dqp->q_logitem;
359 357
360 lp->qli_item.li_type = XFS_LI_DQUOT; 358 xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
361 lp->qli_item.li_ops = &xfs_dquot_item_ops; 359 &xfs_dquot_item_ops);
362 lp->qli_item.li_mountp = dqp->q_mount;
363 lp->qli_dquot = dqp; 360 lp->qli_dquot = dqp;
364 lp->qli_format.qlf_type = XFS_LI_DQUOT; 361 lp->qli_format.qlf_type = XFS_LI_DQUOT;
365 lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); 362 lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
@@ -426,7 +423,7 @@ xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf)
426 */ 423 */
427/*ARGSUSED*/ 424/*ARGSUSED*/
428STATIC void 425STATIC void
429xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale) 426xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf)
430{ 427{
431 return; 428 return;
432} 429}
@@ -537,8 +534,7 @@ static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
537 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 534 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
538 xfs_qm_qoff_logitem_format, 535 xfs_qm_qoff_logitem_format,
539 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 536 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
540 .iop_unpin = (void(*)(xfs_log_item_t* ,int)) 537 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin,
541 xfs_qm_qoff_logitem_unpin,
542 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 538 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
543 xfs_qm_qoff_logitem_unpin_remove, 539 xfs_qm_qoff_logitem_unpin_remove,
544 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 540 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
@@ -559,8 +555,7 @@ static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
559 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 555 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
560 xfs_qm_qoff_logitem_format, 556 xfs_qm_qoff_logitem_format,
561 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 557 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
562 .iop_unpin = (void(*)(xfs_log_item_t*, int)) 558 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin,
563 xfs_qm_qoff_logitem_unpin,
564 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 559 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
565 xfs_qm_qoff_logitem_unpin_remove, 560 xfs_qm_qoff_logitem_unpin_remove,
566 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 561 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
@@ -586,11 +581,8 @@ xfs_qm_qoff_logitem_init(
586 581
587 qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); 582 qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP);
588 583
589 qf->qql_item.li_type = XFS_LI_QUOTAOFF; 584 xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
590 if (start) 585 &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
591 qf->qql_item.li_ops = &xfs_qm_qoffend_logitem_ops;
592 else
593 qf->qql_item.li_ops = &xfs_qm_qoff_logitem_ops;
594 qf->qql_item.li_mountp = mp; 586 qf->qql_item.li_mountp = mp;
595 qf->qql_format.qf_type = XFS_LI_QUOTAOFF; 587 qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
596 qf->qql_format.qf_flags = flags; 588 qf->qql_format.qf_flags = flags;
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 417e61e3d9dd..38e764146644 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -67,9 +67,6 @@ static cred_t xfs_zerocr;
67STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); 67STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
68STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); 68STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 69
70STATIC void xfs_qm_freelist_init(xfs_frlist_t *);
71STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *);
72
73STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 70STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
74STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
75STATIC int xfs_qm_shake(int, gfp_t); 72STATIC int xfs_qm_shake(int, gfp_t);
@@ -84,21 +81,25 @@ extern struct mutex qcheck_lock;
84#endif 81#endif
85 82
86#ifdef QUOTADEBUG 83#ifdef QUOTADEBUG
87#define XQM_LIST_PRINT(l, NXT, title) \ 84static void
88{ \ 85xfs_qm_dquot_list_print(
89 xfs_dquot_t *dqp; int i = 0; \ 86 struct xfs_mount *mp)
90 cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ 87{
91 for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \ 88 xfs_dquot_t *dqp;
92 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " \ 89 int i = 0;
93 "bcnt = %d, icnt = %d, refs = %d", \ 90
94 ++i, (int) be32_to_cpu(dqp->q_core.d_id), \ 91 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) {
95 DQFLAGTO_TYPESTR(dqp), \ 92 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" "
96 (int) be64_to_cpu(dqp->q_core.d_bcount), \ 93 "bcnt = %lld, icnt = %lld, refs = %d",
97 (int) be64_to_cpu(dqp->q_core.d_icount), \ 94 i++, be32_to_cpu(dqp->q_core.d_id),
98 (int) dqp->q_nrefs); } \ 95 DQFLAGTO_TYPESTR(dqp),
96 (long long)be64_to_cpu(dqp->q_core.d_bcount),
97 (long long)be64_to_cpu(dqp->q_core.d_icount),
98 dqp->q_nrefs);
99 }
99} 100}
100#else 101#else
101#define XQM_LIST_PRINT(l, NXT, title) do { } while (0) 102static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
102#endif 103#endif
103 104
104/* 105/*
@@ -144,7 +145,9 @@ xfs_Gqm_init(void)
144 /* 145 /*
145 * Freelist of all dquots of all file systems 146 * Freelist of all dquots of all file systems
146 */ 147 */
147 xfs_qm_freelist_init(&(xqm->qm_dqfreelist)); 148 INIT_LIST_HEAD(&xqm->qm_dqfrlist);
149 xqm->qm_dqfrlist_cnt = 0;
150 mutex_init(&xqm->qm_dqfrlist_lock);
148 151
149 /* 152 /*
150 * dquot zone. we register our own low-memory callback. 153 * dquot zone. we register our own low-memory callback.
@@ -189,6 +192,7 @@ STATIC void
189xfs_qm_destroy( 192xfs_qm_destroy(
190 struct xfs_qm *xqm) 193 struct xfs_qm *xqm)
191{ 194{
195 struct xfs_dquot *dqp, *n;
192 int hsize, i; 196 int hsize, i;
193 197
194 ASSERT(xqm != NULL); 198 ASSERT(xqm != NULL);
@@ -204,7 +208,21 @@ xfs_qm_destroy(
204 xqm->qm_usr_dqhtable = NULL; 208 xqm->qm_usr_dqhtable = NULL;
205 xqm->qm_grp_dqhtable = NULL; 209 xqm->qm_grp_dqhtable = NULL;
206 xqm->qm_dqhashmask = 0; 210 xqm->qm_dqhashmask = 0;
207 xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist)); 211
212 /* frlist cleanup */
213 mutex_lock(&xqm->qm_dqfrlist_lock);
214 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
215 xfs_dqlock(dqp);
216#ifdef QUOTADEBUG
217 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
218#endif
219 list_del_init(&dqp->q_freelist);
220 xfs_Gqm->qm_dqfrlist_cnt--;
221 xfs_dqunlock(dqp);
222 xfs_qm_dqdestroy(dqp);
223 }
224 mutex_unlock(&xqm->qm_dqfrlist_lock);
225 mutex_destroy(&xqm->qm_dqfrlist_lock);
208#ifdef DEBUG 226#ifdef DEBUG
209 mutex_destroy(&qcheck_lock); 227 mutex_destroy(&qcheck_lock);
210#endif 228#endif
@@ -256,7 +274,7 @@ STATIC void
256xfs_qm_rele_quotafs_ref( 274xfs_qm_rele_quotafs_ref(
257 struct xfs_mount *mp) 275 struct xfs_mount *mp)
258{ 276{
259 xfs_dquot_t *dqp, *nextdqp; 277 xfs_dquot_t *dqp, *n;
260 278
261 ASSERT(xfs_Gqm); 279 ASSERT(xfs_Gqm);
262 ASSERT(xfs_Gqm->qm_nrefs > 0); 280 ASSERT(xfs_Gqm->qm_nrefs > 0);
@@ -264,26 +282,24 @@ xfs_qm_rele_quotafs_ref(
264 /* 282 /*
265 * Go thru the freelist and destroy all inactive dquots. 283 * Go thru the freelist and destroy all inactive dquots.
266 */ 284 */
267 xfs_qm_freelist_lock(xfs_Gqm); 285 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
268 286
269 for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; 287 list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
270 dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
271 xfs_dqlock(dqp); 288 xfs_dqlock(dqp);
272 nextdqp = dqp->dq_flnext;
273 if (dqp->dq_flags & XFS_DQ_INACTIVE) { 289 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
274 ASSERT(dqp->q_mount == NULL); 290 ASSERT(dqp->q_mount == NULL);
275 ASSERT(! XFS_DQ_IS_DIRTY(dqp)); 291 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
276 ASSERT(dqp->HL_PREVP == NULL); 292 ASSERT(list_empty(&dqp->q_hashlist));
277 ASSERT(dqp->MPL_PREVP == NULL); 293 ASSERT(list_empty(&dqp->q_mplist));
278 XQM_FREELIST_REMOVE(dqp); 294 list_del_init(&dqp->q_freelist);
295 xfs_Gqm->qm_dqfrlist_cnt--;
279 xfs_dqunlock(dqp); 296 xfs_dqunlock(dqp);
280 xfs_qm_dqdestroy(dqp); 297 xfs_qm_dqdestroy(dqp);
281 } else { 298 } else {
282 xfs_dqunlock(dqp); 299 xfs_dqunlock(dqp);
283 } 300 }
284 dqp = nextdqp;
285 } 301 }
286 xfs_qm_freelist_unlock(xfs_Gqm); 302 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
287 303
288 /* 304 /*
289 * Destroy the entire XQM. If somebody mounts with quotaon, this'll 305 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
@@ -305,7 +321,7 @@ xfs_qm_unmount(
305 struct xfs_mount *mp) 321 struct xfs_mount *mp)
306{ 322{
307 if (mp->m_quotainfo) { 323 if (mp->m_quotainfo) {
308 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 324 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
309 xfs_qm_destroy_quotainfo(mp); 325 xfs_qm_destroy_quotainfo(mp);
310 } 326 }
311} 327}
@@ -449,20 +465,21 @@ xfs_qm_unmount_quotas(
449 */ 465 */
450STATIC int 466STATIC int
451xfs_qm_dqflush_all( 467xfs_qm_dqflush_all(
452 xfs_mount_t *mp, 468 struct xfs_mount *mp,
453 int sync_mode) 469 int sync_mode)
454{ 470{
455 int recl; 471 struct xfs_quotainfo *q = mp->m_quotainfo;
456 xfs_dquot_t *dqp; 472 int recl;
457 int niters; 473 struct xfs_dquot *dqp;
458 int error; 474 int niters;
475 int error;
459 476
460 if (mp->m_quotainfo == NULL) 477 if (!q)
461 return 0; 478 return 0;
462 niters = 0; 479 niters = 0;
463again: 480again:
464 xfs_qm_mplist_lock(mp); 481 mutex_lock(&q->qi_dqlist_lock);
465 FOREACH_DQUOT_IN_MP(dqp, mp) { 482 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
466 xfs_dqlock(dqp); 483 xfs_dqlock(dqp);
467 if (! XFS_DQ_IS_DIRTY(dqp)) { 484 if (! XFS_DQ_IS_DIRTY(dqp)) {
468 xfs_dqunlock(dqp); 485 xfs_dqunlock(dqp);
@@ -470,7 +487,7 @@ again:
470 } 487 }
471 488
472 /* XXX a sentinel would be better */ 489 /* XXX a sentinel would be better */
473 recl = XFS_QI_MPLRECLAIMS(mp); 490 recl = q->qi_dqreclaims;
474 if (!xfs_dqflock_nowait(dqp)) { 491 if (!xfs_dqflock_nowait(dqp)) {
475 /* 492 /*
476 * If we can't grab the flush lock then check 493 * If we can't grab the flush lock then check
@@ -485,21 +502,21 @@ again:
485 * Let go of the mplist lock. We don't want to hold it 502 * Let go of the mplist lock. We don't want to hold it
486 * across a disk write. 503 * across a disk write.
487 */ 504 */
488 xfs_qm_mplist_unlock(mp); 505 mutex_unlock(&q->qi_dqlist_lock);
489 error = xfs_qm_dqflush(dqp, sync_mode); 506 error = xfs_qm_dqflush(dqp, sync_mode);
490 xfs_dqunlock(dqp); 507 xfs_dqunlock(dqp);
491 if (error) 508 if (error)
492 return error; 509 return error;
493 510
494 xfs_qm_mplist_lock(mp); 511 mutex_lock(&q->qi_dqlist_lock);
495 if (recl != XFS_QI_MPLRECLAIMS(mp)) { 512 if (recl != q->qi_dqreclaims) {
496 xfs_qm_mplist_unlock(mp); 513 mutex_unlock(&q->qi_dqlist_lock);
497 /* XXX restart limit */ 514 /* XXX restart limit */
498 goto again; 515 goto again;
499 } 516 }
500 } 517 }
501 518
502 xfs_qm_mplist_unlock(mp); 519 mutex_unlock(&q->qi_dqlist_lock);
503 /* return ! busy */ 520 /* return ! busy */
504 return 0; 521 return 0;
505} 522}
@@ -509,15 +526,15 @@ again:
509 */ 526 */
510STATIC void 527STATIC void
511xfs_qm_detach_gdquots( 528xfs_qm_detach_gdquots(
512 xfs_mount_t *mp) 529 struct xfs_mount *mp)
513{ 530{
514 xfs_dquot_t *dqp, *gdqp; 531 struct xfs_quotainfo *q = mp->m_quotainfo;
515 int nrecl; 532 struct xfs_dquot *dqp, *gdqp;
533 int nrecl;
516 534
517 again: 535 again:
518 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 536 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
519 dqp = XFS_QI_MPLNEXT(mp); 537 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
520 while (dqp) {
521 xfs_dqlock(dqp); 538 xfs_dqlock(dqp);
522 if ((gdqp = dqp->q_gdquot)) { 539 if ((gdqp = dqp->q_gdquot)) {
523 xfs_dqlock(gdqp); 540 xfs_dqlock(gdqp);
@@ -530,15 +547,14 @@ xfs_qm_detach_gdquots(
530 * Can't hold the mplist lock across a dqput. 547 * Can't hold the mplist lock across a dqput.
531 * XXXmust convert to marker based iterations here. 548 * XXXmust convert to marker based iterations here.
532 */ 549 */
533 nrecl = XFS_QI_MPLRECLAIMS(mp); 550 nrecl = q->qi_dqreclaims;
534 xfs_qm_mplist_unlock(mp); 551 mutex_unlock(&q->qi_dqlist_lock);
535 xfs_qm_dqput(gdqp); 552 xfs_qm_dqput(gdqp);
536 553
537 xfs_qm_mplist_lock(mp); 554 mutex_lock(&q->qi_dqlist_lock);
538 if (nrecl != XFS_QI_MPLRECLAIMS(mp)) 555 if (nrecl != q->qi_dqreclaims)
539 goto again; 556 goto again;
540 } 557 }
541 dqp = dqp->MPL_NEXT;
542 } 558 }
543} 559}
544 560
@@ -550,23 +566,23 @@ xfs_qm_detach_gdquots(
550 */ 566 */
551STATIC int 567STATIC int
552xfs_qm_dqpurge_int( 568xfs_qm_dqpurge_int(
553 xfs_mount_t *mp, 569 struct xfs_mount *mp,
554 uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */ 570 uint flags)
555{ 571{
556 xfs_dquot_t *dqp; 572 struct xfs_quotainfo *q = mp->m_quotainfo;
557 uint dqtype; 573 struct xfs_dquot *dqp, *n;
558 int nrecl; 574 uint dqtype;
559 xfs_dquot_t *nextdqp; 575 int nrecl;
560 int nmisses; 576 int nmisses;
561 577
562 if (mp->m_quotainfo == NULL) 578 if (!q)
563 return 0; 579 return 0;
564 580
565 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; 581 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
566 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; 582 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
567 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; 583 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
568 584
569 xfs_qm_mplist_lock(mp); 585 mutex_lock(&q->qi_dqlist_lock);
570 586
571 /* 587 /*
572 * In the first pass through all incore dquots of this filesystem, 588 * In the first pass through all incore dquots of this filesystem,
@@ -578,28 +594,25 @@ xfs_qm_dqpurge_int(
578 594
579 again: 595 again:
580 nmisses = 0; 596 nmisses = 0;
581 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 597 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
582 /* 598 /*
583 * Try to get rid of all of the unwanted dquots. The idea is to 599 * Try to get rid of all of the unwanted dquots. The idea is to
584 * get them off mplist and hashlist, but leave them on freelist. 600 * get them off mplist and hashlist, but leave them on freelist.
585 */ 601 */
586 dqp = XFS_QI_MPLNEXT(mp); 602 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
587 while (dqp) {
588 /* 603 /*
589 * It's OK to look at the type without taking dqlock here. 604 * It's OK to look at the type without taking dqlock here.
590 * We're holding the mplist lock here, and that's needed for 605 * We're holding the mplist lock here, and that's needed for
591 * a dqreclaim. 606 * a dqreclaim.
592 */ 607 */
593 if ((dqp->dq_flags & dqtype) == 0) { 608 if ((dqp->dq_flags & dqtype) == 0)
594 dqp = dqp->MPL_NEXT;
595 continue; 609 continue;
596 }
597 610
598 if (!mutex_trylock(&dqp->q_hash->qh_lock)) { 611 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
599 nrecl = XFS_QI_MPLRECLAIMS(mp); 612 nrecl = q->qi_dqreclaims;
600 xfs_qm_mplist_unlock(mp); 613 mutex_unlock(&q->qi_dqlist_lock);
601 mutex_lock(&dqp->q_hash->qh_lock); 614 mutex_lock(&dqp->q_hash->qh_lock);
602 xfs_qm_mplist_lock(mp); 615 mutex_lock(&q->qi_dqlist_lock);
603 616
604 /* 617 /*
605 * XXXTheoretically, we can get into a very long 618 * XXXTheoretically, we can get into a very long
@@ -607,7 +620,7 @@ xfs_qm_dqpurge_int(
607 * No one can be adding dquots to the mplist at 620 * No one can be adding dquots to the mplist at
608 * this point, but somebody might be taking things off. 621 * this point, but somebody might be taking things off.
609 */ 622 */
610 if (nrecl != XFS_QI_MPLRECLAIMS(mp)) { 623 if (nrecl != q->qi_dqreclaims) {
611 mutex_unlock(&dqp->q_hash->qh_lock); 624 mutex_unlock(&dqp->q_hash->qh_lock);
612 goto again; 625 goto again;
613 } 626 }
@@ -617,11 +630,9 @@ xfs_qm_dqpurge_int(
617 * Take the dquot off the mplist and hashlist. It may remain on 630 * Take the dquot off the mplist and hashlist. It may remain on
618 * freelist in INACTIVE state. 631 * freelist in INACTIVE state.
619 */ 632 */
620 nextdqp = dqp->MPL_NEXT;
621 nmisses += xfs_qm_dqpurge(dqp); 633 nmisses += xfs_qm_dqpurge(dqp);
622 dqp = nextdqp;
623 } 634 }
624 xfs_qm_mplist_unlock(mp); 635 mutex_unlock(&q->qi_dqlist_lock);
625 return nmisses; 636 return nmisses;
626} 637}
627 638
@@ -921,12 +932,13 @@ xfs_qm_dqdetach(
921 932
922int 933int
923xfs_qm_sync( 934xfs_qm_sync(
924 xfs_mount_t *mp, 935 struct xfs_mount *mp,
925 int flags) 936 int flags)
926{ 937{
927 int recl, restarts; 938 struct xfs_quotainfo *q = mp->m_quotainfo;
928 xfs_dquot_t *dqp; 939 int recl, restarts;
929 int error; 940 struct xfs_dquot *dqp;
941 int error;
930 942
931 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 943 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
932 return 0; 944 return 0;
@@ -934,18 +946,19 @@ xfs_qm_sync(
934 restarts = 0; 946 restarts = 0;
935 947
936 again: 948 again:
937 xfs_qm_mplist_lock(mp); 949 mutex_lock(&q->qi_dqlist_lock);
938 /* 950 /*
939 * dqpurge_all() also takes the mplist lock and iterate thru all dquots 951 * dqpurge_all() also takes the mplist lock and iterate thru all dquots
940 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared 952 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
941 * when we have the mplist lock, we know that dquots will be consistent 953 * when we have the mplist lock, we know that dquots will be consistent
942 * as long as we have it locked. 954 * as long as we have it locked.
943 */ 955 */
944 if (! XFS_IS_QUOTA_ON(mp)) { 956 if (!XFS_IS_QUOTA_ON(mp)) {
945 xfs_qm_mplist_unlock(mp); 957 mutex_unlock(&q->qi_dqlist_lock);
946 return 0; 958 return 0;
947 } 959 }
948 FOREACH_DQUOT_IN_MP(dqp, mp) { 960 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
961 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
949 /* 962 /*
950 * If this is vfs_sync calling, then skip the dquots that 963 * If this is vfs_sync calling, then skip the dquots that
951 * don't 'seem' to be dirty. ie. don't acquire dqlock. 964 * don't 'seem' to be dirty. ie. don't acquire dqlock.
@@ -969,7 +982,7 @@ xfs_qm_sync(
969 } 982 }
970 983
971 /* XXX a sentinel would be better */ 984 /* XXX a sentinel would be better */
972 recl = XFS_QI_MPLRECLAIMS(mp); 985 recl = q->qi_dqreclaims;
973 if (!xfs_dqflock_nowait(dqp)) { 986 if (!xfs_dqflock_nowait(dqp)) {
974 if (flags & SYNC_TRYLOCK) { 987 if (flags & SYNC_TRYLOCK) {
975 xfs_dqunlock(dqp); 988 xfs_dqunlock(dqp);
@@ -989,7 +1002,7 @@ xfs_qm_sync(
989 * Let go of the mplist lock. We don't want to hold it 1002 * Let go of the mplist lock. We don't want to hold it
990 * across a disk write 1003 * across a disk write
991 */ 1004 */
992 xfs_qm_mplist_unlock(mp); 1005 mutex_unlock(&q->qi_dqlist_lock);
993 error = xfs_qm_dqflush(dqp, flags); 1006 error = xfs_qm_dqflush(dqp, flags);
994 xfs_dqunlock(dqp); 1007 xfs_dqunlock(dqp);
995 if (error && XFS_FORCED_SHUTDOWN(mp)) 1008 if (error && XFS_FORCED_SHUTDOWN(mp))
@@ -997,17 +1010,17 @@ xfs_qm_sync(
997 else if (error) 1010 else if (error)
998 return error; 1011 return error;
999 1012
1000 xfs_qm_mplist_lock(mp); 1013 mutex_lock(&q->qi_dqlist_lock);
1001 if (recl != XFS_QI_MPLRECLAIMS(mp)) { 1014 if (recl != q->qi_dqreclaims) {
1002 if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) 1015 if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
1003 break; 1016 break;
1004 1017
1005 xfs_qm_mplist_unlock(mp); 1018 mutex_unlock(&q->qi_dqlist_lock);
1006 goto again; 1019 goto again;
1007 } 1020 }
1008 } 1021 }
1009 1022
1010 xfs_qm_mplist_unlock(mp); 1023 mutex_unlock(&q->qi_dqlist_lock);
1011 return 0; 1024 return 0;
1012} 1025}
1013 1026
@@ -1052,8 +1065,9 @@ xfs_qm_init_quotainfo(
1052 return error; 1065 return error;
1053 } 1066 }
1054 1067
1055 xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); 1068 INIT_LIST_HEAD(&qinf->qi_dqlist);
1056 lockdep_set_class(&qinf->qi_dqlist.qh_lock, &xfs_quota_mplist_class); 1069 mutex_init(&qinf->qi_dqlist_lock);
1070 lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
1057 1071
1058 qinf->qi_dqreclaims = 0; 1072 qinf->qi_dqreclaims = 0;
1059 1073
@@ -1150,7 +1164,8 @@ xfs_qm_destroy_quotainfo(
1150 */ 1164 */
1151 xfs_qm_rele_quotafs_ref(mp); 1165 xfs_qm_rele_quotafs_ref(mp);
1152 1166
1153 xfs_qm_list_destroy(&qi->qi_dqlist); 1167 ASSERT(list_empty(&qi->qi_dqlist));
1168 mutex_destroy(&qi->qi_dqlist_lock);
1154 1169
1155 if (qi->qi_uquotaip) { 1170 if (qi->qi_uquotaip) {
1156 IRELE(qi->qi_uquotaip); 1171 IRELE(qi->qi_uquotaip);
@@ -1177,7 +1192,7 @@ xfs_qm_list_init(
1177 int n) 1192 int n)
1178{ 1193{
1179 mutex_init(&list->qh_lock); 1194 mutex_init(&list->qh_lock);
1180 list->qh_next = NULL; 1195 INIT_LIST_HEAD(&list->qh_list);
1181 list->qh_version = 0; 1196 list->qh_version = 0;
1182 list->qh_nelems = 0; 1197 list->qh_nelems = 0;
1183} 1198}
@@ -1316,9 +1331,6 @@ xfs_qm_qino_alloc(
1316 */ 1331 */
1317 spin_lock(&mp->m_sb_lock); 1332 spin_lock(&mp->m_sb_lock);
1318 if (flags & XFS_QMOPT_SBVERSION) { 1333 if (flags & XFS_QMOPT_SBVERSION) {
1319#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1320 unsigned oldv = mp->m_sb.sb_versionnum;
1321#endif
1322 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); 1334 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1323 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | 1335 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1324 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == 1336 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
@@ -1331,11 +1343,6 @@ xfs_qm_qino_alloc(
1331 1343
1332 /* qflags will get updated _after_ quotacheck */ 1344 /* qflags will get updated _after_ quotacheck */
1333 mp->m_sb.sb_qflags = 0; 1345 mp->m_sb.sb_qflags = 0;
1334#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1335 cmn_err(CE_NOTE,
1336 "Old superblock version %x, converting to %x.",
1337 oldv, mp->m_sb.sb_versionnum);
1338#endif
1339 } 1346 }
1340 if (flags & XFS_QMOPT_UQUOTA) 1347 if (flags & XFS_QMOPT_UQUOTA)
1341 mp->m_sb.sb_uquotino = (*ip)->i_ino; 1348 mp->m_sb.sb_uquotino = (*ip)->i_ino;
@@ -1371,10 +1378,10 @@ xfs_qm_reset_dqcounts(
1371#ifdef DEBUG 1378#ifdef DEBUG
1372 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 1379 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1373 do_div(j, sizeof(xfs_dqblk_t)); 1380 do_div(j, sizeof(xfs_dqblk_t));
1374 ASSERT(XFS_QM_DQPERBLK(mp) == j); 1381 ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
1375#endif 1382#endif
1376 ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp); 1383 ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1377 for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) { 1384 for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
1378 /* 1385 /*
1379 * Do a sanity check, and if needed, repair the dqblk. Don't 1386 * Do a sanity check, and if needed, repair the dqblk. Don't
1380 * output any warnings because it's perfectly possible to 1387 * output any warnings because it's perfectly possible to
@@ -1429,7 +1436,7 @@ xfs_qm_dqiter_bufs(
1429 while (blkcnt--) { 1436 while (blkcnt--) {
1430 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 1437 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1431 XFS_FSB_TO_DADDR(mp, bno), 1438 XFS_FSB_TO_DADDR(mp, bno),
1432 (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp); 1439 mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1433 if (error) 1440 if (error)
1434 break; 1441 break;
1435 1442
@@ -1439,7 +1446,7 @@ xfs_qm_dqiter_bufs(
1439 * goto the next block. 1446 * goto the next block.
1440 */ 1447 */
1441 bno++; 1448 bno++;
1442 firstid += XFS_QM_DQPERBLK(mp); 1449 firstid += mp->m_quotainfo->qi_dqperchunk;
1443 } 1450 }
1444 return error; 1451 return error;
1445} 1452}
@@ -1505,7 +1512,7 @@ xfs_qm_dqiterate(
1505 continue; 1512 continue;
1506 1513
1507 firstid = (xfs_dqid_t) map[i].br_startoff * 1514 firstid = (xfs_dqid_t) map[i].br_startoff *
1508 XFS_QM_DQPERBLK(mp); 1515 mp->m_quotainfo->qi_dqperchunk;
1509 /* 1516 /*
1510 * Do a read-ahead on the next extent. 1517 * Do a read-ahead on the next extent.
1511 */ 1518 */
@@ -1516,7 +1523,7 @@ xfs_qm_dqiterate(
1516 while (rablkcnt--) { 1523 while (rablkcnt--) {
1517 xfs_baread(mp->m_ddev_targp, 1524 xfs_baread(mp->m_ddev_targp,
1518 XFS_FSB_TO_DADDR(mp, rablkno), 1525 XFS_FSB_TO_DADDR(mp, rablkno),
1519 (int)XFS_QI_DQCHUNKLEN(mp)); 1526 mp->m_quotainfo->qi_dqchunklen);
1520 rablkno++; 1527 rablkno++;
1521 } 1528 }
1522 } 1529 }
@@ -1576,8 +1583,10 @@ xfs_qm_quotacheck_dqadjust(
1576 1583
1577 /* 1584 /*
1578 * Set default limits, adjust timers (since we changed usages) 1585 * Set default limits, adjust timers (since we changed usages)
1586 *
1587 * There are no timers for the default values set in the root dquot.
1579 */ 1588 */
1580 if (! XFS_IS_SUSER_DQUOT(dqp)) { 1589 if (dqp->q_core.d_id) {
1581 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); 1590 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1582 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); 1591 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1583 } 1592 }
@@ -1747,14 +1756,14 @@ xfs_qm_quotacheck(
1747 lastino = 0; 1756 lastino = 0;
1748 flags = 0; 1757 flags = 0;
1749 1758
1750 ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp)); 1759 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1751 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1760 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1752 1761
1753 /* 1762 /*
1754 * There should be no cached dquots. The (simplistic) quotacheck 1763 * There should be no cached dquots. The (simplistic) quotacheck
1755 * algorithm doesn't like that. 1764 * algorithm doesn't like that.
1756 */ 1765 */
1757 ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0); 1766 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1758 1767
1759 cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); 1768 cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1760 1769
@@ -1763,15 +1772,19 @@ xfs_qm_quotacheck(
1763 * their counters to zero. We need a clean slate. 1772 * their counters to zero. We need a clean slate.
1764 * We don't log our changes till later. 1773 * We don't log our changes till later.
1765 */ 1774 */
1766 if ((uip = XFS_QI_UQIP(mp))) { 1775 uip = mp->m_quotainfo->qi_uquotaip;
1767 if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA))) 1776 if (uip) {
1777 error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
1778 if (error)
1768 goto error_return; 1779 goto error_return;
1769 flags |= XFS_UQUOTA_CHKD; 1780 flags |= XFS_UQUOTA_CHKD;
1770 } 1781 }
1771 1782
1772 if ((gip = XFS_QI_GQIP(mp))) { 1783 gip = mp->m_quotainfo->qi_gquotaip;
1773 if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? 1784 if (gip) {
1774 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA))) 1785 error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1786 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1787 if (error)
1775 goto error_return; 1788 goto error_return;
1776 flags |= XFS_OQUOTA_CHKD; 1789 flags |= XFS_OQUOTA_CHKD;
1777 } 1790 }
@@ -1804,7 +1817,7 @@ xfs_qm_quotacheck(
1804 * at this point (because we intentionally didn't in dqget_noattach). 1817 * at this point (because we intentionally didn't in dqget_noattach).
1805 */ 1818 */
1806 if (error) { 1819 if (error) {
1807 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); 1820 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
1808 goto error_return; 1821 goto error_return;
1809 } 1822 }
1810 1823
@@ -1825,7 +1838,7 @@ xfs_qm_quotacheck(
1825 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); 1838 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1826 mp->m_qflags |= flags; 1839 mp->m_qflags |= flags;
1827 1840
1828 XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++"); 1841 xfs_qm_dquot_list_print(mp);
1829 1842
1830 error_return: 1843 error_return:
1831 if (error) { 1844 if (error) {
@@ -1920,59 +1933,53 @@ xfs_qm_init_quotainos(
1920 } 1933 }
1921 } 1934 }
1922 1935
1923 XFS_QI_UQIP(mp) = uip; 1936 mp->m_quotainfo->qi_uquotaip = uip;
1924 XFS_QI_GQIP(mp) = gip; 1937 mp->m_quotainfo->qi_gquotaip = gip;
1925 1938
1926 return 0; 1939 return 0;
1927} 1940}
1928 1941
1929 1942
1943
1930/* 1944/*
1931 * Traverse the freelist of dquots and attempt to reclaim a maximum of 1945 * Just pop the least recently used dquot off the freelist and
1932 * 'howmany' dquots. This operation races with dqlookup(), and attempts to 1946 * recycle it. The returned dquot is locked.
1933 * favor the lookup function ...
1934 * XXXsup merge this with qm_reclaim_one().
1935 */ 1947 */
1936STATIC int 1948STATIC xfs_dquot_t *
1937xfs_qm_shake_freelist( 1949xfs_qm_dqreclaim_one(void)
1938 int howmany)
1939{ 1950{
1940 int nreclaimed; 1951 xfs_dquot_t *dqpout;
1941 xfs_dqhash_t *hash; 1952 xfs_dquot_t *dqp;
1942 xfs_dquot_t *dqp, *nextdqp;
1943 int restarts; 1953 int restarts;
1944 int nflushes;
1945
1946 if (howmany <= 0)
1947 return 0;
1948 1954
1949 nreclaimed = 0;
1950 restarts = 0; 1955 restarts = 0;
1951 nflushes = 0; 1956 dqpout = NULL;
1952 1957
1953#ifdef QUOTADEBUG 1958 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
1954 cmn_err(CE_DEBUG, "Shake free 0x%x", howmany); 1959startagain:
1955#endif 1960 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1956 /* lock order is : hashchainlock, freelistlock, mplistlock */
1957 tryagain:
1958 xfs_qm_freelist_lock(xfs_Gqm);
1959 1961
1960 for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; 1962 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
1961 ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) && 1963 struct xfs_mount *mp = dqp->q_mount;
1962 nreclaimed < howmany); ) {
1963 xfs_dqlock(dqp); 1964 xfs_dqlock(dqp);
1964 1965
1965 /* 1966 /*
1966 * We are racing with dqlookup here. Naturally we don't 1967 * We are racing with dqlookup here. Naturally we don't
1967 * want to reclaim a dquot that lookup wants. 1968 * want to reclaim a dquot that lookup wants. We release the
1969 * freelist lock and start over, so that lookup will grab
1970 * both the dquot and the freelistlock.
1968 */ 1971 */
1969 if (dqp->dq_flags & XFS_DQ_WANT) { 1972 if (dqp->dq_flags & XFS_DQ_WANT) {
1973 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
1974
1975 trace_xfs_dqreclaim_want(dqp);
1976
1970 xfs_dqunlock(dqp); 1977 xfs_dqunlock(dqp);
1971 xfs_qm_freelist_unlock(xfs_Gqm); 1978 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1972 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1979 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1973 return nreclaimed; 1980 return NULL;
1974 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1981 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1975 goto tryagain; 1982 goto startagain;
1976 } 1983 }
1977 1984
1978 /* 1985 /*
@@ -1981,23 +1988,27 @@ xfs_qm_shake_freelist(
1981 * life easier. 1988 * life easier.
1982 */ 1989 */
1983 if (dqp->dq_flags & XFS_DQ_INACTIVE) { 1990 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1984 ASSERT(dqp->q_mount == NULL); 1991 ASSERT(mp == NULL);
1985 ASSERT(! XFS_DQ_IS_DIRTY(dqp)); 1992 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1986 ASSERT(dqp->HL_PREVP == NULL); 1993 ASSERT(list_empty(&dqp->q_hashlist));
1987 ASSERT(dqp->MPL_PREVP == NULL); 1994 ASSERT(list_empty(&dqp->q_mplist));
1995 list_del_init(&dqp->q_freelist);
1996 xfs_Gqm->qm_dqfrlist_cnt--;
1997 xfs_dqunlock(dqp);
1998 dqpout = dqp;
1988 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); 1999 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1989 nextdqp = dqp->dq_flnext; 2000 break;
1990 goto off_freelist;
1991 } 2001 }
1992 2002
1993 ASSERT(dqp->MPL_PREVP); 2003 ASSERT(dqp->q_hash);
2004 ASSERT(!list_empty(&dqp->q_mplist));
2005
1994 /* 2006 /*
1995 * Try to grab the flush lock. If this dquot is in the process of 2007 * Try to grab the flush lock. If this dquot is in the process of
1996 * getting flushed to disk, we don't want to reclaim it. 2008 * getting flushed to disk, we don't want to reclaim it.
1997 */ 2009 */
1998 if (!xfs_dqflock_nowait(dqp)) { 2010 if (!xfs_dqflock_nowait(dqp)) {
1999 xfs_dqunlock(dqp); 2011 xfs_dqunlock(dqp);
2000 dqp = dqp->dq_flnext;
2001 continue; 2012 continue;
2002 } 2013 }
2003 2014
@@ -2010,21 +2021,21 @@ xfs_qm_shake_freelist(
2010 if (XFS_DQ_IS_DIRTY(dqp)) { 2021 if (XFS_DQ_IS_DIRTY(dqp)) {
2011 int error; 2022 int error;
2012 2023
2013 trace_xfs_dqshake_dirty(dqp); 2024 trace_xfs_dqreclaim_dirty(dqp);
2014 2025
2015 /* 2026 /*
2016 * We flush it delayed write, so don't bother 2027 * We flush it delayed write, so don't bother
2017 * releasing the mplock. 2028 * releasing the freelist lock.
2018 */ 2029 */
2019 error = xfs_qm_dqflush(dqp, 0); 2030 error = xfs_qm_dqflush(dqp, 0);
2020 if (error) { 2031 if (error) {
2021 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2032 xfs_fs_cmn_err(CE_WARN, mp,
2022 "xfs_qm_dqflush_all: dquot %p flush failed", dqp); 2033 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2023 } 2034 }
2024 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 2035 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2025 dqp = dqp->dq_flnext;
2026 continue; 2036 continue;
2027 } 2037 }
2038
2028 /* 2039 /*
2029 * We're trying to get the hashlock out of order. This races 2040 * We're trying to get the hashlock out of order. This races
2030 * with dqlookup; so, we giveup and goto the next dquot if 2041 * with dqlookup; so, we giveup and goto the next dquot if
@@ -2033,56 +2044,74 @@ xfs_qm_shake_freelist(
2033 * waiting for the freelist lock. 2044 * waiting for the freelist lock.
2034 */ 2045 */
2035 if (!mutex_trylock(&dqp->q_hash->qh_lock)) { 2046 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
2036 xfs_dqfunlock(dqp); 2047 restarts++;
2037 xfs_dqunlock(dqp); 2048 goto dqfunlock;
2038 dqp = dqp->dq_flnext;
2039 continue;
2040 } 2049 }
2050
2041 /* 2051 /*
2042 * This races with dquot allocation code as well as dqflush_all 2052 * This races with dquot allocation code as well as dqflush_all
2043 * and reclaim code. So, if we failed to grab the mplist lock, 2053 * and reclaim code. So, if we failed to grab the mplist lock,
2044 * giveup everything and start over. 2054 * giveup everything and start over.
2045 */ 2055 */
2046 hash = dqp->q_hash; 2056 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
2047 ASSERT(hash); 2057 restarts++;
2048 if (! xfs_qm_mplist_nowait(dqp->q_mount)) { 2058 mutex_unlock(&dqp->q_hash->qh_lock);
2049 /* XXX put a sentinel so that we can come back here */
2050 xfs_dqfunlock(dqp); 2059 xfs_dqfunlock(dqp);
2051 xfs_dqunlock(dqp); 2060 xfs_dqunlock(dqp);
2052 mutex_unlock(&hash->qh_lock); 2061 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
2053 xfs_qm_freelist_unlock(xfs_Gqm); 2062 if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS)
2054 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2063 return NULL;
2055 return nreclaimed; 2064 goto startagain;
2056 goto tryagain;
2057 } 2065 }
2058 2066
2059 trace_xfs_dqshake_unlink(dqp);
2060
2061#ifdef QUOTADEBUG
2062 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2063 dqp, be32_to_cpu(dqp->q_core.d_id));
2064#endif
2065 ASSERT(dqp->q_nrefs == 0); 2067 ASSERT(dqp->q_nrefs == 0);
2066 nextdqp = dqp->dq_flnext; 2068 list_del_init(&dqp->q_mplist);
2067 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); 2069 mp->m_quotainfo->qi_dquots--;
2068 XQM_HASHLIST_REMOVE(hash, dqp); 2070 mp->m_quotainfo->qi_dqreclaims++;
2071 list_del_init(&dqp->q_hashlist);
2072 dqp->q_hash->qh_version++;
2073 list_del_init(&dqp->q_freelist);
2074 xfs_Gqm->qm_dqfrlist_cnt--;
2075 dqpout = dqp;
2076 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
2077 mutex_unlock(&dqp->q_hash->qh_lock);
2078dqfunlock:
2069 xfs_dqfunlock(dqp); 2079 xfs_dqfunlock(dqp);
2070 xfs_qm_mplist_unlock(dqp->q_mount);
2071 mutex_unlock(&hash->qh_lock);
2072
2073 off_freelist:
2074 XQM_FREELIST_REMOVE(dqp);
2075 xfs_dqunlock(dqp); 2080 xfs_dqunlock(dqp);
2076 nreclaimed++; 2081 if (dqpout)
2077 XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims); 2082 break;
2083 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2084 return NULL;
2085 }
2086 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
2087 return dqpout;
2088}
2089
2090/*
2091 * Traverse the freelist of dquots and attempt to reclaim a maximum of
2092 * 'howmany' dquots. This operation races with dqlookup(), and attempts to
2093 * favor the lookup function ...
2094 */
2095STATIC int
2096xfs_qm_shake_freelist(
2097 int howmany)
2098{
2099 int nreclaimed = 0;
2100 xfs_dquot_t *dqp;
2101
2102 if (howmany <= 0)
2103 return 0;
2104
2105 while (nreclaimed < howmany) {
2106 dqp = xfs_qm_dqreclaim_one();
2107 if (!dqp)
2108 return nreclaimed;
2078 xfs_qm_dqdestroy(dqp); 2109 xfs_qm_dqdestroy(dqp);
2079 dqp = nextdqp; 2110 nreclaimed++;
2080 } 2111 }
2081 xfs_qm_freelist_unlock(xfs_Gqm);
2082 return nreclaimed; 2112 return nreclaimed;
2083} 2113}
2084 2114
2085
2086/* 2115/*
2087 * The kmem_shake interface is invoked when memory is running low. 2116 * The kmem_shake interface is invoked when memory is running low.
2088 */ 2117 */
@@ -2097,7 +2126,7 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2097 if (!xfs_Gqm) 2126 if (!xfs_Gqm)
2098 return 0; 2127 return 0;
2099 2128
2100 nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */ 2129 nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
2101 /* incore dquots in all f/s's */ 2130 /* incore dquots in all f/s's */
2102 ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; 2131 ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2103 2132
@@ -2113,131 +2142,6 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2113} 2142}
2114 2143
2115 2144
2116/*
2117 * Just pop the least recently used dquot off the freelist and
2118 * recycle it. The returned dquot is locked.
2119 */
2120STATIC xfs_dquot_t *
2121xfs_qm_dqreclaim_one(void)
2122{
2123 xfs_dquot_t *dqpout;
2124 xfs_dquot_t *dqp;
2125 int restarts;
2126 int nflushes;
2127
2128 restarts = 0;
2129 dqpout = NULL;
2130 nflushes = 0;
2131
2132 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
2133 startagain:
2134 xfs_qm_freelist_lock(xfs_Gqm);
2135
2136 FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
2137 xfs_dqlock(dqp);
2138
2139 /*
2140 * We are racing with dqlookup here. Naturally we don't
2141 * want to reclaim a dquot that lookup wants. We release the
2142 * freelist lock and start over, so that lookup will grab
2143 * both the dquot and the freelistlock.
2144 */
2145 if (dqp->dq_flags & XFS_DQ_WANT) {
2146 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2147
2148 trace_xfs_dqreclaim_want(dqp);
2149
2150 xfs_dqunlock(dqp);
2151 xfs_qm_freelist_unlock(xfs_Gqm);
2152 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2153 return NULL;
2154 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2155 goto startagain;
2156 }
2157
2158 /*
2159 * If the dquot is inactive, we are assured that it is
2160 * not on the mplist or the hashlist, and that makes our
2161 * life easier.
2162 */
2163 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2164 ASSERT(dqp->q_mount == NULL);
2165 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2166 ASSERT(dqp->HL_PREVP == NULL);
2167 ASSERT(dqp->MPL_PREVP == NULL);
2168 XQM_FREELIST_REMOVE(dqp);
2169 xfs_dqunlock(dqp);
2170 dqpout = dqp;
2171 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2172 break;
2173 }
2174
2175 ASSERT(dqp->q_hash);
2176 ASSERT(dqp->MPL_PREVP);
2177
2178 /*
2179 * Try to grab the flush lock. If this dquot is in the process of
2180 * getting flushed to disk, we don't want to reclaim it.
2181 */
2182 if (!xfs_dqflock_nowait(dqp)) {
2183 xfs_dqunlock(dqp);
2184 continue;
2185 }
2186
2187 /*
2188 * We have the flush lock so we know that this is not in the
2189 * process of being flushed. So, if this is dirty, flush it
2190 * DELWRI so that we don't get a freelist infested with
2191 * dirty dquots.
2192 */
2193 if (XFS_DQ_IS_DIRTY(dqp)) {
2194 int error;
2195
2196 trace_xfs_dqreclaim_dirty(dqp);
2197
2198 /*
2199 * We flush it delayed write, so don't bother
2200 * releasing the freelist lock.
2201 */
2202 error = xfs_qm_dqflush(dqp, 0);
2203 if (error) {
2204 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2205 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2206 }
2207 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2208 continue;
2209 }
2210
2211 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2212 xfs_dqfunlock(dqp);
2213 xfs_dqunlock(dqp);
2214 continue;
2215 }
2216
2217 if (!mutex_trylock(&dqp->q_hash->qh_lock))
2218 goto mplistunlock;
2219
2220 trace_xfs_dqreclaim_unlink(dqp);
2221
2222 ASSERT(dqp->q_nrefs == 0);
2223 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2224 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2225 XQM_FREELIST_REMOVE(dqp);
2226 dqpout = dqp;
2227 mutex_unlock(&dqp->q_hash->qh_lock);
2228 mplistunlock:
2229 xfs_qm_mplist_unlock(dqp->q_mount);
2230 xfs_dqfunlock(dqp);
2231 xfs_dqunlock(dqp);
2232 if (dqpout)
2233 break;
2234 }
2235
2236 xfs_qm_freelist_unlock(xfs_Gqm);
2237 return dqpout;
2238}
2239
2240
2241/*------------------------------------------------------------------*/ 2145/*------------------------------------------------------------------*/
2242 2146
2243/* 2147/*
@@ -2662,66 +2566,3 @@ xfs_qm_vop_create_dqattach(
2662 } 2566 }
2663} 2567}
2664 2568
2665/* ------------- list stuff -----------------*/
2666STATIC void
2667xfs_qm_freelist_init(xfs_frlist_t *ql)
2668{
2669 ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2670 mutex_init(&ql->qh_lock);
2671 ql->qh_version = 0;
2672 ql->qh_nelems = 0;
2673}
2674
2675STATIC void
2676xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2677{
2678 xfs_dquot_t *dqp, *nextdqp;
2679
2680 mutex_lock(&ql->qh_lock);
2681 for (dqp = ql->qh_next;
2682 dqp != (xfs_dquot_t *)ql; ) {
2683 xfs_dqlock(dqp);
2684 nextdqp = dqp->dq_flnext;
2685#ifdef QUOTADEBUG
2686 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2687#endif
2688 XQM_FREELIST_REMOVE(dqp);
2689 xfs_dqunlock(dqp);
2690 xfs_qm_dqdestroy(dqp);
2691 dqp = nextdqp;
2692 }
2693 mutex_unlock(&ql->qh_lock);
2694 mutex_destroy(&ql->qh_lock);
2695
2696 ASSERT(ql->qh_nelems == 0);
2697}
2698
2699STATIC void
2700xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2701{
2702 dq->dq_flnext = ql->qh_next;
2703 dq->dq_flprev = (xfs_dquot_t *)ql;
2704 ql->qh_next = dq;
2705 dq->dq_flnext->dq_flprev = dq;
2706 xfs_Gqm->qm_dqfreelist.qh_nelems++;
2707 xfs_Gqm->qm_dqfreelist.qh_version++;
2708}
2709
2710void
2711xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2712{
2713 xfs_dquot_t *next = dq->dq_flnext;
2714 xfs_dquot_t *prev = dq->dq_flprev;
2715
2716 next->dq_flprev = prev;
2717 prev->dq_flnext = next;
2718 dq->dq_flnext = dq->dq_flprev = dq;
2719 xfs_Gqm->qm_dqfreelist.qh_nelems--;
2720 xfs_Gqm->qm_dqfreelist.qh_version++;
2721}
2722
2723void
2724xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2725{
2726 xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2727}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 495564b8af38..c9446f1c726d 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -72,17 +72,6 @@ extern kmem_zone_t *qm_dqtrxzone;
72#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 72#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3
73 73
74typedef xfs_dqhash_t xfs_dqlist_t; 74typedef xfs_dqhash_t xfs_dqlist_t;
75/*
76 * The freelist head. The first two fields match the first two in the
77 * xfs_dquot_t structure (in xfs_dqmarker_t)
78 */
79typedef struct xfs_frlist {
80 struct xfs_dquot *qh_next;
81 struct xfs_dquot *qh_prev;
82 struct mutex qh_lock;
83 uint qh_version;
84 uint qh_nelems;
85} xfs_frlist_t;
86 75
87/* 76/*
88 * Quota Manager (global) structure. Lives only in core. 77 * Quota Manager (global) structure. Lives only in core.
@@ -91,7 +80,9 @@ typedef struct xfs_qm {
91 xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ 80 xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
92 xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ 81 xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
93 uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ 82 uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
94 xfs_frlist_t qm_dqfreelist; /* freelist of dquots */ 83 struct list_head qm_dqfrlist; /* freelist of dquots */
84 struct mutex qm_dqfrlist_lock;
85 int qm_dqfrlist_cnt;
95 atomic_t qm_totaldquots; /* total incore dquots */ 86 atomic_t qm_totaldquots; /* total incore dquots */
96 uint qm_nrefs; /* file systems with quota on */ 87 uint qm_nrefs; /* file systems with quota on */
97 int qm_dqfree_ratio;/* ratio of free to inuse dquots */ 88 int qm_dqfree_ratio;/* ratio of free to inuse dquots */
@@ -106,7 +97,9 @@ typedef struct xfs_qm {
106typedef struct xfs_quotainfo { 97typedef struct xfs_quotainfo {
107 xfs_inode_t *qi_uquotaip; /* user quota inode */ 98 xfs_inode_t *qi_uquotaip; /* user quota inode */
108 xfs_inode_t *qi_gquotaip; /* group quota inode */ 99 xfs_inode_t *qi_gquotaip; /* group quota inode */
109 xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ 100 struct list_head qi_dqlist; /* all dquots in filesys */
101 struct mutex qi_dqlist_lock;
102 int qi_dquots;
110 int qi_dqreclaims; /* a change here indicates 103 int qi_dqreclaims; /* a change here indicates
111 a removal in the dqlist */ 104 a removal in the dqlist */
112 time_t qi_btimelimit; /* limit for blks timer */ 105 time_t qi_btimelimit; /* limit for blks timer */
@@ -175,10 +168,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
175extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); 168extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
176extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); 169extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
177 170
178/* list stuff */
179extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
180extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
181
182#ifdef DEBUG 171#ifdef DEBUG
183extern int xfs_qm_internalqcheck(xfs_mount_t *); 172extern int xfs_qm_internalqcheck(xfs_mount_t *);
184#else 173#else
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 83e7ea3e25fa..3d1fc79532e2 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -55,7 +55,7 @@ static int xqm_proc_show(struct seq_file *m, void *v)
55 ndquot, 55 ndquot,
56 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, 56 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
57 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, 57 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
58 xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); 58 xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
59 return 0; 59 return 0;
60} 60}
61 61
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d0ee8d492db..92b002f1805f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -79,6 +79,7 @@ xfs_qm_scall_quotaoff(
79 xfs_mount_t *mp, 79 xfs_mount_t *mp,
80 uint flags) 80 uint flags)
81{ 81{
82 struct xfs_quotainfo *q = mp->m_quotainfo;
82 uint dqtype; 83 uint dqtype;
83 int error; 84 int error;
84 uint inactivate_flags; 85 uint inactivate_flags;
@@ -102,11 +103,8 @@ xfs_qm_scall_quotaoff(
102 * critical thing. 103 * critical thing.
103 * If quotaoff, then we must be dealing with the root filesystem. 104 * If quotaoff, then we must be dealing with the root filesystem.
104 */ 105 */
105 ASSERT(mp->m_quotainfo); 106 ASSERT(q);
106 if (mp->m_quotainfo) 107 mutex_lock(&q->qi_quotaofflock);
107 mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
108
109 ASSERT(mp->m_quotainfo);
110 108
111 /* 109 /*
112 * If we're just turning off quota enforcement, change mp and go. 110 * If we're just turning off quota enforcement, change mp and go.
@@ -117,7 +115,7 @@ xfs_qm_scall_quotaoff(
117 spin_lock(&mp->m_sb_lock); 115 spin_lock(&mp->m_sb_lock);
118 mp->m_sb.sb_qflags = mp->m_qflags; 116 mp->m_sb.sb_qflags = mp->m_qflags;
119 spin_unlock(&mp->m_sb_lock); 117 spin_unlock(&mp->m_sb_lock);
120 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 118 mutex_unlock(&q->qi_quotaofflock);
121 119
122 /* XXX what to do if error ? Revert back to old vals incore ? */ 120 /* XXX what to do if error ? Revert back to old vals incore ? */
123 error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); 121 error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
@@ -150,10 +148,8 @@ xfs_qm_scall_quotaoff(
150 * Nothing to do? Don't complain. This happens when we're just 148 * Nothing to do? Don't complain. This happens when we're just
151 * turning off quota enforcement. 149 * turning off quota enforcement.
152 */ 150 */
153 if ((mp->m_qflags & flags) == 0) { 151 if ((mp->m_qflags & flags) == 0)
154 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 152 goto out_unlock;
155 return (0);
156 }
157 153
158 /* 154 /*
159 * Write the LI_QUOTAOFF log record, and do SB changes atomically, 155 * Write the LI_QUOTAOFF log record, and do SB changes atomically,
@@ -162,7 +158,7 @@ xfs_qm_scall_quotaoff(
162 */ 158 */
163 error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); 159 error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
164 if (error) 160 if (error)
165 goto out_error; 161 goto out_unlock;
166 162
167 /* 163 /*
168 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct 164 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
@@ -204,7 +200,7 @@ xfs_qm_scall_quotaoff(
204 * So, if we couldn't purge all the dquots from the filesystem, 200 * So, if we couldn't purge all the dquots from the filesystem,
205 * we can't get rid of the incore data structures. 201 * we can't get rid of the incore data structures.
206 */ 202 */
207 while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF))) 203 while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
208 delay(10 * nculprits); 204 delay(10 * nculprits);
209 205
210 /* 206 /*
@@ -222,7 +218,7 @@ xfs_qm_scall_quotaoff(
222 if (error) { 218 if (error) {
223 /* We're screwed now. Shutdown is the only option. */ 219 /* We're screwed now. Shutdown is the only option. */
224 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 220 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
225 goto out_error; 221 goto out_unlock;
226 } 222 }
227 223
228 /* 224 /*
@@ -230,27 +226,26 @@ xfs_qm_scall_quotaoff(
230 */ 226 */
231 if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || 227 if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
232 ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { 228 ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
233 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 229 mutex_unlock(&q->qi_quotaofflock);
234 xfs_qm_destroy_quotainfo(mp); 230 xfs_qm_destroy_quotainfo(mp);
235 return (0); 231 return (0);
236 } 232 }
237 233
238 /* 234 /*
239 * Release our quotainode references, and vn_purge them, 235 * Release our quotainode references if we don't need them anymore.
240 * if we don't need them anymore.
241 */ 236 */
242 if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) { 237 if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
243 IRELE(XFS_QI_UQIP(mp)); 238 IRELE(q->qi_uquotaip);
244 XFS_QI_UQIP(mp) = NULL; 239 q->qi_uquotaip = NULL;
245 } 240 }
246 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { 241 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
247 IRELE(XFS_QI_GQIP(mp)); 242 IRELE(q->qi_gquotaip);
248 XFS_QI_GQIP(mp) = NULL; 243 q->qi_gquotaip = NULL;
249 } 244 }
250out_error:
251 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
252 245
253 return (error); 246out_unlock:
247 mutex_unlock(&q->qi_quotaofflock);
248 return error;
254} 249}
255 250
256int 251int
@@ -379,9 +374,9 @@ xfs_qm_scall_quotaon(
379 /* 374 /*
380 * Switch on quota enforcement in core. 375 * Switch on quota enforcement in core.
381 */ 376 */
382 mutex_lock(&(XFS_QI_QOFFLOCK(mp))); 377 mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
383 mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); 378 mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
384 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 379 mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
385 380
386 return (0); 381 return (0);
387} 382}
@@ -392,11 +387,12 @@ xfs_qm_scall_quotaon(
392 */ 387 */
393int 388int
394xfs_qm_scall_getqstat( 389xfs_qm_scall_getqstat(
395 xfs_mount_t *mp, 390 struct xfs_mount *mp,
396 fs_quota_stat_t *out) 391 struct fs_quota_stat *out)
397{ 392{
398 xfs_inode_t *uip, *gip; 393 struct xfs_quotainfo *q = mp->m_quotainfo;
399 boolean_t tempuqip, tempgqip; 394 struct xfs_inode *uip, *gip;
395 boolean_t tempuqip, tempgqip;
400 396
401 uip = gip = NULL; 397 uip = gip = NULL;
402 tempuqip = tempgqip = B_FALSE; 398 tempuqip = tempgqip = B_FALSE;
@@ -415,9 +411,9 @@ xfs_qm_scall_getqstat(
415 out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; 411 out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
416 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; 412 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
417 413
418 if (mp->m_quotainfo) { 414 if (q) {
419 uip = mp->m_quotainfo->qi_uquotaip; 415 uip = q->qi_uquotaip;
420 gip = mp->m_quotainfo->qi_gquotaip; 416 gip = q->qi_gquotaip;
421 } 417 }
422 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
423 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
@@ -441,17 +437,20 @@ xfs_qm_scall_getqstat(
441 if (tempgqip) 437 if (tempgqip)
442 IRELE(gip); 438 IRELE(gip);
443 } 439 }
444 if (mp->m_quotainfo) { 440 if (q) {
445 out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); 441 out->qs_incoredqs = q->qi_dquots;
446 out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp); 442 out->qs_btimelimit = q->qi_btimelimit;
447 out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp); 443 out->qs_itimelimit = q->qi_itimelimit;
448 out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp); 444 out->qs_rtbtimelimit = q->qi_rtbtimelimit;
449 out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp); 445 out->qs_bwarnlimit = q->qi_bwarnlimit;
450 out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp); 446 out->qs_iwarnlimit = q->qi_iwarnlimit;
451 } 447 }
452 return (0); 448 return 0;
453} 449}
454 450
451#define XFS_DQ_MASK \
452 (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
453
455/* 454/*
456 * Adjust quota limits, and start/stop timers accordingly. 455 * Adjust quota limits, and start/stop timers accordingly.
457 */ 456 */
@@ -462,15 +461,17 @@ xfs_qm_scall_setqlim(
462 uint type, 461 uint type,
463 fs_disk_quota_t *newlim) 462 fs_disk_quota_t *newlim)
464{ 463{
464 struct xfs_quotainfo *q = mp->m_quotainfo;
465 xfs_disk_dquot_t *ddq; 465 xfs_disk_dquot_t *ddq;
466 xfs_dquot_t *dqp; 466 xfs_dquot_t *dqp;
467 xfs_trans_t *tp; 467 xfs_trans_t *tp;
468 int error; 468 int error;
469 xfs_qcnt_t hard, soft; 469 xfs_qcnt_t hard, soft;
470 470
471 if ((newlim->d_fieldmask & 471 if (newlim->d_fieldmask & ~XFS_DQ_MASK)
472 (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) 472 return EINVAL;
473 return (0); 473 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
474 return 0;
474 475
475 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 476 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
476 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 477 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
@@ -485,7 +486,7 @@ xfs_qm_scall_setqlim(
485 * a quotaoff from happening). (XXXThis doesn't currently happen 486 * a quotaoff from happening). (XXXThis doesn't currently happen
486 * because we take the vfslock before calling xfs_qm_sysent). 487 * because we take the vfslock before calling xfs_qm_sysent).
487 */ 488 */
488 mutex_lock(&(XFS_QI_QOFFLOCK(mp))); 489 mutex_lock(&q->qi_quotaofflock);
489 490
490 /* 491 /*
491 * Get the dquot (locked), and join it to the transaction. 492 * Get the dquot (locked), and join it to the transaction.
@@ -493,9 +494,8 @@ xfs_qm_scall_setqlim(
493 */ 494 */
494 if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { 495 if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
495 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 496 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
496 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
497 ASSERT(error != ENOENT); 497 ASSERT(error != ENOENT);
498 return (error); 498 goto out_unlock;
499 } 499 }
500 xfs_trans_dqjoin(tp, dqp); 500 xfs_trans_dqjoin(tp, dqp);
501 ddq = &dqp->q_core; 501 ddq = &dqp->q_core;
@@ -513,8 +513,8 @@ xfs_qm_scall_setqlim(
513 ddq->d_blk_hardlimit = cpu_to_be64(hard); 513 ddq->d_blk_hardlimit = cpu_to_be64(hard);
514 ddq->d_blk_softlimit = cpu_to_be64(soft); 514 ddq->d_blk_softlimit = cpu_to_be64(soft);
515 if (id == 0) { 515 if (id == 0) {
516 mp->m_quotainfo->qi_bhardlimit = hard; 516 q->qi_bhardlimit = hard;
517 mp->m_quotainfo->qi_bsoftlimit = soft; 517 q->qi_bsoftlimit = soft;
518 } 518 }
519 } else { 519 } else {
520 qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); 520 qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft);
@@ -529,8 +529,8 @@ xfs_qm_scall_setqlim(
529 ddq->d_rtb_hardlimit = cpu_to_be64(hard); 529 ddq->d_rtb_hardlimit = cpu_to_be64(hard);
530 ddq->d_rtb_softlimit = cpu_to_be64(soft); 530 ddq->d_rtb_softlimit = cpu_to_be64(soft);
531 if (id == 0) { 531 if (id == 0) {
532 mp->m_quotainfo->qi_rtbhardlimit = hard; 532 q->qi_rtbhardlimit = hard;
533 mp->m_quotainfo->qi_rtbsoftlimit = soft; 533 q->qi_rtbsoftlimit = soft;
534 } 534 }
535 } else { 535 } else {
536 qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); 536 qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
@@ -546,8 +546,8 @@ xfs_qm_scall_setqlim(
546 ddq->d_ino_hardlimit = cpu_to_be64(hard); 546 ddq->d_ino_hardlimit = cpu_to_be64(hard);
547 ddq->d_ino_softlimit = cpu_to_be64(soft); 547 ddq->d_ino_softlimit = cpu_to_be64(soft);
548 if (id == 0) { 548 if (id == 0) {
549 mp->m_quotainfo->qi_ihardlimit = hard; 549 q->qi_ihardlimit = hard;
550 mp->m_quotainfo->qi_isoftlimit = soft; 550 q->qi_isoftlimit = soft;
551 } 551 }
552 } else { 552 } else {
553 qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); 553 qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
@@ -572,23 +572,23 @@ xfs_qm_scall_setqlim(
572 * for warnings. 572 * for warnings.
573 */ 573 */
574 if (newlim->d_fieldmask & FS_DQ_BTIMER) { 574 if (newlim->d_fieldmask & FS_DQ_BTIMER) {
575 mp->m_quotainfo->qi_btimelimit = newlim->d_btimer; 575 q->qi_btimelimit = newlim->d_btimer;
576 ddq->d_btimer = cpu_to_be32(newlim->d_btimer); 576 ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
577 } 577 }
578 if (newlim->d_fieldmask & FS_DQ_ITIMER) { 578 if (newlim->d_fieldmask & FS_DQ_ITIMER) {
579 mp->m_quotainfo->qi_itimelimit = newlim->d_itimer; 579 q->qi_itimelimit = newlim->d_itimer;
580 ddq->d_itimer = cpu_to_be32(newlim->d_itimer); 580 ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
581 } 581 }
582 if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { 582 if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
583 mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer; 583 q->qi_rtbtimelimit = newlim->d_rtbtimer;
584 ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); 584 ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
585 } 585 }
586 if (newlim->d_fieldmask & FS_DQ_BWARNS) 586 if (newlim->d_fieldmask & FS_DQ_BWARNS)
587 mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns; 587 q->qi_bwarnlimit = newlim->d_bwarns;
588 if (newlim->d_fieldmask & FS_DQ_IWARNS) 588 if (newlim->d_fieldmask & FS_DQ_IWARNS)
589 mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns; 589 q->qi_iwarnlimit = newlim->d_iwarns;
590 if (newlim->d_fieldmask & FS_DQ_RTBWARNS) 590 if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
591 mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns; 591 q->qi_rtbwarnlimit = newlim->d_rtbwarns;
592 } else { 592 } else {
593 /* 593 /*
594 * If the user is now over quota, start the timelimit. 594 * If the user is now over quota, start the timelimit.
@@ -605,8 +605,9 @@ xfs_qm_scall_setqlim(
605 error = xfs_trans_commit(tp, 0); 605 error = xfs_trans_commit(tp, 0);
606 xfs_qm_dqprint(dqp); 606 xfs_qm_dqprint(dqp);
607 xfs_qm_dqrele(dqp); 607 xfs_qm_dqrele(dqp);
608 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
609 608
609 out_unlock:
610 mutex_unlock(&q->qi_quotaofflock);
610 return error; 611 return error;
611} 612}
612 613
@@ -853,7 +854,8 @@ xfs_dqrele_inode(
853 int error; 854 int error;
854 855
855 /* skip quota inodes */ 856 /* skip quota inodes */
856 if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) { 857 if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
858 ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
857 ASSERT(ip->i_udquot == NULL); 859 ASSERT(ip->i_udquot == NULL);
858 ASSERT(ip->i_gdquot == NULL); 860 ASSERT(ip->i_gdquot == NULL);
859 read_unlock(&pag->pag_ici_lock); 861 read_unlock(&pag->pag_ici_lock);
@@ -891,7 +893,8 @@ xfs_qm_dqrele_all_inodes(
891 uint flags) 893 uint flags)
892{ 894{
893 ASSERT(mp->m_quotainfo); 895 ASSERT(mp->m_quotainfo);
894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); 896 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
897 XFS_ICI_NO_TAG, 0, NULL);
895} 898}
896 899
897/*------------------------------------------------------------------------*/ 900/*------------------------------------------------------------------------*/
@@ -930,7 +933,8 @@ struct mutex qcheck_lock;
930} 933}
931 934
932typedef struct dqtest { 935typedef struct dqtest {
933 xfs_dqmarker_t q_lists; 936 uint dq_flags; /* various flags (XFS_DQ_*) */
937 struct list_head q_hashlist;
934 xfs_dqhash_t *q_hash; /* the hashchain header */ 938 xfs_dqhash_t *q_hash; /* the hashchain header */
935 xfs_mount_t *q_mount; /* filesystem this relates to */ 939 xfs_mount_t *q_mount; /* filesystem this relates to */
936 xfs_dqid_t d_id; /* user id or group id */ 940 xfs_dqid_t d_id; /* user id or group id */
@@ -941,14 +945,9 @@ typedef struct dqtest {
941STATIC void 945STATIC void
942xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) 946xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
943{ 947{
944 xfs_dquot_t *d; 948 list_add(&dqp->q_hashlist, &h->qh_list);
945 if (((d) = (h)->qh_next)) 949 h->qh_version++;
946 (d)->HL_PREVP = &((dqp)->HL_NEXT); 950 h->qh_nelems++;
947 (dqp)->HL_NEXT = d;
948 (dqp)->HL_PREVP = &((h)->qh_next);
949 (h)->qh_next = (xfs_dquot_t *)dqp;
950 (h)->qh_version++;
951 (h)->qh_nelems++;
952} 951}
953STATIC void 952STATIC void
954xfs_qm_dqtest_print( 953xfs_qm_dqtest_print(
@@ -1060,9 +1059,7 @@ xfs_qm_internalqcheck_dqget(
1060 xfs_dqhash_t *h; 1059 xfs_dqhash_t *h;
1061 1060
1062 h = DQTEST_HASH(mp, id, type); 1061 h = DQTEST_HASH(mp, id, type);
1063 for (d = (xfs_dqtest_t *) h->qh_next; d != NULL; 1062 list_for_each_entry(d, &h->qh_list, q_hashlist) {
1064 d = (xfs_dqtest_t *) d->HL_NEXT) {
1065 /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */
1066 if (d->d_id == id && mp == d->q_mount) { 1063 if (d->d_id == id && mp == d->q_mount) {
1067 *O_dq = d; 1064 *O_dq = d;
1068 return (0); 1065 return (0);
@@ -1073,6 +1070,7 @@ xfs_qm_internalqcheck_dqget(
1073 d->d_id = id; 1070 d->d_id = id;
1074 d->q_mount = mp; 1071 d->q_mount = mp;
1075 d->q_hash = h; 1072 d->q_hash = h;
1073 INIT_LIST_HEAD(&d->q_hashlist);
1076 xfs_qm_hashinsert(h, d); 1074 xfs_qm_hashinsert(h, d);
1077 *O_dq = d; 1075 *O_dq = d;
1078 return (0); 1076 return (0);
@@ -1179,8 +1177,6 @@ xfs_qm_internalqcheck(
1179 xfs_ino_t lastino; 1177 xfs_ino_t lastino;
1180 int done, count; 1178 int done, count;
1181 int i; 1179 int i;
1182 xfs_dqtest_t *d, *e;
1183 xfs_dqhash_t *h1;
1184 int error; 1180 int error;
1185 1181
1186 lastino = 0; 1182 lastino = 0;
@@ -1220,19 +1216,18 @@ xfs_qm_internalqcheck(
1220 } 1216 }
1221 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1217 cmn_err(CE_DEBUG, "Checking results against system dquots");
1222 for (i = 0; i < qmtest_hashmask; i++) { 1218 for (i = 0; i < qmtest_hashmask; i++) {
1223 h1 = &qmtest_udqtab[i]; 1219 xfs_dqtest_t *d, *n;
1224 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1220 xfs_dqhash_t *h;
1221
1222 h = &qmtest_udqtab[i];
1223 list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
1225 xfs_dqtest_cmp(d); 1224 xfs_dqtest_cmp(d);
1226 e = (xfs_dqtest_t *) d->HL_NEXT;
1227 kmem_free(d); 1225 kmem_free(d);
1228 d = e;
1229 } 1226 }
1230 h1 = &qmtest_gdqtab[i]; 1227 h = &qmtest_gdqtab[i];
1231 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1228 list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
1232 xfs_dqtest_cmp(d); 1229 xfs_dqtest_cmp(d);
1233 e = (xfs_dqtest_t *) d->HL_NEXT;
1234 kmem_free(d); 1230 kmem_free(d);
1235 d = e;
1236 } 1231 }
1237 } 1232 }
1238 1233
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 8286b2842b6b..94a3d927d716 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -24,43 +24,6 @@
24 */ 24 */
25#define XFS_DQITER_MAP_SIZE 10 25#define XFS_DQITER_MAP_SIZE 10
26 26
27/* Number of dquots that fit in to a dquot block */
28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
29
30#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
31
32#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
33#define XFS_QI_UQIP(mp) ((mp)->m_quotainfo->qi_uquotaip)
34#define XFS_QI_GQIP(mp) ((mp)->m_quotainfo->qi_gquotaip)
35#define XFS_QI_DQCHUNKLEN(mp) ((mp)->m_quotainfo->qi_dqchunklen)
36#define XFS_QI_BTIMELIMIT(mp) ((mp)->m_quotainfo->qi_btimelimit)
37#define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
38#define XFS_QI_ITIMELIMIT(mp) ((mp)->m_quotainfo->qi_itimelimit)
39#define XFS_QI_BWARNLIMIT(mp) ((mp)->m_quotainfo->qi_bwarnlimit)
40#define XFS_QI_RTBWARNLIMIT(mp) ((mp)->m_quotainfo->qi_rtbwarnlimit)
41#define XFS_QI_IWARNLIMIT(mp) ((mp)->m_quotainfo->qi_iwarnlimit)
42#define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock)
43
44#define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist)
45#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next)
46#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems)
47
48#define xfs_qm_mplist_lock(mp) \
49 mutex_lock(&(XFS_QI_MPL_LIST(mp).qh_lock))
50#define xfs_qm_mplist_nowait(mp) \
51 mutex_trylock(&(XFS_QI_MPL_LIST(mp).qh_lock))
52#define xfs_qm_mplist_unlock(mp) \
53 mutex_unlock(&(XFS_QI_MPL_LIST(mp).qh_lock))
54#define XFS_QM_IS_MPLIST_LOCKED(mp) \
55 mutex_is_locked(&(XFS_QI_MPL_LIST(mp).qh_lock))
56
57#define xfs_qm_freelist_lock(qm) \
58 mutex_lock(&((qm)->qm_dqfreelist.qh_lock))
59#define xfs_qm_freelist_lock_nowait(qm) \
60 mutex_trylock(&((qm)->qm_dqfreelist.qh_lock))
61#define xfs_qm_freelist_unlock(qm) \
62 mutex_unlock(&((qm)->qm_dqfreelist.qh_lock))
63
64/* 27/*
65 * Hash into a bucket in the dquot hash table, based on <mp, id>. 28 * Hash into a bucket in the dquot hash table, based on <mp, id>.
66 */ 29 */
@@ -72,9 +35,6 @@
72 XFS_DQ_HASHVAL(mp, id)) : \ 35 XFS_DQ_HASHVAL(mp, id)) : \
73 (xfs_Gqm->qm_grp_dqhtable + \ 36 (xfs_Gqm->qm_grp_dqhtable + \
74 XFS_DQ_HASHVAL(mp, id))) 37 XFS_DQ_HASHVAL(mp, id)))
75#define XFS_IS_DQTYPE_ON(mp, type) (type == XFS_DQ_USER ? \
76 XFS_IS_UQUOTA_ON(mp) : \
77 XFS_IS_OQUOTA_ON(mp))
78#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ 38#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
79 !dqp->q_core.d_blk_hardlimit && \ 39 !dqp->q_core.d_blk_hardlimit && \
80 !dqp->q_core.d_blk_softlimit && \ 40 !dqp->q_core.d_blk_softlimit && \
@@ -86,68 +46,6 @@
86 !dqp->q_core.d_rtbcount && \ 46 !dqp->q_core.d_rtbcount && \
87 !dqp->q_core.d_icount) 47 !dqp->q_core.d_icount)
88 48
89#define HL_PREVP dq_hashlist.ql_prevp
90#define HL_NEXT dq_hashlist.ql_next
91#define MPL_PREVP dq_mplist.ql_prevp
92#define MPL_NEXT dq_mplist.ql_next
93
94
95#define _LIST_REMOVE(h, dqp, PVP, NXT) \
96 { \
97 xfs_dquot_t *d; \
98 if (((d) = (dqp)->NXT)) \
99 (d)->PVP = (dqp)->PVP; \
100 *((dqp)->PVP) = d; \
101 (dqp)->NXT = NULL; \
102 (dqp)->PVP = NULL; \
103 (h)->qh_version++; \
104 (h)->qh_nelems--; \
105 }
106
107#define _LIST_INSERT(h, dqp, PVP, NXT) \
108 { \
109 xfs_dquot_t *d; \
110 if (((d) = (h)->qh_next)) \
111 (d)->PVP = &((dqp)->NXT); \
112 (dqp)->NXT = d; \
113 (dqp)->PVP = &((h)->qh_next); \
114 (h)->qh_next = dqp; \
115 (h)->qh_version++; \
116 (h)->qh_nelems++; \
117 }
118
119#define FOREACH_DQUOT_IN_MP(dqp, mp) \
120 for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT)
121
122#define FOREACH_DQUOT_IN_FREELIST(dqp, qlist) \
123for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
124 (dqp) = (dqp)->dq_flnext)
125
126#define XQM_HASHLIST_INSERT(h, dqp) \
127 _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT)
128
129#define XQM_FREELIST_INSERT(h, dqp) \
130 xfs_qm_freelist_append(h, dqp)
131
132#define XQM_MPLIST_INSERT(h, dqp) \
133 _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT)
134
135#define XQM_HASHLIST_REMOVE(h, dqp) \
136 _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT)
137#define XQM_FREELIST_REMOVE(dqp) \
138 xfs_qm_freelist_unlink(dqp)
139#define XQM_MPLIST_REMOVE(h, dqp) \
140 { _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \
141 XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; }
142
143#define XFS_DQ_IS_LOGITEM_INITD(dqp) ((dqp)->q_logitem.qli_dquot == (dqp))
144
145#define XFS_QM_DQP_TO_DQACCT(tp, dqp) (XFS_QM_ISUDQ(dqp) ? \
146 (tp)->t_dqinfo->dqa_usrdquots : \
147 (tp)->t_dqinfo->dqa_grpdquots)
148#define XFS_IS_SUSER_DQUOT(dqp) \
149 (!((dqp)->q_core.d_id))
150
151#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ 49#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
152 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ 50 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
153 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) 51 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index c3ab75cb1d9a..061d827da33c 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -59,12 +59,11 @@ xfs_trans_dqjoin(
59 xfs_trans_t *tp, 59 xfs_trans_t *tp,
60 xfs_dquot_t *dqp) 60 xfs_dquot_t *dqp)
61{ 61{
62 xfs_dq_logitem_t *lp; 62 xfs_dq_logitem_t *lp = &dqp->q_logitem;
63 63
64 ASSERT(! XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); 64 ASSERT(dqp->q_transp != tp);
65 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 65 ASSERT(XFS_DQ_IS_LOCKED(dqp));
66 ASSERT(XFS_DQ_IS_LOGITEM_INITD(dqp)); 66 ASSERT(lp->qli_dquot == dqp);
67 lp = &dqp->q_logitem;
68 67
69 /* 68 /*
70 * Get a log_item_desc to point at the new item. 69 * Get a log_item_desc to point at the new item.
@@ -96,7 +95,7 @@ xfs_trans_log_dquot(
96{ 95{
97 xfs_log_item_desc_t *lidp; 96 xfs_log_item_desc_t *lidp;
98 97
99 ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); 98 ASSERT(dqp->q_transp == tp);
100 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 99 ASSERT(XFS_DQ_IS_LOCKED(dqp));
101 100
102 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem)); 101 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
@@ -198,16 +197,16 @@ xfs_trans_get_dqtrx(
198 int i; 197 int i;
199 xfs_dqtrx_t *qa; 198 xfs_dqtrx_t *qa;
200 199
201 for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { 200 qa = XFS_QM_ISUDQ(dqp) ?
202 qa = XFS_QM_DQP_TO_DQACCT(tp, dqp); 201 tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
203 202
203 for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
204 if (qa[i].qt_dquot == NULL || 204 if (qa[i].qt_dquot == NULL ||
205 qa[i].qt_dquot == dqp) { 205 qa[i].qt_dquot == dqp)
206 return (&qa[i]); 206 return &qa[i];
207 }
208 } 207 }
209 208
210 return (NULL); 209 return NULL;
211} 210}
212 211
213/* 212/*
@@ -381,7 +380,7 @@ xfs_trans_apply_dquot_deltas(
381 break; 380 break;
382 381
383 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 382 ASSERT(XFS_DQ_IS_LOCKED(dqp));
384 ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); 383 ASSERT(dqp->q_transp == tp);
385 384
386 /* 385 /*
387 * adjust the actual number of blocks used 386 * adjust the actual number of blocks used
@@ -639,7 +638,7 @@ xfs_trans_dqresv(
639 softlimit = q->qi_bsoftlimit; 638 softlimit = q->qi_bsoftlimit;
640 timer = be32_to_cpu(dqp->q_core.d_btimer); 639 timer = be32_to_cpu(dqp->q_core.d_btimer);
641 warns = be16_to_cpu(dqp->q_core.d_bwarns); 640 warns = be16_to_cpu(dqp->q_core.d_bwarns);
642 warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount); 641 warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
643 resbcountp = &dqp->q_res_bcount; 642 resbcountp = &dqp->q_res_bcount;
644 } else { 643 } else {
645 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); 644 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
@@ -651,7 +650,7 @@ xfs_trans_dqresv(
651 softlimit = q->qi_rtbsoftlimit; 650 softlimit = q->qi_rtbsoftlimit;
652 timer = be32_to_cpu(dqp->q_core.d_rtbtimer); 651 timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
653 warns = be16_to_cpu(dqp->q_core.d_rtbwarns); 652 warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
654 warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); 653 warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
655 resbcountp = &dqp->q_res_rtbcount; 654 resbcountp = &dqp->q_res_rtbcount;
656 } 655 }
657 656
@@ -691,7 +690,7 @@ xfs_trans_dqresv(
691 count = be64_to_cpu(dqp->q_core.d_icount); 690 count = be64_to_cpu(dqp->q_core.d_icount);
692 timer = be32_to_cpu(dqp->q_core.d_itimer); 691 timer = be32_to_cpu(dqp->q_core.d_itimer);
693 warns = be16_to_cpu(dqp->q_core.d_iwarns); 692 warns = be16_to_cpu(dqp->q_core.d_iwarns);
694 warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount); 693 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
695 hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); 694 hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
696 if (!hardlimit) 695 if (!hardlimit)
697 hardlimit = q->qi_ihardlimit; 696 hardlimit = q->qi_ihardlimit;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1a5a1ff88ea..abb8222b88c9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
223 int pag_ici_init; /* incore inode cache initialised */ 223 int pag_ici_init; /* incore inode cache initialised */
224 rwlock_t pag_ici_lock; /* incore inode lock */ 224 rwlock_t pag_ici_lock; /* incore inode lock */
225 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 225 struct radix_tree_root pag_ici_root; /* incore inode cache root */
226 int pag_ici_reclaimable; /* reclaimable inodes */
226#endif 227#endif
227 int pagb_count; /* pagb slots in use */ 228 int pagb_count; /* pagb slots in use */
228 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ 229 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 5c11e4d17010..99587ded043f 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3829,7 +3829,7 @@ xfs_bmap_add_attrfork(
3829 } 3829 }
3830 if ((error = xfs_bmap_finish(&tp, &flist, &committed))) 3830 if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
3831 goto error2; 3831 goto error2;
3832 error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES); 3832 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3833 ASSERT(ip->i_df.if_ext_max == 3833 ASSERT(ip->i_df.if_ext_max ==
3834 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); 3834 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3835 return error; 3835 return error;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index f3c49e69eab9..240340a4727b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -372,12 +372,12 @@ xfs_buf_item_pin(
372 */ 372 */
373STATIC void 373STATIC void
374xfs_buf_item_unpin( 374xfs_buf_item_unpin(
375 xfs_buf_log_item_t *bip, 375 xfs_buf_log_item_t *bip)
376 int stale)
377{ 376{
378 struct xfs_ail *ailp; 377 struct xfs_ail *ailp;
379 xfs_buf_t *bp; 378 xfs_buf_t *bp;
380 int freed; 379 int freed;
380 int stale = bip->bli_flags & XFS_BLI_STALE;
381 381
382 bp = bip->bli_buf; 382 bp = bip->bli_buf;
383 ASSERT(bp != NULL); 383 ASSERT(bp != NULL);
@@ -428,40 +428,34 @@ xfs_buf_item_unpin_remove(
428 xfs_buf_log_item_t *bip, 428 xfs_buf_log_item_t *bip,
429 xfs_trans_t *tp) 429 xfs_trans_t *tp)
430{ 430{
431 xfs_buf_t *bp; 431 /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */
432 xfs_log_item_desc_t *lidp;
433 int stale = 0;
434
435 bp = bip->bli_buf;
436 /*
437 * will xfs_buf_item_unpin() call xfs_buf_item_relse()?
438 */
439 if ((atomic_read(&bip->bli_refcount) == 1) && 432 if ((atomic_read(&bip->bli_refcount) == 1) &&
440 (bip->bli_flags & XFS_BLI_STALE)) { 433 (bip->bli_flags & XFS_BLI_STALE)) {
434 /*
435 * yes -- We can safely do some work here and then call
436 * buf_item_unpin to do the rest because we are
437 * are holding the buffer locked so no one else will be
438 * able to bump up the refcount. We have to remove the
439 * log item from the transaction as we are about to release
440 * our reference to the buffer. If we don't, the unlock that
441 * occurs later in the xfs_trans_uncommit() will try to
442 * reference the buffer which we no longer have a hold on.
443 */
444 struct xfs_log_item_desc *lidp;
445
441 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); 446 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
442 trace_xfs_buf_item_unpin_stale(bip); 447 trace_xfs_buf_item_unpin_stale(bip);
443 448
444 /* 449 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
445 * yes -- clear the xaction descriptor in-use flag
446 * and free the chunk if required. We can safely
447 * do some work here and then call buf_item_unpin
448 * to do the rest because if the if is true, then
449 * we are holding the buffer locked so no one else
450 * will be able to bump up the refcount.
451 */
452 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) bip);
453 stale = lidp->lid_flags & XFS_LID_BUF_STALE;
454 xfs_trans_free_item(tp, lidp); 450 xfs_trans_free_item(tp, lidp);
451
455 /* 452 /*
456 * Since the transaction no longer refers to the buffer, 453 * Since the transaction no longer refers to the buffer, the
457 * the buffer should no longer refer to the transaction. 454 * buffer should no longer refer to the transaction.
458 */ 455 */
459 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 456 XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
460 } 457 }
461 458 xfs_buf_item_unpin(bip);
462 xfs_buf_item_unpin(bip, stale);
463
464 return;
465} 459}
466 460
467/* 461/*
@@ -675,7 +669,7 @@ static struct xfs_item_ops xfs_buf_item_ops = {
675 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 669 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
676 xfs_buf_item_format, 670 xfs_buf_item_format,
677 .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, 671 .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin,
678 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_buf_item_unpin, 672 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin,
679 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 673 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
680 xfs_buf_item_unpin_remove, 674 xfs_buf_item_unpin_remove,
681 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, 675 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock,
@@ -733,10 +727,7 @@ xfs_buf_item_init(
733 727
734 bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, 728 bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
735 KM_SLEEP); 729 KM_SLEEP);
736 bip->bli_item.li_type = XFS_LI_BUF; 730 xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
737 bip->bli_item.li_ops = &xfs_buf_item_ops;
738 bip->bli_item.li_mountp = mp;
739 bip->bli_item.li_ailp = mp->m_ail;
740 bip->bli_buf = bp; 731 bip->bli_buf = bp;
741 xfs_buf_hold(bp); 732 xfs_buf_hold(bp);
742 bip->bli_format.blf_type = XFS_LI_BUF; 733 bip->bli_format.blf_type = XFS_LI_BUF;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 217f34af00cb..df4454511f73 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -26,7 +26,7 @@ extern kmem_zone_t *xfs_buf_item_zone;
26 * have been logged. 26 * have been logged.
27 * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. 27 * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything.
28 */ 28 */
29typedef struct xfs_buf_log_format_t { 29typedef struct xfs_buf_log_format {
30 unsigned short blf_type; /* buf log item type indicator */ 30 unsigned short blf_type; /* buf log item type indicator */
31 unsigned short blf_size; /* size of this item */ 31 unsigned short blf_size; /* size of this item */
32 ushort blf_flags; /* misc state */ 32 ushort blf_flags; /* misc state */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 92d5cd5bf4f2..ef96175c0744 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -186,18 +186,18 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...)
186 186
187void 187void
188xfs_error_report( 188xfs_error_report(
189 char *tag, 189 const char *tag,
190 int level, 190 int level,
191 xfs_mount_t *mp, 191 struct xfs_mount *mp,
192 char *fname, 192 const char *filename,
193 int linenum, 193 int linenum,
194 inst_t *ra) 194 inst_t *ra)
195{ 195{
196 if (level <= xfs_error_level) { 196 if (level <= xfs_error_level) {
197 xfs_cmn_err(XFS_PTAG_ERROR_REPORT, 197 xfs_cmn_err(XFS_PTAG_ERROR_REPORT,
198 CE_ALERT, mp, 198 CE_ALERT, mp,
199 "XFS internal error %s at line %d of file %s. Caller 0x%p\n", 199 "XFS internal error %s at line %d of file %s. Caller 0x%p\n",
200 tag, linenum, fname, ra); 200 tag, linenum, filename, ra);
201 201
202 xfs_stack_trace(); 202 xfs_stack_trace();
203 } 203 }
@@ -205,15 +205,15 @@ xfs_error_report(
205 205
206void 206void
207xfs_corruption_error( 207xfs_corruption_error(
208 char *tag, 208 const char *tag,
209 int level, 209 int level,
210 xfs_mount_t *mp, 210 struct xfs_mount *mp,
211 void *p, 211 void *p,
212 char *fname, 212 const char *filename,
213 int linenum, 213 int linenum,
214 inst_t *ra) 214 inst_t *ra)
215{ 215{
216 if (level <= xfs_error_level) 216 if (level <= xfs_error_level)
217 xfs_hex_dump(p, 16); 217 xfs_hex_dump(p, 16);
218 xfs_error_report(tag, level, mp, fname, linenum, ra); 218 xfs_error_report(tag, level, mp, filename, linenum, ra);
219} 219}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 0c93051c4651..c2c1a072bb82 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -29,10 +29,11 @@ extern int xfs_error_trap(int);
29 29
30struct xfs_mount; 30struct xfs_mount;
31 31
32extern void xfs_error_report(char *tag, int level, struct xfs_mount *mp, 32extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
33 char *fname, int linenum, inst_t *ra); 33 const char *filename, int linenum, inst_t *ra);
34extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp, 34extern void xfs_corruption_error(const char *tag, int level,
35 void *p, char *fname, int linenum, inst_t *ra); 35 struct xfs_mount *mp, void *p, const char *filename,
36 int linenum, inst_t *ra);
36 37
37#define XFS_ERROR_REPORT(e, lvl, mp) \ 38#define XFS_ERROR_REPORT(e, lvl, mp) \
38 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) 39 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6f35ed1b39b9..409fe81585fd 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -106,7 +106,7 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
106 */ 106 */
107/*ARGSUSED*/ 107/*ARGSUSED*/
108STATIC void 108STATIC void
109xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) 109xfs_efi_item_unpin(xfs_efi_log_item_t *efip)
110{ 110{
111 struct xfs_ail *ailp = efip->efi_item.li_ailp; 111 struct xfs_ail *ailp = efip->efi_item.li_ailp;
112 112
@@ -224,7 +224,7 @@ static struct xfs_item_ops xfs_efi_item_ops = {
224 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 224 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
225 xfs_efi_item_format, 225 xfs_efi_item_format,
226 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin, 226 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin,
227 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_efi_item_unpin, 227 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin,
228 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 228 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
229 xfs_efi_item_unpin_remove, 229 xfs_efi_item_unpin_remove,
230 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock, 230 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock,
@@ -259,10 +259,7 @@ xfs_efi_init(xfs_mount_t *mp,
259 KM_SLEEP); 259 KM_SLEEP);
260 } 260 }
261 261
262 efip->efi_item.li_type = XFS_LI_EFI; 262 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
263 efip->efi_item.li_ops = &xfs_efi_item_ops;
264 efip->efi_item.li_mountp = mp;
265 efip->efi_item.li_ailp = mp->m_ail;
266 efip->efi_format.efi_nextents = nextents; 263 efip->efi_format.efi_nextents = nextents;
267 efip->efi_format.efi_id = (__psint_t)(void*)efip; 264 efip->efi_format.efi_id = (__psint_t)(void*)efip;
268 265
@@ -428,7 +425,7 @@ xfs_efd_item_pin(xfs_efd_log_item_t *efdp)
428 */ 425 */
429/*ARGSUSED*/ 426/*ARGSUSED*/
430STATIC void 427STATIC void
431xfs_efd_item_unpin(xfs_efd_log_item_t *efdp, int stale) 428xfs_efd_item_unpin(xfs_efd_log_item_t *efdp)
432{ 429{
433 return; 430 return;
434} 431}
@@ -518,7 +515,7 @@ static struct xfs_item_ops xfs_efd_item_ops = {
518 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 515 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
519 xfs_efd_item_format, 516 xfs_efd_item_format,
520 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin, 517 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin,
521 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_efd_item_unpin, 518 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin,
522 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 519 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
523 xfs_efd_item_unpin_remove, 520 xfs_efd_item_unpin_remove,
524 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock, 521 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock,
@@ -554,10 +551,7 @@ xfs_efd_init(xfs_mount_t *mp,
554 KM_SLEEP); 551 KM_SLEEP);
555 } 552 }
556 553
557 efdp->efd_item.li_type = XFS_LI_EFD; 554 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
558 efdp->efd_item.li_ops = &xfs_efd_item_ops;
559 efdp->efd_item.li_mountp = mp;
560 efdp->efd_item.li_ailp = mp->m_ail;
561 efdp->efd_efip = efip; 555 efdp->efd_efip = efip;
562 efdp->efd_format.efd_nextents = nextents; 556 efdp->efd_format.efd_nextents = nextents;
563 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 557 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0ffd56447045..8cd6e8d8fe9c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2449,6 +2449,8 @@ xfs_iunpin_nowait(
2449{ 2449{
2450 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2450 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2451 2451
2452 trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
2453
2452 /* Give the log a push to start the unpinning I/O */ 2454 /* Give the log a push to start the unpinning I/O */
2453 xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); 2455 xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
2454 2456
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7bfea8540159..cf8249a60004 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -543,6 +543,7 @@ xfs_inode_item_pin(
543{ 543{
544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
545 545
546 trace_xfs_inode_pin(iip->ili_inode, _RET_IP_);
546 atomic_inc(&iip->ili_inode->i_pincount); 547 atomic_inc(&iip->ili_inode->i_pincount);
547} 548}
548 549
@@ -556,11 +557,11 @@ xfs_inode_item_pin(
556/* ARGSUSED */ 557/* ARGSUSED */
557STATIC void 558STATIC void
558xfs_inode_item_unpin( 559xfs_inode_item_unpin(
559 xfs_inode_log_item_t *iip, 560 xfs_inode_log_item_t *iip)
560 int stale)
561{ 561{
562 struct xfs_inode *ip = iip->ili_inode; 562 struct xfs_inode *ip = iip->ili_inode;
563 563
564 trace_xfs_inode_unpin(ip, _RET_IP_);
564 ASSERT(atomic_read(&ip->i_pincount) > 0); 565 ASSERT(atomic_read(&ip->i_pincount) > 0);
565 if (atomic_dec_and_test(&ip->i_pincount)) 566 if (atomic_dec_and_test(&ip->i_pincount))
566 wake_up(&ip->i_ipin_wait); 567 wake_up(&ip->i_ipin_wait);
@@ -572,7 +573,7 @@ xfs_inode_item_unpin_remove(
572 xfs_inode_log_item_t *iip, 573 xfs_inode_log_item_t *iip,
573 xfs_trans_t *tp) 574 xfs_trans_t *tp)
574{ 575{
575 xfs_inode_item_unpin(iip, 0); 576 xfs_inode_item_unpin(iip);
576} 577}
577 578
578/* 579/*
@@ -838,7 +839,7 @@ static struct xfs_item_ops xfs_inode_item_ops = {
838 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 839 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
839 xfs_inode_item_format, 840 xfs_inode_item_format,
840 .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, 841 .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin,
841 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin, 842 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin,
842 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 843 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
843 xfs_inode_item_unpin_remove, 844 xfs_inode_item_unpin_remove,
844 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, 845 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock,
@@ -865,17 +866,9 @@ xfs_inode_item_init(
865 ASSERT(ip->i_itemp == NULL); 866 ASSERT(ip->i_itemp == NULL);
866 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); 867 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
867 868
868 iip->ili_item.li_type = XFS_LI_INODE;
869 iip->ili_item.li_ops = &xfs_inode_item_ops;
870 iip->ili_item.li_mountp = mp;
871 iip->ili_item.li_ailp = mp->m_ail;
872 iip->ili_inode = ip; 869 iip->ili_inode = ip;
873 870 xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
874 /* 871 &xfs_inode_item_ops);
875 We have zeroed memory. No need ...
876 iip->ili_extents_buf = NULL;
877 */
878
879 iip->ili_format.ilf_type = XFS_LI_INODE; 872 iip->ili_format.ilf_type = XFS_LI_INODE;
880 iip->ili_format.ilf_ino = ip->i_ino; 873 iip->ili_format.ilf_ino = ip->i_ino;
881 iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; 874 iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 0b65039951a0..ef14943829da 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -55,71 +55,33 @@
55#define XFS_STRAT_WRITE_IMAPS 2 55#define XFS_STRAT_WRITE_IMAPS 2
56#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP 56#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP
57 57
58STATIC int 58STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
59xfs_imap_to_bmap( 59 int, struct xfs_bmbt_irec *, int *);
60 xfs_inode_t *ip, 60STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
61 xfs_off_t offset, 61 struct xfs_bmbt_irec *, int *);
62 xfs_bmbt_irec_t *imap, 62STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
63 xfs_iomap_t *iomapp, 63 struct xfs_bmbt_irec *, int *);
64 int imaps, /* Number of imap entries */
65 int iomaps, /* Number of iomap entries */
66 int flags)
67{
68 xfs_mount_t *mp = ip->i_mount;
69 int pbm;
70 xfs_fsblock_t start_block;
71
72
73 for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) {
74 iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
75 iomapp->iomap_delta = offset - iomapp->iomap_offset;
76 iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
77 iomapp->iomap_flags = flags;
78
79 if (XFS_IS_REALTIME_INODE(ip)) {
80 iomapp->iomap_flags |= IOMAP_REALTIME;
81 iomapp->iomap_target = mp->m_rtdev_targp;
82 } else {
83 iomapp->iomap_target = mp->m_ddev_targp;
84 }
85 start_block = imap->br_startblock;
86 if (start_block == HOLESTARTBLOCK) {
87 iomapp->iomap_bn = IOMAP_DADDR_NULL;
88 iomapp->iomap_flags |= IOMAP_HOLE;
89 } else if (start_block == DELAYSTARTBLOCK) {
90 iomapp->iomap_bn = IOMAP_DADDR_NULL;
91 iomapp->iomap_flags |= IOMAP_DELAY;
92 } else {
93 iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block);
94 if (ISUNWRITTEN(imap))
95 iomapp->iomap_flags |= IOMAP_UNWRITTEN;
96 }
97
98 offset += iomapp->iomap_bsize - iomapp->iomap_delta;
99 }
100 return pbm; /* Return the number filled */
101}
102 64
103int 65int
104xfs_iomap( 66xfs_iomap(
105 xfs_inode_t *ip, 67 struct xfs_inode *ip,
106 xfs_off_t offset, 68 xfs_off_t offset,
107 ssize_t count, 69 ssize_t count,
108 int flags, 70 int flags,
109 xfs_iomap_t *iomapp, 71 struct xfs_bmbt_irec *imap,
110 int *niomaps) 72 int *nimaps,
73 int *new)
111{ 74{
112 xfs_mount_t *mp = ip->i_mount; 75 struct xfs_mount *mp = ip->i_mount;
113 xfs_fileoff_t offset_fsb, end_fsb; 76 xfs_fileoff_t offset_fsb, end_fsb;
114 int error = 0; 77 int error = 0;
115 int lockmode = 0; 78 int lockmode = 0;
116 xfs_bmbt_irec_t imap; 79 int bmapi_flags = 0;
117 int nimaps = 1;
118 int bmapi_flags = 0;
119 int iomap_flags = 0;
120 80
121 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); 81 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
122 82
83 *new = 0;
84
123 if (XFS_FORCED_SHUTDOWN(mp)) 85 if (XFS_FORCED_SHUTDOWN(mp))
124 return XFS_ERROR(EIO); 86 return XFS_ERROR(EIO);
125 87
@@ -160,8 +122,8 @@ xfs_iomap(
160 122
161 error = xfs_bmapi(NULL, ip, offset_fsb, 123 error = xfs_bmapi(NULL, ip, offset_fsb,
162 (xfs_filblks_t)(end_fsb - offset_fsb), 124 (xfs_filblks_t)(end_fsb - offset_fsb),
163 bmapi_flags, NULL, 0, &imap, 125 bmapi_flags, NULL, 0, imap,
164 &nimaps, NULL, NULL); 126 nimaps, NULL, NULL);
165 127
166 if (error) 128 if (error)
167 goto out; 129 goto out;
@@ -169,46 +131,41 @@ xfs_iomap(
169 switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { 131 switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
170 case BMAPI_WRITE: 132 case BMAPI_WRITE:
171 /* If we found an extent, return it */ 133 /* If we found an extent, return it */
172 if (nimaps && 134 if (*nimaps &&
173 (imap.br_startblock != HOLESTARTBLOCK) && 135 (imap->br_startblock != HOLESTARTBLOCK) &&
174 (imap.br_startblock != DELAYSTARTBLOCK)) { 136 (imap->br_startblock != DELAYSTARTBLOCK)) {
175 trace_xfs_iomap_found(ip, offset, count, flags, &imap); 137 trace_xfs_iomap_found(ip, offset, count, flags, imap);
176 break; 138 break;
177 } 139 }
178 140
179 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { 141 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
180 error = xfs_iomap_write_direct(ip, offset, count, flags, 142 error = xfs_iomap_write_direct(ip, offset, count, flags,
181 &imap, &nimaps, nimaps); 143 imap, nimaps);
182 } else { 144 } else {
183 error = xfs_iomap_write_delay(ip, offset, count, flags, 145 error = xfs_iomap_write_delay(ip, offset, count, flags,
184 &imap, &nimaps); 146 imap, nimaps);
185 } 147 }
186 if (!error) { 148 if (!error) {
187 trace_xfs_iomap_alloc(ip, offset, count, flags, &imap); 149 trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
188 } 150 }
189 iomap_flags = IOMAP_NEW; 151 *new = 1;
190 break; 152 break;
191 case BMAPI_ALLOCATE: 153 case BMAPI_ALLOCATE:
192 /* If we found an extent, return it */ 154 /* If we found an extent, return it */
193 xfs_iunlock(ip, lockmode); 155 xfs_iunlock(ip, lockmode);
194 lockmode = 0; 156 lockmode = 0;
195 157
196 if (nimaps && !isnullstartblock(imap.br_startblock)) { 158 if (*nimaps && !isnullstartblock(imap->br_startblock)) {
197 trace_xfs_iomap_found(ip, offset, count, flags, &imap); 159 trace_xfs_iomap_found(ip, offset, count, flags, imap);
198 break; 160 break;
199 } 161 }
200 162
201 error = xfs_iomap_write_allocate(ip, offset, count, 163 error = xfs_iomap_write_allocate(ip, offset, count,
202 &imap, &nimaps); 164 imap, nimaps);
203 break; 165 break;
204 } 166 }
205 167
206 if (nimaps) { 168 ASSERT(*nimaps <= 1);
207 *niomaps = xfs_imap_to_bmap(ip, offset, &imap,
208 iomapp, nimaps, *niomaps, iomap_flags);
209 } else if (niomaps) {
210 *niomaps = 0;
211 }
212 169
213out: 170out:
214 if (lockmode) 171 if (lockmode)
@@ -216,7 +173,6 @@ out:
216 return XFS_ERROR(error); 173 return XFS_ERROR(error);
217} 174}
218 175
219
220STATIC int 176STATIC int
221xfs_iomap_eof_align_last_fsb( 177xfs_iomap_eof_align_last_fsb(
222 xfs_mount_t *mp, 178 xfs_mount_t *mp,
@@ -285,15 +241,14 @@ xfs_cmn_err_fsblock_zero(
285 return EFSCORRUPTED; 241 return EFSCORRUPTED;
286} 242}
287 243
288int 244STATIC int
289xfs_iomap_write_direct( 245xfs_iomap_write_direct(
290 xfs_inode_t *ip, 246 xfs_inode_t *ip,
291 xfs_off_t offset, 247 xfs_off_t offset,
292 size_t count, 248 size_t count,
293 int flags, 249 int flags,
294 xfs_bmbt_irec_t *ret_imap, 250 xfs_bmbt_irec_t *ret_imap,
295 int *nmaps, 251 int *nmaps)
296 int found)
297{ 252{
298 xfs_mount_t *mp = ip->i_mount; 253 xfs_mount_t *mp = ip->i_mount;
299 xfs_fileoff_t offset_fsb; 254 xfs_fileoff_t offset_fsb;
@@ -330,7 +285,7 @@ xfs_iomap_write_direct(
330 if (error) 285 if (error)
331 goto error_out; 286 goto error_out;
332 } else { 287 } else {
333 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) 288 if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK))
334 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 289 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
335 ret_imap->br_blockcount + 290 ret_imap->br_blockcount +
336 ret_imap->br_startoff); 291 ret_imap->br_startoff);
@@ -485,7 +440,7 @@ xfs_iomap_eof_want_preallocate(
485 return 0; 440 return 0;
486} 441}
487 442
488int 443STATIC int
489xfs_iomap_write_delay( 444xfs_iomap_write_delay(
490 xfs_inode_t *ip, 445 xfs_inode_t *ip,
491 xfs_off_t offset, 446 xfs_off_t offset,
@@ -588,7 +543,7 @@ retry:
588 * We no longer bother to look at the incoming map - all we have to 543 * We no longer bother to look at the incoming map - all we have to
589 * guarantee is that whatever we allocate fills the required range. 544 * guarantee is that whatever we allocate fills the required range.
590 */ 545 */
591int 546STATIC int
592xfs_iomap_write_allocate( 547xfs_iomap_write_allocate(
593 xfs_inode_t *ip, 548 xfs_inode_t *ip,
594 xfs_off_t offset, 549 xfs_off_t offset,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 174f29990991..81ac4afd45b3 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,19 +18,6 @@
18#ifndef __XFS_IOMAP_H__ 18#ifndef __XFS_IOMAP_H__
19#define __XFS_IOMAP_H__ 19#define __XFS_IOMAP_H__
20 20
21#define IOMAP_DADDR_NULL ((xfs_daddr_t) (-1LL))
22
23
24typedef enum { /* iomap_flags values */
25 IOMAP_READ = 0, /* mapping for a read */
26 IOMAP_HOLE = 0x02, /* mapping covers a hole */
27 IOMAP_DELAY = 0x04, /* mapping covers delalloc region */
28 IOMAP_REALTIME = 0x10, /* mapping on the realtime device */
29 IOMAP_UNWRITTEN = 0x20, /* mapping covers allocated */
30 /* but uninitialized file data */
31 IOMAP_NEW = 0x40 /* just allocate */
32} iomap_flags_t;
33
34typedef enum { 21typedef enum {
35 /* base extent manipulation calls */ 22 /* base extent manipulation calls */
36 BMAPI_READ = (1 << 0), /* read extents */ 23 BMAPI_READ = (1 << 0), /* read extents */
@@ -52,43 +39,11 @@ typedef enum {
52 { BMAPI_MMAP, "MMAP" }, \ 39 { BMAPI_MMAP, "MMAP" }, \
53 { BMAPI_TRYLOCK, "TRYLOCK" } 40 { BMAPI_TRYLOCK, "TRYLOCK" }
54 41
55/*
56 * xfs_iomap_t: File system I/O map
57 *
58 * The iomap_bn field is expressed in 512-byte blocks, and is where the
59 * mapping starts on disk.
60 *
61 * The iomap_offset, iomap_bsize and iomap_delta fields are in bytes.
62 * iomap_offset is the offset of the mapping in the file itself.
63 * iomap_bsize is the size of the mapping, iomap_delta is the
64 * desired data's offset into the mapping, given the offset supplied
65 * to the file I/O map routine.
66 *
67 * When a request is made to read beyond the logical end of the object,
68 * iomap_size may be set to 0, but iomap_offset and iomap_length should be set
69 * to the actual amount of underlying storage that has been allocated, if any.
70 */
71
72typedef struct xfs_iomap {
73 xfs_daddr_t iomap_bn; /* first 512B blk of mapping */
74 xfs_buftarg_t *iomap_target;
75 xfs_off_t iomap_offset; /* offset of mapping, bytes */
76 xfs_off_t iomap_bsize; /* size of mapping, bytes */
77 xfs_off_t iomap_delta; /* offset into mapping, bytes */
78 iomap_flags_t iomap_flags;
79} xfs_iomap_t;
80
81struct xfs_inode; 42struct xfs_inode;
82struct xfs_bmbt_irec; 43struct xfs_bmbt_irec;
83 44
84extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, 45extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int,
85 struct xfs_iomap *, int *); 46 struct xfs_bmbt_irec *, int *, int *);
86extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
87 int, struct xfs_bmbt_irec *, int *, int);
88extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
89 struct xfs_bmbt_irec *, int *);
90extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
91 struct xfs_bmbt_irec *, int *);
92extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); 47extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
93 48
94#endif /* __XFS_IOMAP_H__*/ 49#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2be019136287..3038dd52c72a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -44,13 +44,8 @@
44 44
45kmem_zone_t *xfs_log_ticket_zone; 45kmem_zone_t *xfs_log_ticket_zone;
46 46
47#define xlog_write_adv_cnt(ptr, len, off, bytes) \
48 { (ptr) += (bytes); \
49 (len) -= (bytes); \
50 (off) += (bytes);}
51
52/* Local miscellaneous function prototypes */ 47/* Local miscellaneous function prototypes */
53STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, 48STATIC int xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
54 xlog_in_core_t **, xfs_lsn_t *); 49 xlog_in_core_t **, xfs_lsn_t *);
55STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, 50STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
56 xfs_buftarg_t *log_target, 51 xfs_buftarg_t *log_target,
@@ -59,11 +54,9 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
59STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); 54STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
60STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 55STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
61STATIC void xlog_dealloc_log(xlog_t *log); 56STATIC void xlog_dealloc_log(xlog_t *log);
62STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], 57STATIC int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
63 int nentries, struct xlog_ticket *tic, 58 struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
64 xfs_lsn_t *start_lsn, 59 xlog_in_core_t **commit_iclog, uint flags);
65 xlog_in_core_t **commit_iclog,
66 uint flags);
67 60
68/* local state machine functions */ 61/* local state machine functions */
69STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); 62STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
@@ -102,7 +95,7 @@ STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log,
102 uint flags); 95 uint flags);
103 96
104#if defined(DEBUG) 97#if defined(DEBUG)
105STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); 98STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
106STATIC void xlog_verify_grant_head(xlog_t *log, int equals); 99STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
107STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, 100STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
108 int count, boolean_t syncing); 101 int count, boolean_t syncing);
@@ -258,7 +251,7 @@ xfs_log_done(
258 * If we get an error, just continue and give back the log ticket. 251 * If we get an error, just continue and give back the log ticket.
259 */ 252 */
260 (((ticket->t_flags & XLOG_TIC_INITED) == 0) && 253 (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
261 (xlog_commit_record(mp, ticket, iclog, &lsn)))) { 254 (xlog_commit_record(log, ticket, iclog, &lsn)))) {
262 lsn = (xfs_lsn_t) -1; 255 lsn = (xfs_lsn_t) -1;
263 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { 256 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
264 flags |= XFS_LOG_REL_PERM_RESERV; 257 flags |= XFS_LOG_REL_PERM_RESERV;
@@ -516,18 +509,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)
516#ifdef DEBUG 509#ifdef DEBUG
517 xlog_in_core_t *first_iclog; 510 xlog_in_core_t *first_iclog;
518#endif 511#endif
519 xfs_log_iovec_t reg[1];
520 xlog_ticket_t *tic = NULL; 512 xlog_ticket_t *tic = NULL;
521 xfs_lsn_t lsn; 513 xfs_lsn_t lsn;
522 int error; 514 int error;
523 515
524 /* the data section must be 32 bit size aligned */
525 struct {
526 __uint16_t magic;
527 __uint16_t pad1;
528 __uint32_t pad2; /* may as well make it 64 bits */
529 } magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
530
531 /* 516 /*
532 * Don't write out unmount record on read-only mounts. 517 * Don't write out unmount record on read-only mounts.
533 * Or, if we are doing a forced umount (typically because of IO errors). 518 * Or, if we are doing a forced umount (typically because of IO errors).
@@ -549,16 +534,30 @@ xfs_log_unmount_write(xfs_mount_t *mp)
549 } while (iclog != first_iclog); 534 } while (iclog != first_iclog);
550#endif 535#endif
551 if (! (XLOG_FORCED_SHUTDOWN(log))) { 536 if (! (XLOG_FORCED_SHUTDOWN(log))) {
552 reg[0].i_addr = (void*)&magic;
553 reg[0].i_len = sizeof(magic);
554 reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
555
556 error = xfs_log_reserve(mp, 600, 1, &tic, 537 error = xfs_log_reserve(mp, 600, 1, &tic,
557 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); 538 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
558 if (!error) { 539 if (!error) {
540 /* the data section must be 32 bit size aligned */
541 struct {
542 __uint16_t magic;
543 __uint16_t pad1;
544 __uint32_t pad2; /* may as well make it 64 bits */
545 } magic = {
546 .magic = XLOG_UNMOUNT_TYPE,
547 };
548 struct xfs_log_iovec reg = {
549 .i_addr = (void *)&magic,
550 .i_len = sizeof(magic),
551 .i_type = XLOG_REG_TYPE_UNMOUNT,
552 };
553 struct xfs_log_vec vec = {
554 .lv_niovecs = 1,
555 .lv_iovecp = &reg,
556 };
557
559 /* remove inited flag */ 558 /* remove inited flag */
560 ((xlog_ticket_t *)tic)->t_flags = 0; 559 tic->t_flags = 0;
561 error = xlog_write(mp, reg, 1, tic, &lsn, 560 error = xlog_write(log, &vec, tic, &lsn,
562 NULL, XLOG_UNMOUNT_TRANS); 561 NULL, XLOG_UNMOUNT_TRANS);
563 /* 562 /*
564 * At this point, we're umounting anyway, 563 * At this point, we're umounting anyway,
@@ -648,10 +647,26 @@ xfs_log_unmount(xfs_mount_t *mp)
648 xlog_dealloc_log(mp->m_log); 647 xlog_dealloc_log(mp->m_log);
649} 648}
650 649
650void
651xfs_log_item_init(
652 struct xfs_mount *mp,
653 struct xfs_log_item *item,
654 int type,
655 struct xfs_item_ops *ops)
656{
657 item->li_mountp = mp;
658 item->li_ailp = mp->m_ail;
659 item->li_type = type;
660 item->li_ops = ops;
661}
662
651/* 663/*
652 * Write region vectors to log. The write happens using the space reservation 664 * Write region vectors to log. The write happens using the space reservation
653 * of the ticket (tic). It is not a requirement that all writes for a given 665 * of the ticket (tic). It is not a requirement that all writes for a given
654 * transaction occur with one call to xfs_log_write(). 666 * transaction occur with one call to xfs_log_write(). However, it is important
667 * to note that the transaction reservation code makes an assumption about the
668 * number of log headers a transaction requires that may be violated if you
669 * don't pass all the transaction vectors in one call....
655 */ 670 */
656int 671int
657xfs_log_write( 672xfs_log_write(
@@ -663,11 +678,15 @@ xfs_log_write(
663{ 678{
664 struct log *log = mp->m_log; 679 struct log *log = mp->m_log;
665 int error; 680 int error;
681 struct xfs_log_vec vec = {
682 .lv_niovecs = nentries,
683 .lv_iovecp = reg,
684 };
666 685
667 if (XLOG_FORCED_SHUTDOWN(log)) 686 if (XLOG_FORCED_SHUTDOWN(log))
668 return XFS_ERROR(EIO); 687 return XFS_ERROR(EIO);
669 688
670 error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0); 689 error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
671 if (error) 690 if (error)
672 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 691 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
673 return error; 692 return error;
@@ -1020,6 +1039,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1020 int i; 1039 int i;
1021 int iclogsize; 1040 int iclogsize;
1022 int error = ENOMEM; 1041 int error = ENOMEM;
1042 uint log2_size = 0;
1023 1043
1024 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); 1044 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
1025 if (!log) { 1045 if (!log) {
@@ -1045,29 +1065,30 @@ xlog_alloc_log(xfs_mount_t *mp,
1045 1065
1046 error = EFSCORRUPTED; 1066 error = EFSCORRUPTED;
1047 if (xfs_sb_version_hassector(&mp->m_sb)) { 1067 if (xfs_sb_version_hassector(&mp->m_sb)) {
1048 log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; 1068 log2_size = mp->m_sb.sb_logsectlog;
1049 if (log->l_sectbb_log < 0 || 1069 if (log2_size < BBSHIFT) {
1050 log->l_sectbb_log > mp->m_sectbb_log) { 1070 xlog_warn("XFS: Log sector size too small "
1051 xlog_warn("XFS: Log sector size (0x%x) out of range.", 1071 "(0x%x < 0x%x)", log2_size, BBSHIFT);
1052 log->l_sectbb_log);
1053 goto out_free_log; 1072 goto out_free_log;
1054 } 1073 }
1055 1074
1056 /* for larger sector sizes, must have v2 or external log */ 1075 log2_size -= BBSHIFT;
1057 if (log->l_sectbb_log != 0 && 1076 if (log2_size > mp->m_sectbb_log) {
1058 (log->l_logBBstart != 0 && 1077 xlog_warn("XFS: Log sector size too large "
1059 !xfs_sb_version_haslogv2(&mp->m_sb))) { 1078 "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log);
1060 xlog_warn("XFS: log sector size (0x%x) invalid "
1061 "for configuration.", log->l_sectbb_log);
1062 goto out_free_log; 1079 goto out_free_log;
1063 } 1080 }
1064 if (mp->m_sb.sb_logsectlog < BBSHIFT) { 1081
1065 xlog_warn("XFS: Log sector log (0x%x) too small.", 1082 /* for larger sector sizes, must have v2 or external log */
1066 mp->m_sb.sb_logsectlog); 1083 if (log2_size && log->l_logBBstart > 0 &&
1084 !xfs_sb_version_haslogv2(&mp->m_sb)) {
1085
1086 xlog_warn("XFS: log sector size (0x%x) invalid "
1087 "for configuration.", log2_size);
1067 goto out_free_log; 1088 goto out_free_log;
1068 } 1089 }
1069 } 1090 }
1070 log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; 1091 log->l_sectBBsize = 1 << log2_size;
1071 1092
1072 xlog_get_iclog_buffer_size(mp, log); 1093 xlog_get_iclog_buffer_size(mp, log);
1073 1094
@@ -1174,26 +1195,31 @@ out:
1174 * ticket. Return the lsn of the commit record. 1195 * ticket. Return the lsn of the commit record.
1175 */ 1196 */
1176STATIC int 1197STATIC int
1177xlog_commit_record(xfs_mount_t *mp, 1198xlog_commit_record(
1178 xlog_ticket_t *ticket, 1199 struct log *log,
1179 xlog_in_core_t **iclog, 1200 struct xlog_ticket *ticket,
1180 xfs_lsn_t *commitlsnp) 1201 struct xlog_in_core **iclog,
1202 xfs_lsn_t *commitlsnp)
1181{ 1203{
1182 int error; 1204 struct xfs_mount *mp = log->l_mp;
1183 xfs_log_iovec_t reg[1]; 1205 int error;
1184 1206 struct xfs_log_iovec reg = {
1185 reg[0].i_addr = NULL; 1207 .i_addr = NULL,
1186 reg[0].i_len = 0; 1208 .i_len = 0,
1187 reg[0].i_type = XLOG_REG_TYPE_COMMIT; 1209 .i_type = XLOG_REG_TYPE_COMMIT,
1210 };
1211 struct xfs_log_vec vec = {
1212 .lv_niovecs = 1,
1213 .lv_iovecp = &reg,
1214 };
1188 1215
1189 ASSERT_ALWAYS(iclog); 1216 ASSERT_ALWAYS(iclog);
1190 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1217 error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
1191 iclog, XLOG_COMMIT_TRANS))) { 1218 XLOG_COMMIT_TRANS);
1219 if (error)
1192 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1220 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1193 }
1194 return error; 1221 return error;
1195} /* xlog_commit_record */ 1222}
1196
1197 1223
1198/* 1224/*
1199 * Push on the buffer cache code if we ever use more than 75% of the on-disk 1225 * Push on the buffer cache code if we ever use more than 75% of the on-disk
@@ -1614,6 +1640,192 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1614} 1640}
1615 1641
1616/* 1642/*
1643 * Calculate the potential space needed by the log vector. Each region gets
1644 * its own xlog_op_header_t and may need to be double word aligned.
1645 */
1646static int
1647xlog_write_calc_vec_length(
1648 struct xlog_ticket *ticket,
1649 struct xfs_log_vec *log_vector)
1650{
1651 struct xfs_log_vec *lv;
1652 int headers = 0;
1653 int len = 0;
1654 int i;
1655
1656 /* acct for start rec of xact */
1657 if (ticket->t_flags & XLOG_TIC_INITED)
1658 headers++;
1659
1660 for (lv = log_vector; lv; lv = lv->lv_next) {
1661 headers += lv->lv_niovecs;
1662
1663 for (i = 0; i < lv->lv_niovecs; i++) {
1664 struct xfs_log_iovec *vecp = &lv->lv_iovecp[i];
1665
1666 len += vecp->i_len;
1667 xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
1668 }
1669 }
1670
1671 ticket->t_res_num_ophdrs += headers;
1672 len += headers * sizeof(struct xlog_op_header);
1673
1674 return len;
1675}
1676
1677/*
1678 * If first write for transaction, insert start record. We can't be trying to
1679 * commit if we are inited. We can't have any "partial_copy" if we are inited.
1680 */
1681static int
1682xlog_write_start_rec(
1683 struct xlog_op_header *ophdr,
1684 struct xlog_ticket *ticket)
1685{
1686 if (!(ticket->t_flags & XLOG_TIC_INITED))
1687 return 0;
1688
1689 ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
1690 ophdr->oh_clientid = ticket->t_clientid;
1691 ophdr->oh_len = 0;
1692 ophdr->oh_flags = XLOG_START_TRANS;
1693 ophdr->oh_res2 = 0;
1694
1695 ticket->t_flags &= ~XLOG_TIC_INITED;
1696
1697 return sizeof(struct xlog_op_header);
1698}
1699
1700static xlog_op_header_t *
1701xlog_write_setup_ophdr(
1702 struct log *log,
1703 struct xlog_op_header *ophdr,
1704 struct xlog_ticket *ticket,
1705 uint flags)
1706{
1707 ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
1708 ophdr->oh_clientid = ticket->t_clientid;
1709 ophdr->oh_res2 = 0;
1710
1711 /* are we copying a commit or unmount record? */
1712 ophdr->oh_flags = flags;
1713
1714 /*
1715 * We've seen logs corrupted with bad transaction client ids. This
1716 * makes sure that XFS doesn't generate them on. Turn this into an EIO
1717 * and shut down the filesystem.
1718 */
1719 switch (ophdr->oh_clientid) {
1720 case XFS_TRANSACTION:
1721 case XFS_VOLUME:
1722 case XFS_LOG:
1723 break;
1724 default:
1725 xfs_fs_cmn_err(CE_WARN, log->l_mp,
1726 "Bad XFS transaction clientid 0x%x in ticket 0x%p",
1727 ophdr->oh_clientid, ticket);
1728 return NULL;
1729 }
1730
1731 return ophdr;
1732}
1733
1734/*
1735 * Set up the parameters of the region copy into the log. This has
1736 * to handle region write split across multiple log buffers - this
1737 * state is kept external to this function so that this code can
1738 * be written in an obvious, self documenting manner.
1739 */
1740static int
1741xlog_write_setup_copy(
1742 struct xlog_ticket *ticket,
1743 struct xlog_op_header *ophdr,
1744 int space_available,
1745 int space_required,
1746 int *copy_off,
1747 int *copy_len,
1748 int *last_was_partial_copy,
1749 int *bytes_consumed)
1750{
1751 int still_to_copy;
1752
1753 still_to_copy = space_required - *bytes_consumed;
1754 *copy_off = *bytes_consumed;
1755
1756 if (still_to_copy <= space_available) {
1757 /* write of region completes here */
1758 *copy_len = still_to_copy;
1759 ophdr->oh_len = cpu_to_be32(*copy_len);
1760 if (*last_was_partial_copy)
1761 ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
1762 *last_was_partial_copy = 0;
1763 *bytes_consumed = 0;
1764 return 0;
1765 }
1766
1767 /* partial write of region, needs extra log op header reservation */
1768 *copy_len = space_available;
1769 ophdr->oh_len = cpu_to_be32(*copy_len);
1770 ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
1771 if (*last_was_partial_copy)
1772 ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
1773 *bytes_consumed += *copy_len;
1774 (*last_was_partial_copy)++;
1775
1776 /* account for new log op header */
1777 ticket->t_curr_res -= sizeof(struct xlog_op_header);
1778 ticket->t_res_num_ophdrs++;
1779
1780 return sizeof(struct xlog_op_header);
1781}
1782
1783static int
1784xlog_write_copy_finish(
1785 struct log *log,
1786 struct xlog_in_core *iclog,
1787 uint flags,
1788 int *record_cnt,
1789 int *data_cnt,
1790 int *partial_copy,
1791 int *partial_copy_len,
1792 int log_offset,
1793 struct xlog_in_core **commit_iclog)
1794{
1795 if (*partial_copy) {
1796 /*
1797 * This iclog has already been marked WANT_SYNC by
1798 * xlog_state_get_iclog_space.
1799 */
1800 xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
1801 *record_cnt = 0;
1802 *data_cnt = 0;
1803 return xlog_state_release_iclog(log, iclog);
1804 }
1805
1806 *partial_copy = 0;
1807 *partial_copy_len = 0;
1808
1809 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
1810 /* no more space in this iclog - push it. */
1811 xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
1812 *record_cnt = 0;
1813 *data_cnt = 0;
1814
1815 spin_lock(&log->l_icloglock);
1816 xlog_state_want_sync(log, iclog);
1817 spin_unlock(&log->l_icloglock);
1818
1819 if (!commit_iclog)
1820 return xlog_state_release_iclog(log, iclog);
1821 ASSERT(flags & XLOG_COMMIT_TRANS);
1822 *commit_iclog = iclog;
1823 }
1824
1825 return 0;
1826}
1827
1828/*
1617 * Write some region out to in-core log 1829 * Write some region out to in-core log
1618 * 1830 *
1619 * This will be called when writing externally provided regions or when 1831 * This will be called when writing externally provided regions or when
@@ -1655,209 +1867,157 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1655 */ 1867 */
1656STATIC int 1868STATIC int
1657xlog_write( 1869xlog_write(
1658 struct xfs_mount *mp, 1870 struct log *log,
1659 struct xfs_log_iovec reg[], 1871 struct xfs_log_vec *log_vector,
1660 int nentries,
1661 struct xlog_ticket *ticket, 1872 struct xlog_ticket *ticket,
1662 xfs_lsn_t *start_lsn, 1873 xfs_lsn_t *start_lsn,
1663 struct xlog_in_core **commit_iclog, 1874 struct xlog_in_core **commit_iclog,
1664 uint flags) 1875 uint flags)
1665{ 1876{
1666 xlog_t *log = mp->m_log; 1877 struct xlog_in_core *iclog = NULL;
1667 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ 1878 struct xfs_log_iovec *vecp;
1668 xlog_op_header_t *logop_head; /* ptr to log operation header */ 1879 struct xfs_log_vec *lv;
1669 __psint_t ptr; /* copy address into data region */ 1880 int len;
1670 int len; /* # xlog_write() bytes 2 still copy */ 1881 int index;
1671 int index; /* region index currently copying */ 1882 int partial_copy = 0;
1672 int log_offset; /* offset (from 0) into data region */ 1883 int partial_copy_len = 0;
1673 int start_rec_copy; /* # bytes to copy for start record */ 1884 int contwr = 0;
1674 int partial_copy; /* did we split a region? */ 1885 int record_cnt = 0;
1675 int partial_copy_len;/* # bytes copied if split region */ 1886 int data_cnt = 0;
1676 int need_copy; /* # bytes need to memcpy this region */ 1887 int error;
1677 int copy_len; /* # bytes actually memcpy'ing */
1678 int copy_off; /* # bytes from entry start */
1679 int contwr; /* continued write of in-core log? */
1680 int error;
1681 int record_cnt = 0, data_cnt = 0;
1682
1683 partial_copy_len = partial_copy = 0;
1684
1685 /* Calculate potential maximum space. Each region gets its own
1686 * xlog_op_header_t and may need to be double word aligned.
1687 */
1688 len = 0;
1689 if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */
1690 len += sizeof(xlog_op_header_t);
1691 ticket->t_res_num_ophdrs++;
1692 }
1693 1888
1694 for (index = 0; index < nentries; index++) { 1889 *start_lsn = 0;
1695 len += sizeof(xlog_op_header_t); /* each region gets >= 1 */
1696 ticket->t_res_num_ophdrs++;
1697 len += reg[index].i_len;
1698 xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type);
1699 }
1700 contwr = *start_lsn = 0;
1701 1890
1702 if (ticket->t_curr_res < len) { 1891 len = xlog_write_calc_vec_length(ticket, log_vector);
1703 xlog_print_tic_res(mp, ticket); 1892 if (ticket->t_curr_res < len) {
1893 xlog_print_tic_res(log->l_mp, ticket);
1704#ifdef DEBUG 1894#ifdef DEBUG
1705 xlog_panic( 1895 xlog_panic(
1706 "xfs_log_write: reservation ran out. Need to up reservation"); 1896 "xfs_log_write: reservation ran out. Need to up reservation");
1707#else 1897#else
1708 /* Customer configurable panic */ 1898 /* Customer configurable panic */
1709 xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, 1899 xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, log->l_mp,
1710 "xfs_log_write: reservation ran out. Need to up reservation"); 1900 "xfs_log_write: reservation ran out. Need to up reservation");
1711 /* If we did not panic, shutdown the filesystem */ 1901
1712 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1902 /* If we did not panic, shutdown the filesystem */
1903 xfs_force_shutdown(log->l_mp, SHUTDOWN_CORRUPT_INCORE);
1713#endif 1904#endif
1714 } else 1905 }
1906
1715 ticket->t_curr_res -= len; 1907 ticket->t_curr_res -= len;
1716 1908
1717 for (index = 0; index < nentries; ) { 1909 index = 0;
1718 if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket, 1910 lv = log_vector;
1719 &contwr, &log_offset))) 1911 vecp = lv->lv_iovecp;
1720 return error; 1912 while (lv && index < lv->lv_niovecs) {
1913 void *ptr;
1914 int log_offset;
1721 1915
1722 ASSERT(log_offset <= iclog->ic_size - 1); 1916 error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
1723 ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset); 1917 &contwr, &log_offset);
1918 if (error)
1919 return error;
1724 1920
1725 /* start_lsn is the first lsn written to. That's all we need. */ 1921 ASSERT(log_offset <= iclog->ic_size - 1);
1726 if (! *start_lsn) 1922 ptr = iclog->ic_datap + log_offset;
1727 *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
1728 1923
1729 /* This loop writes out as many regions as can fit in the amount 1924 /* start_lsn is the first lsn written to. That's all we need. */
1730 * of space which was allocated by xlog_state_get_iclog_space(). 1925 if (!*start_lsn)
1731 */ 1926 *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
1732 while (index < nentries) {
1733 ASSERT(reg[index].i_len % sizeof(__int32_t) == 0);
1734 ASSERT((__psint_t)ptr % sizeof(__int32_t) == 0);
1735 start_rec_copy = 0;
1736
1737 /* If first write for transaction, insert start record.
1738 * We can't be trying to commit if we are inited. We can't
1739 * have any "partial_copy" if we are inited.
1740 */
1741 if (ticket->t_flags & XLOG_TIC_INITED) {
1742 logop_head = (xlog_op_header_t *)ptr;
1743 logop_head->oh_tid = cpu_to_be32(ticket->t_tid);
1744 logop_head->oh_clientid = ticket->t_clientid;
1745 logop_head->oh_len = 0;
1746 logop_head->oh_flags = XLOG_START_TRANS;
1747 logop_head->oh_res2 = 0;
1748 ticket->t_flags &= ~XLOG_TIC_INITED; /* clear bit */
1749 record_cnt++;
1750
1751 start_rec_copy = sizeof(xlog_op_header_t);
1752 xlog_write_adv_cnt(ptr, len, log_offset, start_rec_copy);
1753 }
1754 1927
1755 /* Copy log operation header directly into data section */ 1928 /*
1756 logop_head = (xlog_op_header_t *)ptr; 1929 * This loop writes out as many regions as can fit in the amount
1757 logop_head->oh_tid = cpu_to_be32(ticket->t_tid); 1930 * of space which was allocated by xlog_state_get_iclog_space().
1758 logop_head->oh_clientid = ticket->t_clientid; 1931 */
1759 logop_head->oh_res2 = 0; 1932 while (lv && index < lv->lv_niovecs) {
1933 struct xfs_log_iovec *reg = &vecp[index];
1934 struct xlog_op_header *ophdr;
1935 int start_rec_copy;
1936 int copy_len;
1937 int copy_off;
1938
1939 ASSERT(reg->i_len % sizeof(__int32_t) == 0);
1940 ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
1941
1942 start_rec_copy = xlog_write_start_rec(ptr, ticket);
1943 if (start_rec_copy) {
1944 record_cnt++;
1945 xlog_write_adv_cnt(&ptr, &len, &log_offset,
1946 start_rec_copy);
1947 }
1760 1948
1761 /* header copied directly */ 1949 ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
1762 xlog_write_adv_cnt(ptr, len, log_offset, sizeof(xlog_op_header_t)); 1950 if (!ophdr)
1951 return XFS_ERROR(EIO);
1763 1952
1764 /* are we copying a commit or unmount record? */ 1953 xlog_write_adv_cnt(&ptr, &len, &log_offset,
1765 logop_head->oh_flags = flags; 1954 sizeof(struct xlog_op_header));
1955
1956 len += xlog_write_setup_copy(ticket, ophdr,
1957 iclog->ic_size-log_offset,
1958 reg->i_len,
1959 &copy_off, &copy_len,
1960 &partial_copy,
1961 &partial_copy_len);
1962 xlog_verify_dest_ptr(log, ptr);
1963
1964 /* copy region */
1965 ASSERT(copy_len >= 0);
1966 memcpy(ptr, reg->i_addr + copy_off, copy_len);
1967 xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
1968
1969 copy_len += start_rec_copy + sizeof(xlog_op_header_t);
1970 record_cnt++;
1971 data_cnt += contwr ? copy_len : 0;
1972
1973 error = xlog_write_copy_finish(log, iclog, flags,
1974 &record_cnt, &data_cnt,
1975 &partial_copy,
1976 &partial_copy_len,
1977 log_offset,
1978 commit_iclog);
1979 if (error)
1980 return error;
1766 1981
1767 /* 1982 /*
1768 * We've seen logs corrupted with bad transaction client 1983 * if we had a partial copy, we need to get more iclog
1769 * ids. This makes sure that XFS doesn't generate them on. 1984 * space but we don't want to increment the region
1770 * Turn this into an EIO and shut down the filesystem. 1985 * index because there is still more in this region to
1771 */ 1986 * write.
1772 switch (logop_head->oh_clientid) { 1987 *
1773 case XFS_TRANSACTION: 1988 * If we completed writing this region, and we flushed
1774 case XFS_VOLUME: 1989 * the iclog (indicated by resetting of the record
1775 case XFS_LOG: 1990 * count), then we also need to get more log space. If
1776 break; 1991 * this was the last record, though, we are done and
1777 default: 1992 * can just return.
1778 xfs_fs_cmn_err(CE_WARN, mp, 1993 */
1779 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 1994 if (partial_copy)
1780 logop_head->oh_clientid, ticket); 1995 break;
1781 return XFS_ERROR(EIO);
1782 }
1783 1996
1784 /* Partial write last time? => (partial_copy != 0) 1997 if (++index == lv->lv_niovecs) {
1785 * need_copy is the amount we'd like to copy if everything could 1998 lv = lv->lv_next;
1786 * fit in the current memcpy. 1999 index = 0;
1787 */ 2000 if (lv)
1788 need_copy = reg[index].i_len - partial_copy_len; 2001 vecp = lv->lv_iovecp;
1789 2002 }
1790 copy_off = partial_copy_len; 2003 if (record_cnt == 0) {
1791 if (need_copy <= iclog->ic_size - log_offset) { /*complete write */ 2004 if (!lv)
1792 copy_len = need_copy; 2005 return 0;
1793 logop_head->oh_len = cpu_to_be32(copy_len); 2006 break;
1794 if (partial_copy) 2007 }
1795 logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
1796 partial_copy_len = partial_copy = 0;
1797 } else { /* partial write */
1798 copy_len = iclog->ic_size - log_offset;
1799 logop_head->oh_len = cpu_to_be32(copy_len);
1800 logop_head->oh_flags |= XLOG_CONTINUE_TRANS;
1801 if (partial_copy)
1802 logop_head->oh_flags |= XLOG_WAS_CONT_TRANS;
1803 partial_copy_len += copy_len;
1804 partial_copy++;
1805 len += sizeof(xlog_op_header_t); /* from splitting of region */
1806 /* account for new log op header */
1807 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1808 ticket->t_res_num_ophdrs++;
1809 }
1810 xlog_verify_dest_ptr(log, ptr);
1811
1812 /* copy region */
1813 ASSERT(copy_len >= 0);
1814 memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len);
1815 xlog_write_adv_cnt(ptr, len, log_offset, copy_len);
1816
1817 /* make copy_len total bytes copied, including headers */
1818 copy_len += start_rec_copy + sizeof(xlog_op_header_t);
1819 record_cnt++;
1820 data_cnt += contwr ? copy_len : 0;
1821 if (partial_copy) { /* copied partial region */
1822 /* already marked WANT_SYNC by xlog_state_get_iclog_space */
1823 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1824 record_cnt = data_cnt = 0;
1825 if ((error = xlog_state_release_iclog(log, iclog)))
1826 return error;
1827 break; /* don't increment index */
1828 } else { /* copied entire region */
1829 index++;
1830 partial_copy_len = partial_copy = 0;
1831
1832 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
1833 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1834 record_cnt = data_cnt = 0;
1835 spin_lock(&log->l_icloglock);
1836 xlog_state_want_sync(log, iclog);
1837 spin_unlock(&log->l_icloglock);
1838 if (commit_iclog) {
1839 ASSERT(flags & XLOG_COMMIT_TRANS);
1840 *commit_iclog = iclog;
1841 } else if ((error = xlog_state_release_iclog(log, iclog)))
1842 return error;
1843 if (index == nentries)
1844 return 0; /* we are done */
1845 else
1846 break;
1847 } 2008 }
1848 } /* if (partial_copy) */ 2009 }
1849 } /* while (index < nentries) */ 2010
1850 } /* for (index = 0; index < nentries; ) */ 2011 ASSERT(len == 0);
1851 ASSERT(len == 0); 2012
2013 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
2014 if (!commit_iclog)
2015 return xlog_state_release_iclog(log, iclog);
1852 2016
1853 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1854 if (commit_iclog) {
1855 ASSERT(flags & XLOG_COMMIT_TRANS); 2017 ASSERT(flags & XLOG_COMMIT_TRANS);
1856 *commit_iclog = iclog; 2018 *commit_iclog = iclog;
1857 return 0; 2019 return 0;
1858 } 2020}
1859 return xlog_state_release_iclog(log, iclog);
1860} /* xlog_write */
1861 2021
1862 2022
1863/***************************************************************************** 2023/*****************************************************************************
@@ -3157,14 +3317,16 @@ xfs_log_ticket_get(
3157 * Allocate and initialise a new log ticket. 3317 * Allocate and initialise a new log ticket.
3158 */ 3318 */
3159STATIC xlog_ticket_t * 3319STATIC xlog_ticket_t *
3160xlog_ticket_alloc(xlog_t *log, 3320xlog_ticket_alloc(
3161 int unit_bytes, 3321 struct log *log,
3162 int cnt, 3322 int unit_bytes,
3163 char client, 3323 int cnt,
3164 uint xflags) 3324 char client,
3325 uint xflags)
3165{ 3326{
3166 xlog_ticket_t *tic; 3327 struct xlog_ticket *tic;
3167 uint num_headers; 3328 uint num_headers;
3329 int iclog_space;
3168 3330
3169 tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); 3331 tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
3170 if (!tic) 3332 if (!tic)
@@ -3208,16 +3370,40 @@ xlog_ticket_alloc(xlog_t *log,
3208 /* for start-rec */ 3370 /* for start-rec */
3209 unit_bytes += sizeof(xlog_op_header_t); 3371 unit_bytes += sizeof(xlog_op_header_t);
3210 3372
3211 /* for LR headers */ 3373 /*
3212 num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); 3374 * for LR headers - the space for data in an iclog is the size minus
3375 * the space used for the headers. If we use the iclog size, then we
3376 * undercalculate the number of headers required.
3377 *
3378 * Furthermore - the addition of op headers for split-recs might
3379 * increase the space required enough to require more log and op
3380 * headers, so take that into account too.
3381 *
3382 * IMPORTANT: This reservation makes the assumption that if this
3383 * transaction is the first in an iclog and hence has the LR headers
3384 * accounted to it, then the remaining space in the iclog is
3385 * exclusively for this transaction. i.e. if the transaction is larger
3386 * than the iclog, it will be the only thing in that iclog.
3387 * Fundamentally, this means we must pass the entire log vector to
3388 * xlog_write to guarantee this.
3389 */
3390 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
3391 num_headers = howmany(unit_bytes, iclog_space);
3392
3393 /* for split-recs - ophdrs added when data split over LRs */
3394 unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3395
3396 /* add extra header reservations if we overrun */
3397 while (!num_headers ||
3398 howmany(unit_bytes, iclog_space) > num_headers) {
3399 unit_bytes += sizeof(xlog_op_header_t);
3400 num_headers++;
3401 }
3213 unit_bytes += log->l_iclog_hsize * num_headers; 3402 unit_bytes += log->l_iclog_hsize * num_headers;
3214 3403
3215 /* for commit-rec LR header - note: padding will subsume the ophdr */ 3404 /* for commit-rec LR header - note: padding will subsume the ophdr */
3216 unit_bytes += log->l_iclog_hsize; 3405 unit_bytes += log->l_iclog_hsize;
3217 3406
3218 /* for split-recs - ophdrs added when data split over LRs */
3219 unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3220
3221 /* for roundoff padding for transaction data and one for commit record */ 3407 /* for roundoff padding for transaction data and one for commit record */
3222 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) && 3408 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
3223 log->l_mp->m_sb.sb_logsunit > 1) { 3409 log->l_mp->m_sb.sb_logsunit > 1) {
@@ -3233,13 +3419,13 @@ xlog_ticket_alloc(xlog_t *log,
3233 tic->t_curr_res = unit_bytes; 3419 tic->t_curr_res = unit_bytes;
3234 tic->t_cnt = cnt; 3420 tic->t_cnt = cnt;
3235 tic->t_ocnt = cnt; 3421 tic->t_ocnt = cnt;
3236 tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); 3422 tic->t_tid = random32();
3237 tic->t_clientid = client; 3423 tic->t_clientid = client;
3238 tic->t_flags = XLOG_TIC_INITED; 3424 tic->t_flags = XLOG_TIC_INITED;
3239 tic->t_trans_type = 0; 3425 tic->t_trans_type = 0;
3240 if (xflags & XFS_LOG_PERM_RESERV) 3426 if (xflags & XFS_LOG_PERM_RESERV)
3241 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3427 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3242 sv_init(&(tic->t_wait), SV_DEFAULT, "logtick"); 3428 sv_init(&tic->t_wait, SV_DEFAULT, "logtick");
3243 3429
3244 xlog_tic_reset_res(tic); 3430 xlog_tic_reset_res(tic);
3245 3431
@@ -3260,20 +3446,22 @@ xlog_ticket_alloc(xlog_t *log,
3260 * part of the log in case we trash the log structure. 3446 * part of the log in case we trash the log structure.
3261 */ 3447 */
3262void 3448void
3263xlog_verify_dest_ptr(xlog_t *log, 3449xlog_verify_dest_ptr(
3264 __psint_t ptr) 3450 struct log *log,
3451 char *ptr)
3265{ 3452{
3266 int i; 3453 int i;
3267 int good_ptr = 0; 3454 int good_ptr = 0;
3268 3455
3269 for (i=0; i < log->l_iclog_bufs; i++) { 3456 for (i = 0; i < log->l_iclog_bufs; i++) {
3270 if (ptr >= (__psint_t)log->l_iclog_bak[i] && 3457 if (ptr >= log->l_iclog_bak[i] &&
3271 ptr <= (__psint_t)log->l_iclog_bak[i]+log->l_iclog_size) 3458 ptr <= log->l_iclog_bak[i] + log->l_iclog_size)
3272 good_ptr++; 3459 good_ptr++;
3273 } 3460 }
3274 if (! good_ptr) 3461
3462 if (!good_ptr)
3275 xlog_panic("xlog_verify_dest_ptr: invalid ptr"); 3463 xlog_panic("xlog_verify_dest_ptr: invalid ptr");
3276} /* xlog_verify_dest_ptr */ 3464}
3277 3465
3278STATIC void 3466STATIC void
3279xlog_verify_grant_head(xlog_t *log, int equals) 3467xlog_verify_grant_head(xlog_t *log, int equals)
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 97a24c7795a4..229d1f36ba9a 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -110,6 +110,12 @@ typedef struct xfs_log_iovec {
110 uint i_type; /* type of region */ 110 uint i_type; /* type of region */
111} xfs_log_iovec_t; 111} xfs_log_iovec_t;
112 112
113struct xfs_log_vec {
114 struct xfs_log_vec *lv_next; /* next lv in build list */
115 int lv_niovecs; /* number of iovecs in lv */
116 struct xfs_log_iovec *lv_iovecp; /* iovec array */
117};
118
113/* 119/*
114 * Structure used to pass callback function and the function's argument 120 * Structure used to pass callback function and the function's argument
115 * to the log manager. 121 * to the log manager.
@@ -126,6 +132,13 @@ typedef struct xfs_log_callback {
126struct xfs_mount; 132struct xfs_mount;
127struct xlog_in_core; 133struct xlog_in_core;
128struct xlog_ticket; 134struct xlog_ticket;
135struct xfs_log_item;
136struct xfs_item_ops;
137
138void xfs_log_item_init(struct xfs_mount *mp,
139 struct xfs_log_item *item,
140 int type,
141 struct xfs_item_ops *ops);
129 142
130xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 143xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
131 struct xlog_ticket *ticket, 144 struct xlog_ticket *ticket,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index fd02a18facd5..9cf695154451 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -396,9 +396,7 @@ typedef struct log {
396 struct xfs_buf_cancel **l_buf_cancel_table; 396 struct xfs_buf_cancel **l_buf_cancel_table;
397 int l_iclog_hsize; /* size of iclog header */ 397 int l_iclog_hsize; /* size of iclog header */
398 int l_iclog_heads; /* # of iclog header sectors */ 398 int l_iclog_heads; /* # of iclog header sectors */
399 uint l_sectbb_log; /* log2 of sector size in BBs */ 399 uint l_sectBBsize; /* sector size in BBs (2^n) */
400 uint l_sectbb_mask; /* sector size (in BBs)
401 * alignment mask */
402 int l_iclog_size; /* size of log in bytes */ 400 int l_iclog_size; /* size of log in bytes */
403 int l_iclog_size_log; /* log power size of log */ 401 int l_iclog_size_log; /* log power size of log */
404 int l_iclog_bufs; /* number of iclog buffers */ 402 int l_iclog_bufs; /* number of iclog buffers */
@@ -449,6 +447,14 @@ extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
449 447
450extern kmem_zone_t *xfs_log_ticket_zone; 448extern kmem_zone_t *xfs_log_ticket_zone;
451 449
450static inline void
451xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
452{
453 *ptr += bytes;
454 *len -= bytes;
455 *off += bytes;
456}
457
452/* 458/*
453 * Unmount record type is used as a pseudo transaction type for the ticket. 459 * Unmount record type is used as a pseudo transaction type for the ticket.
454 * Its value must be outside the range of XFS_TRANS_* values. 460 * Its value must be outside the range of XFS_TRANS_* values.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 22e6efdc17ea..0de08e366315 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -56,33 +56,61 @@ STATIC void xlog_recover_check_summary(xlog_t *);
56#define xlog_recover_check_summary(log) 56#define xlog_recover_check_summary(log)
57#endif 57#endif
58 58
59
60/* 59/*
61 * Sector aligned buffer routines for buffer create/read/write/access 60 * Sector aligned buffer routines for buffer create/read/write/access
62 */ 61 */
63 62
64#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ 63/*
65 ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ 64 * Verify the given count of basic blocks is a valid number of blocks
66 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) 65 * to specify for an operation involving the given XFS log buffer.
67#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) 66 * Returns nonzero if the count is valid, 0 otherwise.
67 */
68 68
69static inline int
70xlog_buf_bbcount_valid(
71 xlog_t *log,
72 int bbcount)
73{
74 return bbcount > 0 && bbcount <= log->l_logBBsize;
75}
76
77/*
78 * Allocate a buffer to hold log data. The buffer needs to be able
79 * to map to a range of nbblks basic blocks at any valid (basic
80 * block) offset within the log.
81 */
69STATIC xfs_buf_t * 82STATIC xfs_buf_t *
70xlog_get_bp( 83xlog_get_bp(
71 xlog_t *log, 84 xlog_t *log,
72 int nbblks) 85 int nbblks)
73{ 86{
74 if (nbblks <= 0 || nbblks > log->l_logBBsize) { 87 if (!xlog_buf_bbcount_valid(log, nbblks)) {
75 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); 88 xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
76 XFS_ERROR_REPORT("xlog_get_bp(1)", 89 nbblks);
77 XFS_ERRLEVEL_HIGH, log->l_mp); 90 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
78 return NULL; 91 return NULL;
79 } 92 }
80 93
81 if (log->l_sectbb_log) { 94 /*
82 if (nbblks > 1) 95 * We do log I/O in units of log sectors (a power-of-2
83 nbblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 96 * multiple of the basic block size), so we round up the
84 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); 97 * requested size to acommodate the basic blocks required
85 } 98 * for complete log sectors.
99 *
100 * In addition, the buffer may be used for a non-sector-
101 * aligned block offset, in which case an I/O of the
102 * requested size could extend beyond the end of the
103 * buffer. If the requested size is only 1 basic block it
104 * will never straddle a sector boundary, so this won't be
105 * an issue. Nor will this be a problem if the log I/O is
106 * done in basic blocks (sector size 1). But otherwise we
107 * extend the buffer by one extra log sector to ensure
108 * there's space to accommodate this possibility.
109 */
110 if (nbblks > 1 && log->l_sectBBsize > 1)
111 nbblks += log->l_sectBBsize;
112 nbblks = round_up(nbblks, log->l_sectBBsize);
113
86 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); 114 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
87} 115}
88 116
@@ -93,6 +121,10 @@ xlog_put_bp(
93 xfs_buf_free(bp); 121 xfs_buf_free(bp);
94} 122}
95 123
124/*
125 * Return the address of the start of the given block number's data
126 * in a log buffer. The buffer covers a log sector-aligned region.
127 */
96STATIC xfs_caddr_t 128STATIC xfs_caddr_t
97xlog_align( 129xlog_align(
98 xlog_t *log, 130 xlog_t *log,
@@ -100,14 +132,14 @@ xlog_align(
100 int nbblks, 132 int nbblks,
101 xfs_buf_t *bp) 133 xfs_buf_t *bp)
102{ 134{
135 xfs_daddr_t offset;
103 xfs_caddr_t ptr; 136 xfs_caddr_t ptr;
104 137
105 if (!log->l_sectbb_log) 138 offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
106 return XFS_BUF_PTR(bp); 139 ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
140
141 ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
107 142
108 ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
109 ASSERT(XFS_BUF_SIZE(bp) >=
110 BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
111 return ptr; 143 return ptr;
112} 144}
113 145
@@ -124,21 +156,18 @@ xlog_bread_noalign(
124{ 156{
125 int error; 157 int error;
126 158
127 if (nbblks <= 0 || nbblks > log->l_logBBsize) { 159 if (!xlog_buf_bbcount_valid(log, nbblks)) {
128 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); 160 xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
129 XFS_ERROR_REPORT("xlog_bread(1)", 161 nbblks);
130 XFS_ERRLEVEL_HIGH, log->l_mp); 162 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
131 return EFSCORRUPTED; 163 return EFSCORRUPTED;
132 } 164 }
133 165
134 if (log->l_sectbb_log) { 166 blk_no = round_down(blk_no, log->l_sectBBsize);
135 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 167 nbblks = round_up(nbblks, log->l_sectBBsize);
136 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
137 }
138 168
139 ASSERT(nbblks > 0); 169 ASSERT(nbblks > 0);
140 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); 170 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
141 ASSERT(bp);
142 171
143 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 172 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
144 XFS_BUF_READ(bp); 173 XFS_BUF_READ(bp);
@@ -186,17 +215,15 @@ xlog_bwrite(
186{ 215{
187 int error; 216 int error;
188 217
189 if (nbblks <= 0 || nbblks > log->l_logBBsize) { 218 if (!xlog_buf_bbcount_valid(log, nbblks)) {
190 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); 219 xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
191 XFS_ERROR_REPORT("xlog_bwrite(1)", 220 nbblks);
192 XFS_ERRLEVEL_HIGH, log->l_mp); 221 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
193 return EFSCORRUPTED; 222 return EFSCORRUPTED;
194 } 223 }
195 224
196 if (log->l_sectbb_log) { 225 blk_no = round_down(blk_no, log->l_sectBBsize);
197 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 226 nbblks = round_up(nbblks, log->l_sectBBsize);
198 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
199 }
200 227
201 ASSERT(nbblks > 0); 228 ASSERT(nbblks > 0);
202 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); 229 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
@@ -327,39 +354,38 @@ xlog_find_cycle_start(
327{ 354{
328 xfs_caddr_t offset; 355 xfs_caddr_t offset;
329 xfs_daddr_t mid_blk; 356 xfs_daddr_t mid_blk;
357 xfs_daddr_t end_blk;
330 uint mid_cycle; 358 uint mid_cycle;
331 int error; 359 int error;
332 360
333 mid_blk = BLK_AVG(first_blk, *last_blk); 361 end_blk = *last_blk;
334 while (mid_blk != first_blk && mid_blk != *last_blk) { 362 mid_blk = BLK_AVG(first_blk, end_blk);
363 while (mid_blk != first_blk && mid_blk != end_blk) {
335 error = xlog_bread(log, mid_blk, 1, bp, &offset); 364 error = xlog_bread(log, mid_blk, 1, bp, &offset);
336 if (error) 365 if (error)
337 return error; 366 return error;
338 mid_cycle = xlog_get_cycle(offset); 367 mid_cycle = xlog_get_cycle(offset);
339 if (mid_cycle == cycle) { 368 if (mid_cycle == cycle)
340 *last_blk = mid_blk; 369 end_blk = mid_blk; /* last_half_cycle == mid_cycle */
341 /* last_half_cycle == mid_cycle */ 370 else
342 } else { 371 first_blk = mid_blk; /* first_half_cycle == mid_cycle */
343 first_blk = mid_blk; 372 mid_blk = BLK_AVG(first_blk, end_blk);
344 /* first_half_cycle == mid_cycle */
345 }
346 mid_blk = BLK_AVG(first_blk, *last_blk);
347 } 373 }
348 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || 374 ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
349 (mid_blk == *last_blk && mid_blk-1 == first_blk)); 375 (mid_blk == end_blk && mid_blk-1 == first_blk));
376
377 *last_blk = end_blk;
350 378
351 return 0; 379 return 0;
352} 380}
353 381
354/* 382/*
355 * Check that the range of blocks does not contain the cycle number 383 * Check that a range of blocks does not contain stop_on_cycle_no.
356 * given. The scan needs to occur from front to back and the ptr into the 384 * Fill in *new_blk with the block offset where such a block is
357 * region must be updated since a later routine will need to perform another 385 * found, or with -1 (an invalid block number) if there is no such
358 * test. If the region is completely good, we end up returning the same 386 * block in the range. The scan needs to occur from front to back
359 * last block number. 387 * and the pointer into the region must be updated since a later
360 * 388 * routine will need to perform another test.
361 * Set blkno to -1 if we encounter no errors. This is an invalid block number
362 * since we don't ever expect logs to get this large.
363 */ 389 */
364STATIC int 390STATIC int
365xlog_find_verify_cycle( 391xlog_find_verify_cycle(
@@ -376,12 +402,16 @@ xlog_find_verify_cycle(
376 xfs_caddr_t buf = NULL; 402 xfs_caddr_t buf = NULL;
377 int error = 0; 403 int error = 0;
378 404
405 /*
406 * Greedily allocate a buffer big enough to handle the full
407 * range of basic blocks we'll be examining. If that fails,
408 * try a smaller size. We need to be able to read at least
409 * a log sector, or we're out of luck.
410 */
379 bufblks = 1 << ffs(nbblks); 411 bufblks = 1 << ffs(nbblks);
380
381 while (!(bp = xlog_get_bp(log, bufblks))) { 412 while (!(bp = xlog_get_bp(log, bufblks))) {
382 /* can't get enough memory to do everything in one big buffer */
383 bufblks >>= 1; 413 bufblks >>= 1;
384 if (bufblks <= log->l_sectbb_log) 414 if (bufblks < log->l_sectBBsize)
385 return ENOMEM; 415 return ENOMEM;
386 } 416 }
387 417
@@ -629,7 +659,7 @@ xlog_find_head(
629 * In this case we want to find the first block with cycle 659 * In this case we want to find the first block with cycle
630 * number matching last_half_cycle. We expect the log to be 660 * number matching last_half_cycle. We expect the log to be
631 * some variation on 661 * some variation on
632 * x + 1 ... | x ... 662 * x + 1 ... | x ... | x
633 * The first block with cycle number x (last_half_cycle) will 663 * The first block with cycle number x (last_half_cycle) will
634 * be where the new head belongs. First we do a binary search 664 * be where the new head belongs. First we do a binary search
635 * for the first occurrence of last_half_cycle. The binary 665 * for the first occurrence of last_half_cycle. The binary
@@ -639,11 +669,13 @@ xlog_find_head(
639 * the log, then we look for occurrences of last_half_cycle - 1 669 * the log, then we look for occurrences of last_half_cycle - 1
640 * at the end of the log. The cases we're looking for look 670 * at the end of the log. The cases we're looking for look
641 * like 671 * like
642 * x + 1 ... | x | x + 1 | x ... 672 * v binary search stopped here
643 * ^ binary search stopped here 673 * x + 1 ... | x | x + 1 | x ... | x
674 * ^ but we want to locate this spot
644 * or 675 * or
645 * x + 1 ... | x ... | x - 1 | x
646 * <---------> less than scan distance 676 * <---------> less than scan distance
677 * x + 1 ... | x ... | x - 1 | x
678 * ^ we want to locate this spot
647 */ 679 */
648 stop_on_cycle = last_half_cycle; 680 stop_on_cycle = last_half_cycle;
649 if ((error = xlog_find_cycle_start(log, bp, first_blk, 681 if ((error = xlog_find_cycle_start(log, bp, first_blk,
@@ -699,16 +731,16 @@ xlog_find_head(
699 * certainly not the head of the log. By searching for 731 * certainly not the head of the log. By searching for
700 * last_half_cycle-1 we accomplish that. 732 * last_half_cycle-1 we accomplish that.
701 */ 733 */
702 start_blk = log_bbnum - num_scan_bblks + head_blk;
703 ASSERT(head_blk <= INT_MAX && 734 ASSERT(head_blk <= INT_MAX &&
704 (xfs_daddr_t) num_scan_bblks - head_blk >= 0); 735 (xfs_daddr_t) num_scan_bblks >= head_blk);
736 start_blk = log_bbnum - (num_scan_bblks - head_blk);
705 if ((error = xlog_find_verify_cycle(log, start_blk, 737 if ((error = xlog_find_verify_cycle(log, start_blk,
706 num_scan_bblks - (int)head_blk, 738 num_scan_bblks - (int)head_blk,
707 (stop_on_cycle - 1), &new_blk))) 739 (stop_on_cycle - 1), &new_blk)))
708 goto bp_err; 740 goto bp_err;
709 if (new_blk != -1) { 741 if (new_blk != -1) {
710 head_blk = new_blk; 742 head_blk = new_blk;
711 goto bad_blk; 743 goto validate_head;
712 } 744 }
713 745
714 /* 746 /*
@@ -726,7 +758,7 @@ xlog_find_head(
726 head_blk = new_blk; 758 head_blk = new_blk;
727 } 759 }
728 760
729 bad_blk: 761validate_head:
730 /* 762 /*
731 * Now we need to make sure head_blk is not pointing to a block in 763 * Now we need to make sure head_blk is not pointing to a block in
732 * the middle of a log record. 764 * the middle of a log record.
@@ -748,7 +780,7 @@ xlog_find_head(
748 if ((error = xlog_find_verify_log_record(log, start_blk, 780 if ((error = xlog_find_verify_log_record(log, start_blk,
749 &head_blk, 0)) == -1) { 781 &head_blk, 0)) == -1) {
750 /* We hit the beginning of the log during our search */ 782 /* We hit the beginning of the log during our search */
751 start_blk = log_bbnum - num_scan_bblks + head_blk; 783 start_blk = log_bbnum - (num_scan_bblks - head_blk);
752 new_blk = log_bbnum; 784 new_blk = log_bbnum;
753 ASSERT(start_blk <= INT_MAX && 785 ASSERT(start_blk <= INT_MAX &&
754 (xfs_daddr_t) log_bbnum-start_blk >= 0); 786 (xfs_daddr_t) log_bbnum-start_blk >= 0);
@@ -833,12 +865,12 @@ xlog_find_tail(
833 if (*head_blk == 0) { /* special case */ 865 if (*head_blk == 0) { /* special case */
834 error = xlog_bread(log, 0, 1, bp, &offset); 866 error = xlog_bread(log, 0, 1, bp, &offset);
835 if (error) 867 if (error)
836 goto bread_err; 868 goto done;
837 869
838 if (xlog_get_cycle(offset) == 0) { 870 if (xlog_get_cycle(offset) == 0) {
839 *tail_blk = 0; 871 *tail_blk = 0;
840 /* leave all other log inited values alone */ 872 /* leave all other log inited values alone */
841 goto exit; 873 goto done;
842 } 874 }
843 } 875 }
844 876
@@ -849,7 +881,7 @@ xlog_find_tail(
849 for (i = (int)(*head_blk) - 1; i >= 0; i--) { 881 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
850 error = xlog_bread(log, i, 1, bp, &offset); 882 error = xlog_bread(log, i, 1, bp, &offset);
851 if (error) 883 if (error)
852 goto bread_err; 884 goto done;
853 885
854 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { 886 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
855 found = 1; 887 found = 1;
@@ -866,7 +898,7 @@ xlog_find_tail(
866 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { 898 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
867 error = xlog_bread(log, i, 1, bp, &offset); 899 error = xlog_bread(log, i, 1, bp, &offset);
868 if (error) 900 if (error)
869 goto bread_err; 901 goto done;
870 902
871 if (XLOG_HEADER_MAGIC_NUM == 903 if (XLOG_HEADER_MAGIC_NUM ==
872 be32_to_cpu(*(__be32 *)offset)) { 904 be32_to_cpu(*(__be32 *)offset)) {
@@ -941,7 +973,7 @@ xlog_find_tail(
941 umount_data_blk = (i + hblks) % log->l_logBBsize; 973 umount_data_blk = (i + hblks) % log->l_logBBsize;
942 error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 974 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
943 if (error) 975 if (error)
944 goto bread_err; 976 goto done;
945 977
946 op_head = (xlog_op_header_t *)offset; 978 op_head = (xlog_op_header_t *)offset;
947 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 979 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
@@ -987,12 +1019,10 @@ xlog_find_tail(
987 * But... if the -device- itself is readonly, just skip this. 1019 * But... if the -device- itself is readonly, just skip this.
988 * We can't recover this device anyway, so it won't matter. 1020 * We can't recover this device anyway, so it won't matter.
989 */ 1021 */
990 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { 1022 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
991 error = xlog_clear_stale_blocks(log, tail_lsn); 1023 error = xlog_clear_stale_blocks(log, tail_lsn);
992 }
993 1024
994bread_err: 1025done:
995exit:
996 xlog_put_bp(bp); 1026 xlog_put_bp(bp);
997 1027
998 if (error) 1028 if (error)
@@ -1152,16 +1182,22 @@ xlog_write_log_records(
1152 xfs_caddr_t offset; 1182 xfs_caddr_t offset;
1153 xfs_buf_t *bp; 1183 xfs_buf_t *bp;
1154 int balign, ealign; 1184 int balign, ealign;
1155 int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 1185 int sectbb = log->l_sectBBsize;
1156 int end_block = start_block + blocks; 1186 int end_block = start_block + blocks;
1157 int bufblks; 1187 int bufblks;
1158 int error = 0; 1188 int error = 0;
1159 int i, j = 0; 1189 int i, j = 0;
1160 1190
1191 /*
1192 * Greedily allocate a buffer big enough to handle the full
1193 * range of basic blocks to be written. If that fails, try
1194 * a smaller size. We need to be able to write at least a
1195 * log sector, or we're out of luck.
1196 */
1161 bufblks = 1 << ffs(blocks); 1197 bufblks = 1 << ffs(blocks);
1162 while (!(bp = xlog_get_bp(log, bufblks))) { 1198 while (!(bp = xlog_get_bp(log, bufblks))) {
1163 bufblks >>= 1; 1199 bufblks >>= 1;
1164 if (bufblks <= log->l_sectbb_log) 1200 if (bufblks < sectbb)
1165 return ENOMEM; 1201 return ENOMEM;
1166 } 1202 }
1167 1203
@@ -1169,7 +1205,7 @@ xlog_write_log_records(
1169 * the buffer in the starting sector not covered by the first 1205 * the buffer in the starting sector not covered by the first
1170 * write below. 1206 * write below.
1171 */ 1207 */
1172 balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); 1208 balign = round_down(start_block, sectbb);
1173 if (balign != start_block) { 1209 if (balign != start_block) {
1174 error = xlog_bread_noalign(log, start_block, 1, bp); 1210 error = xlog_bread_noalign(log, start_block, 1, bp);
1175 if (error) 1211 if (error)
@@ -1188,7 +1224,7 @@ xlog_write_log_records(
1188 * the buffer in the final sector not covered by the write. 1224 * the buffer in the final sector not covered by the write.
1189 * If this is the same sector as the above read, skip it. 1225 * If this is the same sector as the above read, skip it.
1190 */ 1226 */
1191 ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); 1227 ealign = round_down(end_block, sectbb);
1192 if (j == 0 && (start_block + endcount > ealign)) { 1228 if (j == 0 && (start_block + endcount > ealign)) {
1193 offset = XFS_BUF_PTR(bp); 1229 offset = XFS_BUF_PTR(bp);
1194 balign = BBTOB(ealign - start_block); 1230 balign = BBTOB(ealign - start_block);
@@ -1408,6 +1444,7 @@ xlog_recover_add_item(
1408 1444
1409STATIC int 1445STATIC int
1410xlog_recover_add_to_cont_trans( 1446xlog_recover_add_to_cont_trans(
1447 struct log *log,
1411 xlog_recover_t *trans, 1448 xlog_recover_t *trans,
1412 xfs_caddr_t dp, 1449 xfs_caddr_t dp,
1413 int len) 1450 int len)
@@ -1434,6 +1471,7 @@ xlog_recover_add_to_cont_trans(
1434 memcpy(&ptr[old_len], dp, len); /* d, s, l */ 1471 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1435 item->ri_buf[item->ri_cnt-1].i_len += len; 1472 item->ri_buf[item->ri_cnt-1].i_len += len;
1436 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 1473 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1474 trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
1437 return 0; 1475 return 0;
1438} 1476}
1439 1477
@@ -1452,6 +1490,7 @@ xlog_recover_add_to_cont_trans(
1452 */ 1490 */
1453STATIC int 1491STATIC int
1454xlog_recover_add_to_trans( 1492xlog_recover_add_to_trans(
1493 struct log *log,
1455 xlog_recover_t *trans, 1494 xlog_recover_t *trans,
1456 xfs_caddr_t dp, 1495 xfs_caddr_t dp,
1457 int len) 1496 int len)
@@ -1510,6 +1549,7 @@ xlog_recover_add_to_trans(
1510 item->ri_buf[item->ri_cnt].i_addr = ptr; 1549 item->ri_buf[item->ri_cnt].i_addr = ptr;
1511 item->ri_buf[item->ri_cnt].i_len = len; 1550 item->ri_buf[item->ri_cnt].i_len = len;
1512 item->ri_cnt++; 1551 item->ri_cnt++;
1552 trace_xfs_log_recover_item_add(log, trans, item, 0);
1513 return 0; 1553 return 0;
1514} 1554}
1515 1555
@@ -1521,7 +1561,9 @@ xlog_recover_add_to_trans(
1521 */ 1561 */
1522STATIC int 1562STATIC int
1523xlog_recover_reorder_trans( 1563xlog_recover_reorder_trans(
1524 xlog_recover_t *trans) 1564 struct log *log,
1565 xlog_recover_t *trans,
1566 int pass)
1525{ 1567{
1526 xlog_recover_item_t *item, *n; 1568 xlog_recover_item_t *item, *n;
1527 LIST_HEAD(sort_list); 1569 LIST_HEAD(sort_list);
@@ -1535,6 +1577,8 @@ xlog_recover_reorder_trans(
1535 switch (ITEM_TYPE(item)) { 1577 switch (ITEM_TYPE(item)) {
1536 case XFS_LI_BUF: 1578 case XFS_LI_BUF:
1537 if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { 1579 if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
1580 trace_xfs_log_recover_item_reorder_head(log,
1581 trans, item, pass);
1538 list_move(&item->ri_list, &trans->r_itemq); 1582 list_move(&item->ri_list, &trans->r_itemq);
1539 break; 1583 break;
1540 } 1584 }
@@ -1543,6 +1587,8 @@ xlog_recover_reorder_trans(
1543 case XFS_LI_QUOTAOFF: 1587 case XFS_LI_QUOTAOFF:
1544 case XFS_LI_EFD: 1588 case XFS_LI_EFD:
1545 case XFS_LI_EFI: 1589 case XFS_LI_EFI:
1590 trace_xfs_log_recover_item_reorder_tail(log,
1591 trans, item, pass);
1546 list_move_tail(&item->ri_list, &trans->r_itemq); 1592 list_move_tail(&item->ri_list, &trans->r_itemq);
1547 break; 1593 break;
1548 default: 1594 default:
@@ -1592,8 +1638,10 @@ xlog_recover_do_buffer_pass1(
1592 /* 1638 /*
1593 * If this isn't a cancel buffer item, then just return. 1639 * If this isn't a cancel buffer item, then just return.
1594 */ 1640 */
1595 if (!(flags & XFS_BLI_CANCEL)) 1641 if (!(flags & XFS_BLI_CANCEL)) {
1642 trace_xfs_log_recover_buf_not_cancel(log, buf_f);
1596 return; 1643 return;
1644 }
1597 1645
1598 /* 1646 /*
1599 * Insert an xfs_buf_cancel record into the hash table of 1647 * Insert an xfs_buf_cancel record into the hash table of
@@ -1627,6 +1675,7 @@ xlog_recover_do_buffer_pass1(
1627 while (nextp != NULL) { 1675 while (nextp != NULL) {
1628 if (nextp->bc_blkno == blkno && nextp->bc_len == len) { 1676 if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
1629 nextp->bc_refcount++; 1677 nextp->bc_refcount++;
1678 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
1630 return; 1679 return;
1631 } 1680 }
1632 prevp = nextp; 1681 prevp = nextp;
@@ -1640,6 +1689,7 @@ xlog_recover_do_buffer_pass1(
1640 bcp->bc_refcount = 1; 1689 bcp->bc_refcount = 1;
1641 bcp->bc_next = NULL; 1690 bcp->bc_next = NULL;
1642 prevp->bc_next = bcp; 1691 prevp->bc_next = bcp;
1692 trace_xfs_log_recover_buf_cancel_add(log, buf_f);
1643} 1693}
1644 1694
1645/* 1695/*
@@ -1779,6 +1829,8 @@ xlog_recover_do_inode_buffer(
1779 unsigned int *data_map = NULL; 1829 unsigned int *data_map = NULL;
1780 unsigned int map_size = 0; 1830 unsigned int map_size = 0;
1781 1831
1832 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1833
1782 switch (buf_f->blf_type) { 1834 switch (buf_f->blf_type) {
1783 case XFS_LI_BUF: 1835 case XFS_LI_BUF:
1784 data_map = buf_f->blf_data_map; 1836 data_map = buf_f->blf_data_map;
@@ -1874,6 +1926,7 @@ xlog_recover_do_inode_buffer(
1874/*ARGSUSED*/ 1926/*ARGSUSED*/
1875STATIC void 1927STATIC void
1876xlog_recover_do_reg_buffer( 1928xlog_recover_do_reg_buffer(
1929 struct xfs_mount *mp,
1877 xlog_recover_item_t *item, 1930 xlog_recover_item_t *item,
1878 xfs_buf_t *bp, 1931 xfs_buf_t *bp,
1879 xfs_buf_log_format_t *buf_f) 1932 xfs_buf_log_format_t *buf_f)
@@ -1885,6 +1938,8 @@ xlog_recover_do_reg_buffer(
1885 unsigned int map_size = 0; 1938 unsigned int map_size = 0;
1886 int error; 1939 int error;
1887 1940
1941 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
1942
1888 switch (buf_f->blf_type) { 1943 switch (buf_f->blf_type) {
1889 case XFS_LI_BUF: 1944 case XFS_LI_BUF:
1890 data_map = buf_f->blf_data_map; 1945 data_map = buf_f->blf_data_map;
@@ -2083,6 +2138,8 @@ xlog_recover_do_dquot_buffer(
2083{ 2138{
2084 uint type; 2139 uint type;
2085 2140
2141 trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
2142
2086 /* 2143 /*
2087 * Filesystems are required to send in quota flags at mount time. 2144 * Filesystems are required to send in quota flags at mount time.
2088 */ 2145 */
@@ -2103,7 +2160,7 @@ xlog_recover_do_dquot_buffer(
2103 if (log->l_quotaoffs_flag & type) 2160 if (log->l_quotaoffs_flag & type)
2104 return; 2161 return;
2105 2162
2106 xlog_recover_do_reg_buffer(item, bp, buf_f); 2163 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2107} 2164}
2108 2165
2109/* 2166/*
@@ -2164,9 +2221,11 @@ xlog_recover_do_buffer_trans(
2164 */ 2221 */
2165 cancel = xlog_recover_do_buffer_pass2(log, buf_f); 2222 cancel = xlog_recover_do_buffer_pass2(log, buf_f);
2166 if (cancel) { 2223 if (cancel) {
2224 trace_xfs_log_recover_buf_cancel(log, buf_f);
2167 return 0; 2225 return 0;
2168 } 2226 }
2169 } 2227 }
2228 trace_xfs_log_recover_buf_recover(log, buf_f);
2170 switch (buf_f->blf_type) { 2229 switch (buf_f->blf_type) {
2171 case XFS_LI_BUF: 2230 case XFS_LI_BUF:
2172 blkno = buf_f->blf_blkno; 2231 blkno = buf_f->blf_blkno;
@@ -2204,7 +2263,7 @@ xlog_recover_do_buffer_trans(
2204 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 2263 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
2205 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2264 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2206 } else { 2265 } else {
2207 xlog_recover_do_reg_buffer(item, bp, buf_f); 2266 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2208 } 2267 }
2209 if (error) 2268 if (error)
2210 return XFS_ERROR(error); 2269 return XFS_ERROR(error);
@@ -2284,8 +2343,10 @@ xlog_recover_do_inode_trans(
2284 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, 2343 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
2285 in_f->ilf_len, 0)) { 2344 in_f->ilf_len, 0)) {
2286 error = 0; 2345 error = 0;
2346 trace_xfs_log_recover_inode_cancel(log, in_f);
2287 goto error; 2347 goto error;
2288 } 2348 }
2349 trace_xfs_log_recover_inode_recover(log, in_f);
2289 2350
2290 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 2351 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
2291 XBF_LOCK); 2352 XBF_LOCK);
@@ -2337,6 +2398,7 @@ xlog_recover_do_inode_trans(
2337 /* do nothing */ 2398 /* do nothing */
2338 } else { 2399 } else {
2339 xfs_buf_relse(bp); 2400 xfs_buf_relse(bp);
2401 trace_xfs_log_recover_inode_skip(log, in_f);
2340 error = 0; 2402 error = 0;
2341 goto error; 2403 goto error;
2342 } 2404 }
@@ -2758,11 +2820,12 @@ xlog_recover_do_trans(
2758 int error = 0; 2820 int error = 0;
2759 xlog_recover_item_t *item; 2821 xlog_recover_item_t *item;
2760 2822
2761 error = xlog_recover_reorder_trans(trans); 2823 error = xlog_recover_reorder_trans(log, trans, pass);
2762 if (error) 2824 if (error)
2763 return error; 2825 return error;
2764 2826
2765 list_for_each_entry(item, &trans->r_itemq, ri_list) { 2827 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2828 trace_xfs_log_recover_item_recover(log, trans, item, pass);
2766 switch (ITEM_TYPE(item)) { 2829 switch (ITEM_TYPE(item)) {
2767 case XFS_LI_BUF: 2830 case XFS_LI_BUF:
2768 error = xlog_recover_do_buffer_trans(log, item, pass); 2831 error = xlog_recover_do_buffer_trans(log, item, pass);
@@ -2919,8 +2982,9 @@ xlog_recover_process_data(
2919 error = xlog_recover_unmount_trans(trans); 2982 error = xlog_recover_unmount_trans(trans);
2920 break; 2983 break;
2921 case XLOG_WAS_CONT_TRANS: 2984 case XLOG_WAS_CONT_TRANS:
2922 error = xlog_recover_add_to_cont_trans(trans, 2985 error = xlog_recover_add_to_cont_trans(log,
2923 dp, be32_to_cpu(ohead->oh_len)); 2986 trans, dp,
2987 be32_to_cpu(ohead->oh_len));
2924 break; 2988 break;
2925 case XLOG_START_TRANS: 2989 case XLOG_START_TRANS:
2926 xlog_warn( 2990 xlog_warn(
@@ -2930,7 +2994,7 @@ xlog_recover_process_data(
2930 break; 2994 break;
2931 case 0: 2995 case 0:
2932 case XLOG_CONTINUE_TRANS: 2996 case XLOG_CONTINUE_TRANS:
2933 error = xlog_recover_add_to_trans(trans, 2997 error = xlog_recover_add_to_trans(log, trans,
2934 dp, be32_to_cpu(ohead->oh_len)); 2998 dp, be32_to_cpu(ohead->oh_len));
2935 break; 2999 break;
2936 default: 3000 default:
@@ -3331,42 +3395,6 @@ xlog_pack_data(
3331 } 3395 }
3332} 3396}
3333 3397
3334#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
3335STATIC void
3336xlog_unpack_data_checksum(
3337 xlog_rec_header_t *rhead,
3338 xfs_caddr_t dp,
3339 xlog_t *log)
3340{
3341 __be32 *up = (__be32 *)dp;
3342 uint chksum = 0;
3343 int i;
3344
3345 /* divide length by 4 to get # words */
3346 for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) {
3347 chksum ^= be32_to_cpu(*up);
3348 up++;
3349 }
3350 if (chksum != be32_to_cpu(rhead->h_chksum)) {
3351 if (rhead->h_chksum ||
3352 ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
3353 cmn_err(CE_DEBUG,
3354 "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
3355 be32_to_cpu(rhead->h_chksum), chksum);
3356 cmn_err(CE_DEBUG,
3357"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
3358 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3359 cmn_err(CE_DEBUG,
3360 "XFS: LogR this is a LogV2 filesystem\n");
3361 }
3362 log->l_flags |= XLOG_CHKSUM_MISMATCH;
3363 }
3364 }
3365}
3366#else
3367#define xlog_unpack_data_checksum(rhead, dp, log)
3368#endif
3369
3370STATIC void 3398STATIC void
3371xlog_unpack_data( 3399xlog_unpack_data(
3372 xlog_rec_header_t *rhead, 3400 xlog_rec_header_t *rhead,
@@ -3390,8 +3418,6 @@ xlog_unpack_data(
3390 dp += BBSIZE; 3418 dp += BBSIZE;
3391 } 3419 }
3392 } 3420 }
3393
3394 xlog_unpack_data_checksum(rhead, dp, log);
3395} 3421}
3396 3422
3397STATIC int 3423STATIC int
@@ -3490,7 +3516,7 @@ xlog_do_recovery_pass(
3490 hblks = 1; 3516 hblks = 1;
3491 } 3517 }
3492 } else { 3518 } else {
3493 ASSERT(log->l_sectbb_log == 0); 3519 ASSERT(log->l_sectBBsize == 1);
3494 hblks = 1; 3520 hblks = 1;
3495 hbp = xlog_get_bp(log, 1); 3521 hbp = xlog_get_bp(log, 1);
3496 h_size = XLOG_BIG_RECORD_BSIZE; 3522 h_size = XLOG_BIG_RECORD_BSIZE;
@@ -3946,10 +3972,6 @@ xlog_recover_check_summary(
3946 xfs_agf_t *agfp; 3972 xfs_agf_t *agfp;
3947 xfs_buf_t *agfbp; 3973 xfs_buf_t *agfbp;
3948 xfs_buf_t *agibp; 3974 xfs_buf_t *agibp;
3949 xfs_buf_t *sbbp;
3950#ifdef XFS_LOUD_RECOVERY
3951 xfs_sb_t *sbp;
3952#endif
3953 xfs_agnumber_t agno; 3975 xfs_agnumber_t agno;
3954 __uint64_t freeblks; 3976 __uint64_t freeblks;
3955 __uint64_t itotal; 3977 __uint64_t itotal;
@@ -3984,30 +4006,5 @@ xlog_recover_check_summary(
3984 xfs_buf_relse(agibp); 4006 xfs_buf_relse(agibp);
3985 } 4007 }
3986 } 4008 }
3987
3988 sbbp = xfs_getsb(mp, 0);
3989#ifdef XFS_LOUD_RECOVERY
3990 sbp = &mp->m_sb;
3991 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp));
3992 cmn_err(CE_NOTE,
3993 "xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
3994 sbp->sb_icount, itotal);
3995 cmn_err(CE_NOTE,
3996 "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu",
3997 sbp->sb_ifree, ifree);
3998 cmn_err(CE_NOTE,
3999 "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
4000 sbp->sb_fdblocks, freeblks);
4001#if 0
4002 /*
4003 * This is turned off until I account for the allocation
4004 * btree blocks which live in free space.
4005 */
4006 ASSERT(sbp->sb_icount == itotal);
4007 ASSERT(sbp->sb_ifree == ifree);
4008 ASSERT(sbp->sb_fdblocks == freeblks);
4009#endif
4010#endif
4011 xfs_buf_relse(sbbp);
4012} 4009}
4013#endif /* DEBUG */ 4010#endif /* DEBUG */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index e79b56b4bca6..d7bf38c8cd1c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1405,13 +1405,6 @@ xfs_mountfs(
1405 xfs_qm_mount_quotas(mp); 1405 xfs_qm_mount_quotas(mp);
1406 } 1406 }
1407 1407
1408#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1409 if (XFS_IS_QUOTA_ON(mp))
1410 xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
1411 else
1412 xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
1413#endif
1414
1415 /* 1408 /*
1416 * Now we are mounted, reserve a small amount of unused space for 1409 * Now we are mounted, reserve a small amount of unused space for
1417 * privileged transactions. This is needed so that transaction 1410 * privileged transactions. This is needed so that transaction
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4fa0bc7b983e..9ff48a16a7ee 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
259 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
260 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
261 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */
262} xfs_mount_t; 263} xfs_mount_t;
263 264
264/* 265/*
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index fdcab3f81dde..e0e64b113bd6 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -201,9 +201,6 @@ typedef struct xfs_qoff_logformat {
201#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 201#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
202#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */ 202#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */
203#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 203#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
204#define XFS_QMOPT_QUOTAOFF 0x0000080 /* quotas are being turned off */
205#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */
206#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */
207#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 204#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
208#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 205#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
209#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 206#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f73e358bae8d..be578ecb4af2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -45,23 +45,12 @@
45#include "xfs_trans_space.h" 45#include "xfs_trans_space.h"
46#include "xfs_inode_item.h" 46#include "xfs_inode_item.h"
47 47
48
49STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *);
50STATIC uint xfs_trans_count_vecs(xfs_trans_t *);
51STATIC void xfs_trans_fill_vecs(xfs_trans_t *, xfs_log_iovec_t *);
52STATIC void xfs_trans_uncommit(xfs_trans_t *, uint);
53STATIC void xfs_trans_committed(xfs_trans_t *, int);
54STATIC void xfs_trans_chunk_committed(xfs_log_item_chunk_t *, xfs_lsn_t, int);
55STATIC void xfs_trans_free(xfs_trans_t *);
56
57kmem_zone_t *xfs_trans_zone; 48kmem_zone_t *xfs_trans_zone;
58 49
59
60/* 50/*
61 * Reservation functions here avoid a huge stack in xfs_trans_init 51 * Reservation functions here avoid a huge stack in xfs_trans_init
62 * due to register overflow from temporaries in the calculations. 52 * due to register overflow from temporaries in the calculations.
63 */ 53 */
64
65STATIC uint 54STATIC uint
66xfs_calc_write_reservation(xfs_mount_t *mp) 55xfs_calc_write_reservation(xfs_mount_t *mp)
67{ 56{
@@ -261,6 +250,19 @@ _xfs_trans_alloc(
261} 250}
262 251
263/* 252/*
253 * Free the transaction structure. If there is more clean up
254 * to do when the structure is freed, add it here.
255 */
256STATIC void
257xfs_trans_free(
258 xfs_trans_t *tp)
259{
260 atomic_dec(&tp->t_mountp->m_active_trans);
261 xfs_trans_free_dqinfo(tp);
262 kmem_zone_free(xfs_trans_zone, tp);
263}
264
265/*
264 * This is called to create a new transaction which will share the 266 * This is called to create a new transaction which will share the
265 * permanent log reservation of the given transaction. The remaining 267 * permanent log reservation of the given transaction. The remaining
266 * unused block and rt extent reservations are also inherited. This 268 * unused block and rt extent reservations are also inherited. This
@@ -764,94 +766,278 @@ xfs_trans_unreserve_and_mod_sb(
764 } 766 }
765} 767}
766 768
769/*
770 * Total up the number of log iovecs needed to commit this
771 * transaction. The transaction itself needs one for the
772 * transaction header. Ask each dirty item in turn how many
773 * it needs to get the total.
774 */
775static uint
776xfs_trans_count_vecs(
777 struct xfs_trans *tp)
778{
779 int nvecs;
780 xfs_log_item_desc_t *lidp;
781
782 nvecs = 1;
783 lidp = xfs_trans_first_item(tp);
784 ASSERT(lidp != NULL);
785
786 /* In the non-debug case we need to start bailing out if we
787 * didn't find a log_item here, return zero and let trans_commit
788 * deal with it.
789 */
790 if (lidp == NULL)
791 return 0;
792
793 while (lidp != NULL) {
794 /*
795 * Skip items which aren't dirty in this transaction.
796 */
797 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
798 lidp = xfs_trans_next_item(tp, lidp);
799 continue;
800 }
801 lidp->lid_size = IOP_SIZE(lidp->lid_item);
802 nvecs += lidp->lid_size;
803 lidp = xfs_trans_next_item(tp, lidp);
804 }
805
806 return nvecs;
807}
767 808
768/* 809/*
769 * xfs_trans_commit 810 * Fill in the vector with pointers to data to be logged
811 * by this transaction. The transaction header takes
812 * the first vector, and then each dirty item takes the
813 * number of vectors it indicated it needed in xfs_trans_count_vecs().
770 * 814 *
771 * Commit the given transaction to the log a/synchronously. 815 * As each item fills in the entries it needs, also pin the item
816 * so that it cannot be flushed out until the log write completes.
817 */
818static void
819xfs_trans_fill_vecs(
820 struct xfs_trans *tp,
821 struct xfs_log_iovec *log_vector)
822{
823 xfs_log_item_desc_t *lidp;
824 struct xfs_log_iovec *vecp;
825 uint nitems;
826
827 /*
828 * Skip over the entry for the transaction header, we'll
829 * fill that in at the end.
830 */
831 vecp = log_vector + 1;
832
833 nitems = 0;
834 lidp = xfs_trans_first_item(tp);
835 ASSERT(lidp);
836 while (lidp) {
837 /* Skip items which aren't dirty in this transaction. */
838 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
839 lidp = xfs_trans_next_item(tp, lidp);
840 continue;
841 }
842
843 /*
844 * The item may be marked dirty but not log anything. This can
845 * be used to get called when a transaction is committed.
846 */
847 if (lidp->lid_size)
848 nitems++;
849 IOP_FORMAT(lidp->lid_item, vecp);
850 vecp += lidp->lid_size;
851 IOP_PIN(lidp->lid_item);
852 lidp = xfs_trans_next_item(tp, lidp);
853 }
854
855 /*
856 * Now that we've counted the number of items in this transaction, fill
857 * in the transaction header. Note that the transaction header does not
858 * have a log item.
859 */
860 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
861 tp->t_header.th_type = tp->t_type;
862 tp->t_header.th_num_items = nitems;
863 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
864 log_vector->i_len = sizeof(xfs_trans_header_t);
865 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
866}
867
868/*
869 * The committed item processing consists of calling the committed routine of
870 * each logged item, updating the item's position in the AIL if necessary, and
871 * unpinning each item. If the committed routine returns -1, then do nothing
872 * further with the item because it may have been freed.
772 * 873 *
773 * XFS disk error handling mechanism is not based on a typical 874 * Since items are unlocked when they are copied to the incore log, it is
774 * transaction abort mechanism. Logically after the filesystem 875 * possible for two transactions to be completing and manipulating the same
775 * gets marked 'SHUTDOWN', we can't let any new transactions 876 * item simultaneously. The AIL lock will protect the lsn field of each item.
776 * be durable - ie. committed to disk - because some metadata might 877 * The value of this field can never go backwards.
777 * be inconsistent. In such cases, this returns an error, and the 878 *
778 * caller may assume that all locked objects joined to the transaction 879 * We unpin the items after repositioning them in the AIL, because otherwise
779 * have already been unlocked as if the commit had succeeded. 880 * they could be immediately flushed and we'd have to race with the flusher
780 * Do not reference the transaction structure after this call. 881 * trying to pull the item from the AIL as we add it.
781 */ 882 */
782 /*ARGSUSED*/ 883static void
783int 884xfs_trans_item_committed(
784_xfs_trans_commit( 885 struct xfs_log_item *lip,
785 xfs_trans_t *tp, 886 xfs_lsn_t commit_lsn,
786 uint flags, 887 int aborted)
787 int *log_flushed)
788{ 888{
789 xfs_log_iovec_t *log_vector; 889 xfs_lsn_t item_lsn;
790 int nvec; 890 struct xfs_ail *ailp;
791 xfs_mount_t *mp;
792 xfs_lsn_t commit_lsn;
793 /* REFERENCED */
794 int error;
795 int log_flags;
796 int sync;
797#define XFS_TRANS_LOGVEC_COUNT 16
798 xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
799 struct xlog_in_core *commit_iclog;
800 int shutdown;
801 891
802 commit_lsn = -1; 892 if (aborted)
893 lip->li_flags |= XFS_LI_ABORTED;
894 item_lsn = IOP_COMMITTED(lip, commit_lsn);
895
896 /* If the committed routine returns -1, item has been freed. */
897 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
898 return;
803 899
804 /* 900 /*
805 * Determine whether this commit is releasing a permanent 901 * If the returned lsn is greater than what it contained before, update
806 * log reservation or not. 902 * the location of the item in the AIL. If it is not, then do nothing.
903 * Items can never move backwards in the AIL.
904 *
905 * While the new lsn should usually be greater, it is possible that a
906 * later transaction completing simultaneously with an earlier one
907 * using the same item could complete first with a higher lsn. This
908 * would cause the earlier transaction to fail the test below.
807 */ 909 */
808 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 910 ailp = lip->li_ailp;
809 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 911 spin_lock(&ailp->xa_lock);
810 log_flags = XFS_LOG_REL_PERM_RESERV; 912 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
913 /*
914 * This will set the item's lsn to item_lsn and update the
915 * position of the item in the AIL.
916 *
917 * xfs_trans_ail_update() drops the AIL lock.
918 */
919 xfs_trans_ail_update(ailp, lip, item_lsn);
811 } else { 920 } else {
812 log_flags = 0; 921 spin_unlock(&ailp->xa_lock);
813 } 922 }
814 mp = tp->t_mountp;
815 923
816 /* 924 /*
817 * If there is nothing to be logged by the transaction, 925 * Now that we've repositioned the item in the AIL, unpin it so it can
818 * then unlock all of the items associated with the 926 * be flushed. Pass information about buffer stale state down from the
819 * transaction and free the transaction structure. 927 * log item flags, if anyone else stales the buffer we do not want to
820 * Also make sure to return any reserved blocks to 928 * pay any attention to it.
821 * the free pool.
822 */ 929 */
823shut_us_down: 930 IOP_UNPIN(lip);
824 shutdown = XFS_FORCED_SHUTDOWN(mp) ? EIO : 0; 931}
825 if (!(tp->t_flags & XFS_TRANS_DIRTY) || shutdown) { 932
826 xfs_trans_unreserve_and_mod_sb(tp); 933/* Clear all the per-AG busy list items listed in this transaction */
934static void
935xfs_trans_clear_busy_extents(
936 struct xfs_trans *tp)
937{
938 xfs_log_busy_chunk_t *lbcp;
939 xfs_log_busy_slot_t *lbsp;
940 int i;
941
942 for (lbcp = &tp->t_busy; lbcp != NULL; lbcp = lbcp->lbc_next) {
943 i = 0;
944 for (lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) {
945 if (XFS_LBC_ISFREE(lbcp, i))
946 continue;
947 xfs_alloc_clear_busy(tp, lbsp->lbc_ag, lbsp->lbc_idx);
948 }
949 }
950 xfs_trans_free_busy(tp);
951}
952
953/*
954 * This is typically called by the LM when a transaction has been fully
955 * committed to disk. It needs to unpin the items which have
956 * been logged by the transaction and update their positions
957 * in the AIL if necessary.
958 *
959 * This also gets called when the transactions didn't get written out
960 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
961 */
962STATIC void
963xfs_trans_committed(
964 struct xfs_trans *tp,
965 int abortflag)
966{
967 xfs_log_item_desc_t *lidp;
968 xfs_log_item_chunk_t *licp;
969 xfs_log_item_chunk_t *next_licp;
970
971 /* Call the transaction's completion callback if there is one. */
972 if (tp->t_callback != NULL)
973 tp->t_callback(tp, tp->t_callarg);
974
975 for (lidp = xfs_trans_first_item(tp);
976 lidp != NULL;
977 lidp = xfs_trans_next_item(tp, lidp)) {
978 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
979 }
980
981 /* free the item chunks, ignoring the embedded chunk */
982 for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) {
983 next_licp = licp->lic_next;
984 kmem_free(licp);
985 }
986
987 xfs_trans_clear_busy_extents(tp);
988 xfs_trans_free(tp);
989}
990
991/*
992 * Called from the trans_commit code when we notice that
993 * the filesystem is in the middle of a forced shutdown.
994 */
995STATIC void
996xfs_trans_uncommit(
997 struct xfs_trans *tp,
998 uint flags)
999{
1000 xfs_log_item_desc_t *lidp;
1001
1002 for (lidp = xfs_trans_first_item(tp);
1003 lidp != NULL;
1004 lidp = xfs_trans_next_item(tp, lidp)) {
827 /* 1005 /*
828 * It is indeed possible for the transaction to be 1006 * Unpin all but those that aren't dirty.
829 * not dirty but the dqinfo portion to be. All that
830 * means is that we have some (non-persistent) quota
831 * reservations that need to be unreserved.
832 */ 1007 */
833 xfs_trans_unreserve_and_mod_dquots(tp); 1008 if (lidp->lid_flags & XFS_LID_DIRTY)
834 if (tp->t_ticket) { 1009 IOP_UNPIN_REMOVE(lidp->lid_item, tp);
835 commit_lsn = xfs_log_done(mp, tp->t_ticket,
836 NULL, log_flags);
837 if (commit_lsn == -1 && !shutdown)
838 shutdown = XFS_ERROR(EIO);
839 }
840 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
841 xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0);
842 xfs_trans_free_busy(tp);
843 xfs_trans_free(tp);
844 XFS_STATS_INC(xs_trans_empty);
845 return (shutdown);
846 } 1010 }
847 ASSERT(tp->t_ticket != NULL);
848 1011
849 /* 1012 xfs_trans_unreserve_and_mod_sb(tp);
850 * If we need to update the superblock, then do it now. 1013 xfs_trans_unreserve_and_mod_dquots(tp);
851 */ 1014
852 if (tp->t_flags & XFS_TRANS_SB_DIRTY) 1015 xfs_trans_free_items(tp, flags);
853 xfs_trans_apply_sb_deltas(tp); 1016 xfs_trans_free_busy(tp);
854 xfs_trans_apply_dquot_deltas(tp); 1017 xfs_trans_free(tp);
1018}
1019
1020/*
1021 * Format the transaction direct to the iclog. This isolates the physical
1022 * transaction commit operation from the logical operation and hence allows
1023 * other methods to be introduced without affecting the existing commit path.
1024 */
1025static int
1026xfs_trans_commit_iclog(
1027 struct xfs_mount *mp,
1028 struct xfs_trans *tp,
1029 xfs_lsn_t *commit_lsn,
1030 int flags)
1031{
1032 int shutdown;
1033 int error;
1034 int log_flags = 0;
1035 struct xlog_in_core *commit_iclog;
1036#define XFS_TRANS_LOGVEC_COUNT 16
1037 struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
1038 struct xfs_log_iovec *log_vector;
1039 uint nvec;
1040
855 1041
856 /* 1042 /*
857 * Ask each log item how many log_vector entries it will 1043 * Ask each log item how many log_vector entries it will
@@ -861,8 +1047,7 @@ shut_us_down:
861 */ 1047 */
862 nvec = xfs_trans_count_vecs(tp); 1048 nvec = xfs_trans_count_vecs(tp);
863 if (nvec == 0) { 1049 if (nvec == 0) {
864 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1050 return ENOMEM; /* triggers a shutdown! */
865 goto shut_us_down;
866 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { 1051 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
867 log_vector = log_vector_fast; 1052 log_vector = log_vector_fast;
868 } else { 1053 } else {
@@ -877,6 +1062,9 @@ shut_us_down:
877 */ 1062 */
878 xfs_trans_fill_vecs(tp, log_vector); 1063 xfs_trans_fill_vecs(tp, log_vector);
879 1064
1065 if (flags & XFS_TRANS_RELEASE_LOG_RES)
1066 log_flags = XFS_LOG_REL_PERM_RESERV;
1067
880 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn)); 1068 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
881 1069
882 /* 1070 /*
@@ -884,18 +1072,17 @@ shut_us_down:
884 * at any time after this call. However, all the items associated 1072 * at any time after this call. However, all the items associated
885 * with the transaction are still locked and pinned in memory. 1073 * with the transaction are still locked and pinned in memory.
886 */ 1074 */
887 commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); 1075 *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
888 1076
889 tp->t_commit_lsn = commit_lsn; 1077 tp->t_commit_lsn = *commit_lsn;
890 if (nvec > XFS_TRANS_LOGVEC_COUNT) { 1078 if (nvec > XFS_TRANS_LOGVEC_COUNT)
891 kmem_free(log_vector); 1079 kmem_free(log_vector);
892 }
893 1080
894 /* 1081 /*
895 * If we got a log write error. Unpin the logitems that we 1082 * If we got a log write error. Unpin the logitems that we
896 * had pinned, clean up, free trans structure, and return error. 1083 * had pinned, clean up, free trans structure, and return error.
897 */ 1084 */
898 if (error || commit_lsn == -1) { 1085 if (error || *commit_lsn == -1) {
899 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1086 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
900 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); 1087 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
901 return XFS_ERROR(EIO); 1088 return XFS_ERROR(EIO);
@@ -909,8 +1096,6 @@ shut_us_down:
909 */ 1096 */
910 xfs_trans_unreserve_and_mod_sb(tp); 1097 xfs_trans_unreserve_and_mod_sb(tp);
911 1098
912 sync = tp->t_flags & XFS_TRANS_SYNC;
913
914 /* 1099 /*
915 * Tell the LM to call the transaction completion routine 1100 * Tell the LM to call the transaction completion routine
916 * when the log write with LSN commit_lsn completes (e.g. 1101 * when the log write with LSN commit_lsn completes (e.g.
@@ -953,7 +1138,7 @@ shut_us_down:
953 * the commit lsn of this transaction for dependency tracking 1138 * the commit lsn of this transaction for dependency tracking
954 * purposes. 1139 * purposes.
955 */ 1140 */
956 xfs_trans_unlock_items(tp, commit_lsn); 1141 xfs_trans_unlock_items(tp, *commit_lsn);
957 1142
958 /* 1143 /*
959 * If we detected a log error earlier, finish committing 1144 * If we detected a log error earlier, finish committing
@@ -973,156 +1158,114 @@ shut_us_down:
973 * and the items are released we can finally allow the iclog to 1158 * and the items are released we can finally allow the iclog to
974 * go to disk. 1159 * go to disk.
975 */ 1160 */
976 error = xfs_log_release_iclog(mp, commit_iclog); 1161 return xfs_log_release_iclog(mp, commit_iclog);
977
978 /*
979 * If the transaction needs to be synchronous, then force the
980 * log out now and wait for it.
981 */
982 if (sync) {
983 if (!error) {
984 error = _xfs_log_force_lsn(mp, commit_lsn,
985 XFS_LOG_SYNC, log_flushed);
986 }
987 XFS_STATS_INC(xs_trans_sync);
988 } else {
989 XFS_STATS_INC(xs_trans_async);
990 }
991
992 return (error);
993} 1162}
994 1163
995 1164
996/* 1165/*
997 * Total up the number of log iovecs needed to commit this 1166 * xfs_trans_commit
998 * transaction. The transaction itself needs one for the 1167 *
999 * transaction header. Ask each dirty item in turn how many 1168 * Commit the given transaction to the log a/synchronously.
1000 * it needs to get the total. 1169 *
1170 * XFS disk error handling mechanism is not based on a typical
1171 * transaction abort mechanism. Logically after the filesystem
1172 * gets marked 'SHUTDOWN', we can't let any new transactions
1173 * be durable - ie. committed to disk - because some metadata might
1174 * be inconsistent. In such cases, this returns an error, and the
1175 * caller may assume that all locked objects joined to the transaction
1176 * have already been unlocked as if the commit had succeeded.
1177 * Do not reference the transaction structure after this call.
1001 */ 1178 */
1002STATIC uint 1179int
1003xfs_trans_count_vecs( 1180_xfs_trans_commit(
1004 xfs_trans_t *tp) 1181 struct xfs_trans *tp,
1182 uint flags,
1183 int *log_flushed)
1005{ 1184{
1006 int nvecs; 1185 struct xfs_mount *mp = tp->t_mountp;
1007 xfs_log_item_desc_t *lidp; 1186 xfs_lsn_t commit_lsn = -1;
1187 int error = 0;
1188 int log_flags = 0;
1189 int sync = tp->t_flags & XFS_TRANS_SYNC;
1008 1190
1009 nvecs = 1; 1191 /*
1010 lidp = xfs_trans_first_item(tp); 1192 * Determine whether this commit is releasing a permanent
1011 ASSERT(lidp != NULL); 1193 * log reservation or not.
1012
1013 /* In the non-debug case we need to start bailing out if we
1014 * didn't find a log_item here, return zero and let trans_commit
1015 * deal with it.
1016 */ 1194 */
1017 if (lidp == NULL) 1195 if (flags & XFS_TRANS_RELEASE_LOG_RES) {
1018 return 0; 1196 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1019 1197 log_flags = XFS_LOG_REL_PERM_RESERV;
1020 while (lidp != NULL) {
1021 /*
1022 * Skip items which aren't dirty in this transaction.
1023 */
1024 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
1025 lidp = xfs_trans_next_item(tp, lidp);
1026 continue;
1027 }
1028 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1029 nvecs += lidp->lid_size;
1030 lidp = xfs_trans_next_item(tp, lidp);
1031 } 1198 }
1032 1199
1033 return nvecs; 1200 /*
1034} 1201 * If there is nothing to be logged by the transaction,
1035 1202 * then unlock all of the items associated with the
1036/* 1203 * transaction and free the transaction structure.
1037 * Called from the trans_commit code when we notice that 1204 * Also make sure to return any reserved blocks to
1038 * the filesystem is in the middle of a forced shutdown. 1205 * the free pool.
1039 */ 1206 */
1040STATIC void 1207 if (!(tp->t_flags & XFS_TRANS_DIRTY))
1041xfs_trans_uncommit( 1208 goto out_unreserve;
1042 xfs_trans_t *tp,
1043 uint flags)
1044{
1045 xfs_log_item_desc_t *lidp;
1046 1209
1047 for (lidp = xfs_trans_first_item(tp); 1210 if (XFS_FORCED_SHUTDOWN(mp)) {
1048 lidp != NULL; 1211 error = XFS_ERROR(EIO);
1049 lidp = xfs_trans_next_item(tp, lidp)) { 1212 goto out_unreserve;
1050 /*
1051 * Unpin all but those that aren't dirty.
1052 */
1053 if (lidp->lid_flags & XFS_LID_DIRTY)
1054 IOP_UNPIN_REMOVE(lidp->lid_item, tp);
1055 } 1213 }
1056 1214
1057 xfs_trans_unreserve_and_mod_sb(tp); 1215 ASSERT(tp->t_ticket != NULL);
1058 xfs_trans_unreserve_and_mod_dquots(tp);
1059 1216
1060 xfs_trans_free_items(tp, flags); 1217 /*
1061 xfs_trans_free_busy(tp); 1218 * If we need to update the superblock, then do it now.
1062 xfs_trans_free(tp); 1219 */
1063} 1220 if (tp->t_flags & XFS_TRANS_SB_DIRTY)
1221 xfs_trans_apply_sb_deltas(tp);
1222 xfs_trans_apply_dquot_deltas(tp);
1064 1223
1065/* 1224 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
1066 * Fill in the vector with pointers to data to be logged 1225 if (error == ENOMEM) {
1067 * by this transaction. The transaction header takes 1226 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1068 * the first vector, and then each dirty item takes the 1227 error = XFS_ERROR(EIO);
1069 * number of vectors it indicated it needed in xfs_trans_count_vecs(). 1228 goto out_unreserve;
1070 * 1229 }
1071 * As each item fills in the entries it needs, also pin the item
1072 * so that it cannot be flushed out until the log write completes.
1073 */
1074STATIC void
1075xfs_trans_fill_vecs(
1076 xfs_trans_t *tp,
1077 xfs_log_iovec_t *log_vector)
1078{
1079 xfs_log_item_desc_t *lidp;
1080 xfs_log_iovec_t *vecp;
1081 uint nitems;
1082 1230
1083 /* 1231 /*
1084 * Skip over the entry for the transaction header, we'll 1232 * If the transaction needs to be synchronous, then force the
1085 * fill that in at the end. 1233 * log out now and wait for it.
1086 */ 1234 */
1087 vecp = log_vector + 1; /* pointer arithmetic */ 1235 if (sync) {
1088 1236 if (!error) {
1089 nitems = 0; 1237 error = _xfs_log_force_lsn(mp, commit_lsn,
1090 lidp = xfs_trans_first_item(tp); 1238 XFS_LOG_SYNC, log_flushed);
1091 ASSERT(lidp != NULL);
1092 while (lidp != NULL) {
1093 /*
1094 * Skip items which aren't dirty in this transaction.
1095 */
1096 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
1097 lidp = xfs_trans_next_item(tp, lidp);
1098 continue;
1099 }
1100 /*
1101 * The item may be marked dirty but not log anything.
1102 * This can be used to get called when a transaction
1103 * is committed.
1104 */
1105 if (lidp->lid_size) {
1106 nitems++;
1107 } 1239 }
1108 IOP_FORMAT(lidp->lid_item, vecp); 1240 XFS_STATS_INC(xs_trans_sync);
1109 vecp += lidp->lid_size; /* pointer arithmetic */ 1241 } else {
1110 IOP_PIN(lidp->lid_item); 1242 XFS_STATS_INC(xs_trans_async);
1111 lidp = xfs_trans_next_item(tp, lidp);
1112 } 1243 }
1113 1244
1245 return error;
1246
1247out_unreserve:
1248 xfs_trans_unreserve_and_mod_sb(tp);
1249
1114 /* 1250 /*
1115 * Now that we've counted the number of items in this 1251 * It is indeed possible for the transaction to be not dirty but
1116 * transaction, fill in the transaction header. 1252 * the dqinfo portion to be. All that means is that we have some
1253 * (non-persistent) quota reservations that need to be unreserved.
1117 */ 1254 */
1118 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC; 1255 xfs_trans_unreserve_and_mod_dquots(tp);
1119 tp->t_header.th_type = tp->t_type; 1256 if (tp->t_ticket) {
1120 tp->t_header.th_num_items = nitems; 1257 commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
1121 log_vector->i_addr = (xfs_caddr_t)&tp->t_header; 1258 if (commit_lsn == -1 && !error)
1122 log_vector->i_len = sizeof(xfs_trans_header_t); 1259 error = XFS_ERROR(EIO);
1123 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; 1260 }
1124} 1261 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1262 xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0);
1263 xfs_trans_free_busy(tp);
1264 xfs_trans_free(tp);
1125 1265
1266 XFS_STATS_INC(xs_trans_empty);
1267 return error;
1268}
1126 1269
1127/* 1270/*
1128 * Unlock all of the transaction's items and free the transaction. 1271 * Unlock all of the transaction's items and free the transaction.
@@ -1200,20 +1343,6 @@ xfs_trans_cancel(
1200 xfs_trans_free(tp); 1343 xfs_trans_free(tp);
1201} 1344}
1202 1345
1203
1204/*
1205 * Free the transaction structure. If there is more clean up
1206 * to do when the structure is freed, add it here.
1207 */
1208STATIC void
1209xfs_trans_free(
1210 xfs_trans_t *tp)
1211{
1212 atomic_dec(&tp->t_mountp->m_active_trans);
1213 xfs_trans_free_dqinfo(tp);
1214 kmem_zone_free(xfs_trans_zone, tp);
1215}
1216
1217/* 1346/*
1218 * Roll from one trans in the sequence of PERMANENT transactions to 1347 * Roll from one trans in the sequence of PERMANENT transactions to
1219 * the next: permanent transactions are only flushed out when 1348 * the next: permanent transactions are only flushed out when
@@ -1283,174 +1412,3 @@ xfs_trans_roll(
1283 xfs_trans_ihold(trans, dp); 1412 xfs_trans_ihold(trans, dp);
1284 return 0; 1413 return 0;
1285} 1414}
1286
1287/*
1288 * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item().
1289 *
1290 * This is typically called by the LM when a transaction has been fully
1291 * committed to disk. It needs to unpin the items which have
1292 * been logged by the transaction and update their positions
1293 * in the AIL if necessary.
1294 * This also gets called when the transactions didn't get written out
1295 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
1296 *
1297 * Call xfs_trans_chunk_committed() to process the items in
1298 * each chunk.
1299 */
1300STATIC void
1301xfs_trans_committed(
1302 xfs_trans_t *tp,
1303 int abortflag)
1304{
1305 xfs_log_item_chunk_t *licp;
1306 xfs_log_item_chunk_t *next_licp;
1307 xfs_log_busy_chunk_t *lbcp;
1308 xfs_log_busy_slot_t *lbsp;
1309 int i;
1310
1311 /*
1312 * Call the transaction's completion callback if there
1313 * is one.
1314 */
1315 if (tp->t_callback != NULL) {
1316 tp->t_callback(tp, tp->t_callarg);
1317 }
1318
1319 /*
1320 * Special case the chunk embedded in the transaction.
1321 */
1322 licp = &(tp->t_items);
1323 if (!(xfs_lic_are_all_free(licp))) {
1324 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1325 }
1326
1327 /*
1328 * Process the items in each chunk in turn.
1329 */
1330 licp = licp->lic_next;
1331 while (licp != NULL) {
1332 ASSERT(!xfs_lic_are_all_free(licp));
1333 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1334 next_licp = licp->lic_next;
1335 kmem_free(licp);
1336 licp = next_licp;
1337 }
1338
1339 /*
1340 * Clear all the per-AG busy list items listed in this transaction
1341 */
1342 lbcp = &tp->t_busy;
1343 while (lbcp != NULL) {
1344 for (i = 0, lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) {
1345 if (!XFS_LBC_ISFREE(lbcp, i)) {
1346 xfs_alloc_clear_busy(tp, lbsp->lbc_ag,
1347 lbsp->lbc_idx);
1348 }
1349 }
1350 lbcp = lbcp->lbc_next;
1351 }
1352 xfs_trans_free_busy(tp);
1353
1354 /*
1355 * That's it for the transaction structure. Free it.
1356 */
1357 xfs_trans_free(tp);
1358}
1359
1360/*
1361 * This is called to perform the commit processing for each
1362 * item described by the given chunk.
1363 *
1364 * The commit processing consists of unlocking items which were
1365 * held locked with the SYNC_UNLOCK attribute, calling the committed
1366 * routine of each logged item, updating the item's position in the AIL
1367 * if necessary, and unpinning each item. If the committed routine
1368 * returns -1, then do nothing further with the item because it
1369 * may have been freed.
1370 *
1371 * Since items are unlocked when they are copied to the incore
1372 * log, it is possible for two transactions to be completing
1373 * and manipulating the same item simultaneously. The AIL lock
1374 * will protect the lsn field of each item. The value of this
1375 * field can never go backwards.
1376 *
1377 * We unpin the items after repositioning them in the AIL, because
1378 * otherwise they could be immediately flushed and we'd have to race
1379 * with the flusher trying to pull the item from the AIL as we add it.
1380 */
1381STATIC void
1382xfs_trans_chunk_committed(
1383 xfs_log_item_chunk_t *licp,
1384 xfs_lsn_t lsn,
1385 int aborted)
1386{
1387 xfs_log_item_desc_t *lidp;
1388 xfs_log_item_t *lip;
1389 xfs_lsn_t item_lsn;
1390 int i;
1391
1392 lidp = licp->lic_descs;
1393 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1394 struct xfs_ail *ailp;
1395
1396 if (xfs_lic_isfree(licp, i)) {
1397 continue;
1398 }
1399
1400 lip = lidp->lid_item;
1401 if (aborted)
1402 lip->li_flags |= XFS_LI_ABORTED;
1403
1404 /*
1405 * Send in the ABORTED flag to the COMMITTED routine
1406 * so that it knows whether the transaction was aborted
1407 * or not.
1408 */
1409 item_lsn = IOP_COMMITTED(lip, lsn);
1410
1411 /*
1412 * If the committed routine returns -1, make
1413 * no more references to the item.
1414 */
1415 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) {
1416 continue;
1417 }
1418
1419 /*
1420 * If the returned lsn is greater than what it
1421 * contained before, update the location of the
1422 * item in the AIL. If it is not, then do nothing.
1423 * Items can never move backwards in the AIL.
1424 *
1425 * While the new lsn should usually be greater, it
1426 * is possible that a later transaction completing
1427 * simultaneously with an earlier one using the
1428 * same item could complete first with a higher lsn.
1429 * This would cause the earlier transaction to fail
1430 * the test below.
1431 */
1432 ailp = lip->li_ailp;
1433 spin_lock(&ailp->xa_lock);
1434 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1435 /*
1436 * This will set the item's lsn to item_lsn
1437 * and update the position of the item in
1438 * the AIL.
1439 *
1440 * xfs_trans_ail_update() drops the AIL lock.
1441 */
1442 xfs_trans_ail_update(ailp, lip, item_lsn);
1443 } else {
1444 spin_unlock(&ailp->xa_lock);
1445 }
1446
1447 /*
1448 * Now that we've repositioned the item in the AIL,
1449 * unpin it so it can be flushed. Pass information
1450 * about buffer stale state down from the log item
1451 * flags, if anyone else stales the buffer we do not
1452 * want to pay any attention to it.
1453 */
1454 IOP_UNPIN(lip, lidp->lid_flags & XFS_LID_BUF_STALE);
1455 }
1456}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 79c8bab9dfff..c62beee0921e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -49,6 +49,15 @@ typedef struct xfs_trans_header {
49#define XFS_LI_DQUOT 0x123d 49#define XFS_LI_DQUOT 0x123d
50#define XFS_LI_QUOTAOFF 0x123e 50#define XFS_LI_QUOTAOFF 0x123e
51 51
52#define XFS_LI_TYPE_DESC \
53 { XFS_LI_EFI, "XFS_LI_EFI" }, \
54 { XFS_LI_EFD, "XFS_LI_EFD" }, \
55 { XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \
56 { XFS_LI_INODE, "XFS_LI_INODE" }, \
57 { XFS_LI_BUF, "XFS_LI_BUF" }, \
58 { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \
59 { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }
60
52/* 61/*
53 * Transaction types. Used to distinguish types of buffers. 62 * Transaction types. Used to distinguish types of buffers.
54 */ 63 */
@@ -159,7 +168,6 @@ typedef struct xfs_log_item_desc {
159 168
160#define XFS_LID_DIRTY 0x1 169#define XFS_LID_DIRTY 0x1
161#define XFS_LID_PINNED 0x2 170#define XFS_LID_PINNED 0x2
162#define XFS_LID_BUF_STALE 0x8
163 171
164/* 172/*
165 * This structure is used to maintain a chunk list of log_item_desc 173 * This structure is used to maintain a chunk list of log_item_desc
@@ -833,7 +841,7 @@ typedef struct xfs_item_ops {
833 uint (*iop_size)(xfs_log_item_t *); 841 uint (*iop_size)(xfs_log_item_t *);
834 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 842 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
835 void (*iop_pin)(xfs_log_item_t *); 843 void (*iop_pin)(xfs_log_item_t *);
836 void (*iop_unpin)(xfs_log_item_t *, int); 844 void (*iop_unpin)(xfs_log_item_t *);
837 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); 845 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
838 uint (*iop_trylock)(xfs_log_item_t *); 846 uint (*iop_trylock)(xfs_log_item_t *);
839 void (*iop_unlock)(xfs_log_item_t *); 847 void (*iop_unlock)(xfs_log_item_t *);
@@ -846,7 +854,7 @@ typedef struct xfs_item_ops {
846#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) 854#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
847#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) 855#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
848#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) 856#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
849#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags) 857#define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip)
850#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) 858#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
851#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) 859#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
852#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) 860#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index fb586360d1c9..9cd809025f3a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -40,11 +40,51 @@
40#include "xfs_rw.h" 40#include "xfs_rw.h"
41#include "xfs_trace.h" 41#include "xfs_trace.h"
42 42
43/*
44 * Check to see if a buffer matching the given parameters is already
45 * a part of the given transaction.
46 */
47STATIC struct xfs_buf *
48xfs_trans_buf_item_match(
49 struct xfs_trans *tp,
50 struct xfs_buftarg *target,
51 xfs_daddr_t blkno,
52 int len)
53{
54 xfs_log_item_chunk_t *licp;
55 xfs_log_item_desc_t *lidp;
56 xfs_buf_log_item_t *blip;
57 int i;
43 58
44STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *, 59 len = BBTOB(len);
45 xfs_daddr_t, int); 60 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
46STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *, 61 if (xfs_lic_are_all_free(licp)) {
47 xfs_daddr_t, int); 62 ASSERT(licp == &tp->t_items);
63 ASSERT(licp->lic_next == NULL);
64 return NULL;
65 }
66
67 for (i = 0; i < licp->lic_unused; i++) {
68 /*
69 * Skip unoccupied slots.
70 */
71 if (xfs_lic_isfree(licp, i))
72 continue;
73
74 lidp = xfs_lic_slot(licp, i);
75 blip = (xfs_buf_log_item_t *)lidp->lid_item;
76 if (blip->bli_item.li_type != XFS_LI_BUF)
77 continue;
78
79 if (XFS_BUF_TARGET(blip->bli_buf) == target &&
80 XFS_BUF_ADDR(blip->bli_buf) == blkno &&
81 XFS_BUF_COUNT(blip->bli_buf) == len)
82 return blip->bli_buf;
83 }
84 }
85
86 return NULL;
87}
48 88
49/* 89/*
50 * Add the locked buffer to the transaction. 90 * Add the locked buffer to the transaction.
@@ -112,14 +152,6 @@ xfs_trans_bjoin(
112 * within the transaction, just increment its lock recursion count 152 * within the transaction, just increment its lock recursion count
113 * and return a pointer to it. 153 * and return a pointer to it.
114 * 154 *
115 * Use the fast path function xfs_trans_buf_item_match() or the buffer
116 * cache routine incore_match() to find the buffer
117 * if it is already owned by this transaction.
118 *
119 * If we don't already own the buffer, use get_buf() to get it.
120 * If it doesn't yet have an associated xfs_buf_log_item structure,
121 * then allocate one and add the item to this transaction.
122 *
123 * If the transaction pointer is NULL, make this just a normal 155 * If the transaction pointer is NULL, make this just a normal
124 * get_buf() call. 156 * get_buf() call.
125 */ 157 */
@@ -149,11 +181,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
149 * have it locked. In this case we just increment the lock 181 * have it locked. In this case we just increment the lock
150 * recursion count and return the buffer to the caller. 182 * recursion count and return the buffer to the caller.
151 */ 183 */
152 if (tp->t_items.lic_next == NULL) { 184 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
153 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
154 } else {
155 bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len);
156 }
157 if (bp != NULL) { 185 if (bp != NULL) {
158 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 186 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
159 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) 187 if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
@@ -259,14 +287,6 @@ int xfs_error_mod = 33;
259 * within the transaction and already read in, just increment its 287 * within the transaction and already read in, just increment its
260 * lock recursion count and return a pointer to it. 288 * lock recursion count and return a pointer to it.
261 * 289 *
262 * Use the fast path function xfs_trans_buf_item_match() or the buffer
263 * cache routine incore_match() to find the buffer
264 * if it is already owned by this transaction.
265 *
266 * If we don't already own the buffer, use read_buf() to get it.
267 * If it doesn't yet have an associated xfs_buf_log_item structure,
268 * then allocate one and add the item to this transaction.
269 *
270 * If the transaction pointer is NULL, make this just a normal 290 * If the transaction pointer is NULL, make this just a normal
271 * read_buf() call. 291 * read_buf() call.
272 */ 292 */
@@ -328,11 +348,7 @@ xfs_trans_read_buf(
328 * If the buffer is not yet read in, then we read it in, increment 348 * If the buffer is not yet read in, then we read it in, increment
329 * the lock recursion count, and return it to the caller. 349 * the lock recursion count, and return it to the caller.
330 */ 350 */
331 if (tp->t_items.lic_next == NULL) { 351 bp = xfs_trans_buf_item_match(tp, target, blkno, len);
332 bp = xfs_trans_buf_item_match(tp, target, blkno, len);
333 } else {
334 bp = xfs_trans_buf_item_match_all(tp, target, blkno, len);
335 }
336 if (bp != NULL) { 352 if (bp != NULL) {
337 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 353 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
338 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 354 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
@@ -696,7 +712,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
696 712
697 tp->t_flags |= XFS_TRANS_DIRTY; 713 tp->t_flags |= XFS_TRANS_DIRTY;
698 lidp->lid_flags |= XFS_LID_DIRTY; 714 lidp->lid_flags |= XFS_LID_DIRTY;
699 lidp->lid_flags &= ~XFS_LID_BUF_STALE;
700 bip->bli_flags |= XFS_BLI_LOGGED; 715 bip->bli_flags |= XFS_BLI_LOGGED;
701 xfs_buf_item_log(bip, first, last); 716 xfs_buf_item_log(bip, first, last);
702} 717}
@@ -782,7 +797,7 @@ xfs_trans_binval(
782 bip->bli_format.blf_flags |= XFS_BLI_CANCEL; 797 bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
783 memset((char *)(bip->bli_format.blf_data_map), 0, 798 memset((char *)(bip->bli_format.blf_data_map), 0,
784 (bip->bli_format.blf_map_size * sizeof(uint))); 799 (bip->bli_format.blf_map_size * sizeof(uint)));
785 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE; 800 lidp->lid_flags |= XFS_LID_DIRTY;
786 tp->t_flags |= XFS_TRANS_DIRTY; 801 tp->t_flags |= XFS_TRANS_DIRTY;
787} 802}
788 803
@@ -902,111 +917,3 @@ xfs_trans_dquot_buf(
902 917
903 bip->bli_format.blf_flags |= type; 918 bip->bli_format.blf_flags |= type;
904} 919}
905
906/*
907 * Check to see if a buffer matching the given parameters is already
908 * a part of the given transaction. Only check the first, embedded
909 * chunk, since we don't want to spend all day scanning large transactions.
910 */
911STATIC xfs_buf_t *
912xfs_trans_buf_item_match(
913 xfs_trans_t *tp,
914 xfs_buftarg_t *target,
915 xfs_daddr_t blkno,
916 int len)
917{
918 xfs_log_item_chunk_t *licp;
919 xfs_log_item_desc_t *lidp;
920 xfs_buf_log_item_t *blip;
921 xfs_buf_t *bp;
922 int i;
923
924 bp = NULL;
925 len = BBTOB(len);
926 licp = &tp->t_items;
927 if (!xfs_lic_are_all_free(licp)) {
928 for (i = 0; i < licp->lic_unused; i++) {
929 /*
930 * Skip unoccupied slots.
931 */
932 if (xfs_lic_isfree(licp, i)) {
933 continue;
934 }
935
936 lidp = xfs_lic_slot(licp, i);
937 blip = (xfs_buf_log_item_t *)lidp->lid_item;
938 if (blip->bli_item.li_type != XFS_LI_BUF) {
939 continue;
940 }
941
942 bp = blip->bli_buf;
943 if ((XFS_BUF_TARGET(bp) == target) &&
944 (XFS_BUF_ADDR(bp) == blkno) &&
945 (XFS_BUF_COUNT(bp) == len)) {
946 /*
947 * We found it. Break out and
948 * return the pointer to the buffer.
949 */
950 break;
951 } else {
952 bp = NULL;
953 }
954 }
955 }
956 return bp;
957}
958
959/*
960 * Check to see if a buffer matching the given parameters is already
961 * a part of the given transaction. Check all the chunks, we
962 * want to be thorough.
963 */
964STATIC xfs_buf_t *
965xfs_trans_buf_item_match_all(
966 xfs_trans_t *tp,
967 xfs_buftarg_t *target,
968 xfs_daddr_t blkno,
969 int len)
970{
971 xfs_log_item_chunk_t *licp;
972 xfs_log_item_desc_t *lidp;
973 xfs_buf_log_item_t *blip;
974 xfs_buf_t *bp;
975 int i;
976
977 bp = NULL;
978 len = BBTOB(len);
979 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
980 if (xfs_lic_are_all_free(licp)) {
981 ASSERT(licp == &tp->t_items);
982 ASSERT(licp->lic_next == NULL);
983 return NULL;
984 }
985 for (i = 0; i < licp->lic_unused; i++) {
986 /*
987 * Skip unoccupied slots.
988 */
989 if (xfs_lic_isfree(licp, i)) {
990 continue;
991 }
992
993 lidp = xfs_lic_slot(licp, i);
994 blip = (xfs_buf_log_item_t *)lidp->lid_item;
995 if (blip->bli_item.li_type != XFS_LI_BUF) {
996 continue;
997 }
998
999 bp = blip->bli_buf;
1000 if ((XFS_BUF_TARGET(bp) == target) &&
1001 (XFS_BUF_ADDR(bp) == blkno) &&
1002 (XFS_BUF_COUNT(bp) == len)) {
1003 /*
1004 * We found it. Break out and
1005 * return the pointer to the buffer.
1006 */
1007 return bp;
1008 }
1009 }
1010 }
1011 return NULL;
1012}