Diffstat (limited to 'fs/xfs')
55 files changed, 3432 insertions, 2587 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index b4769e40e8bc..c8fb13f83b3f 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -77,6 +77,7 @@ xfs-y += xfs_alloc.o \
 	xfs_itable.o \
 	xfs_dfrag.o \
 	xfs_log.o \
+	xfs_log_cil.o \
 	xfs_log_recover.o \
 	xfs_mount.o \
 	xfs_mru_cache.o \
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index a7bc925c4d60..9f769b5b38fc 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -440,14 +440,14 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	return error;
 }
 
-struct xattr_handler xfs_xattr_acl_access_handler = {
+const struct xattr_handler xfs_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
 	.flags	= ACL_TYPE_ACCESS,
 	.get	= xfs_xattr_acl_get,
 	.set	= xfs_xattr_acl_set,
 };
 
-struct xattr_handler xfs_xattr_acl_default_handler = {
+const struct xattr_handler xfs_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
 	.flags	= ACL_TYPE_DEFAULT,
 	.get	= xfs_xattr_acl_get,
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 0f8b9968a803..089eaca860b4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -45,6 +45,15 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
+/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+	IO_READ,	/* mapping for a read */
+	IO_DELAY,	/* mapping covers delalloc region */
+	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
+	IO_NEW		/* just allocated */
+};
 
 /*
  * Prime number of hash buckets since address is used as the key.
@@ -103,8 +112,9 @@ xfs_count_page_state(
 
 STATIC struct block_device *
 xfs_find_bdev_for_inode(
-	struct xfs_inode	*ip)
+	struct inode		*inode)
 {
+	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 
 	if (XFS_IS_REALTIME_INODE(ip))
@@ -183,7 +193,7 @@ xfs_setfilesize(
 	xfs_fsize_t		isize;
 
 	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-	ASSERT(ioend->io_type != IOMAP_READ);
+	ASSERT(ioend->io_type != IO_READ);
 
 	if (unlikely(ioend->io_error))
 		return 0;
@@ -214,7 +224,7 @@ xfs_finish_ioend(
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
 		struct workqueue_struct *wq;
 
-		wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+		wq = (ioend->io_type == IO_UNWRITTEN) ?
 			xfsconvertd_workqueue : xfsdatad_workqueue;
 		queue_work(wq, &ioend->io_work);
 		if (wait)
@@ -237,7 +247,7 @@ xfs_end_io(
 	 * For unwritten extents we need to issue transactions to convert a
 	 * range to normal written extens after the data I/O has finished.
 	 */
-	if (ioend->io_type == IOMAP_UNWRITTEN &&
+	if (ioend->io_type == IO_UNWRITTEN &&
 	    likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
 
 		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
@@ -250,7 +260,7 @@ xfs_end_io(
 	 * We might have to update the on-disk file size after extending
 	 * writes.
 	 */
-	if (ioend->io_type != IOMAP_READ) {
+	if (ioend->io_type != IO_READ) {
 		error = xfs_setfilesize(ioend);
 		ASSERT(!error || error == EAGAIN);
 	}
@@ -309,21 +319,25 @@ xfs_map_blocks(
 	struct inode		*inode,
 	loff_t			offset,
 	ssize_t			count,
-	xfs_iomap_t		*mapp,
+	struct xfs_bmbt_irec	*imap,
 	int			flags)
 {
 	int			nmaps = 1;
+	int			new = 0;
 
-	return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
+	return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
 }
 
 STATIC int
-xfs_iomap_valid(
-	xfs_iomap_t		*iomapp,
-	loff_t			offset)
+xfs_imap_valid(
+	struct inode		*inode,
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
 {
-	return offset >= iomapp->iomap_offset &&
-		offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+	offset >>= inode->i_blkbits;
+
+	return offset >= imap->br_startoff &&
+		offset < imap->br_startoff + imap->br_blockcount;
 }
 
 /*
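The replacement predicate works in filesystem-block units instead of bytes: the byte offset is shifted down by i_blkbits and tested against the half-open range [br_startoff, br_startoff + br_blockcount) of the extent record. A minimal user-space sketch of the same comparison, using an illustrative struct rather than the kernel's xfs_bmbt_irec:

#include <stdbool.h>
#include <stdint.h>

struct irec_sketch {
	uint64_t br_startoff;	/* mapping start, in filesystem blocks */
	uint64_t br_blockcount;	/* mapping length, in filesystem blocks */
};

static bool imap_valid_sketch(const struct irec_sketch *imap,
			      uint64_t offset, unsigned blkbits)
{
	offset >>= blkbits;	/* byte offset -> filesystem block number */

	return offset >= imap->br_startoff &&
	       offset < imap->br_startoff + imap->br_blockcount;
}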
@@ -554,19 +568,23 @@ xfs_add_to_ioend(
 
 STATIC void
 xfs_map_buffer(
+	struct inode		*inode,
 	struct buffer_head	*bh,
-	xfs_iomap_t		*mp,
-	xfs_off_t		offset,
-	uint			block_bits)
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
 {
 	sector_t		bn;
+	struct xfs_mount	*m = XFS_I(inode)->i_mount;
+	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
+	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
 
-	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);
+	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
-	bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
-	      ((offset - mp->iomap_offset) >> block_bits);
+	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+	      ((offset - iomap_offset) >> inode->i_blkbits);
 
-	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));
+	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
 
 	bh->b_blocknr = bn;
 	set_buffer_mapped(bh);
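The block-number arithmetic in the new xfs_map_buffer() converts iomap_bn, a disk address in 512-byte basic blocks (BBSHIFT is 9), into filesystem-block-sized units and adds the block delta from the start of the mapping. A stand-alone sketch of the arithmetic with a worked example; names are illustrative, not the kernel's:

#include <stdint.h>
#include <stdio.h>

#define BBSHIFT 9	/* 512-byte basic blocks, as in the kernel */

/* b_blocknr in i_blkbits-sized units, per the hunk above */
static uint64_t map_buffer_bn(uint64_t iomap_bn, uint64_t iomap_offset,
			      uint64_t offset, unsigned blkbits)
{
	return (iomap_bn >> (blkbits - BBSHIFT)) +
	       ((offset - iomap_offset) >> blkbits);
}

int main(void)
{
	/* 4096-byte blocks (blkbits = 12): daddr 80 is fs block 10, and a
	 * byte offset 8192 past the mapping start adds 2 more blocks. */
	printf("%llu\n", (unsigned long long)map_buffer_bn(80, 0, 8192, 12));
	return 0;	/* prints 12 */
}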
@@ -574,17 +592,17 @@ xfs_map_buffer(
 
 STATIC void
 xfs_map_at_offset(
+	struct inode		*inode,
 	struct buffer_head	*bh,
-	loff_t			offset,
-	int			block_bits,
-	xfs_iomap_t		*iomapp)
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
 {
-	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
-	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
+	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
 	lock_buffer(bh);
-	xfs_map_buffer(bh, iomapp, offset, block_bits);
-	bh->b_bdev = iomapp->iomap_target->bt_bdev;
+	xfs_map_buffer(inode, bh, imap, offset);
+	bh->b_bdev = xfs_find_bdev_for_inode(inode);
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
 	clear_buffer_unwritten(bh);
@@ -713,11 +731,11 @@ xfs_is_delayed_page(
 	bh = head = page_buffers(page);
 	do {
 		if (buffer_unwritten(bh))
-			acceptable = (type == IOMAP_UNWRITTEN);
+			acceptable = (type == IO_UNWRITTEN);
 		else if (buffer_delay(bh))
-			acceptable = (type == IOMAP_DELAY);
+			acceptable = (type == IO_DELAY);
 		else if (buffer_dirty(bh) && buffer_mapped(bh))
-			acceptable = (type == IOMAP_NEW);
+			acceptable = (type == IO_NEW);
 		else
 			break;
 	} while ((bh = bh->b_this_page) != head);
@@ -740,7 +758,7 @@ xfs_convert_page(
 	struct inode		*inode,
 	struct page		*page,
 	loff_t			tindex,
-	xfs_iomap_t		*mp,
+	struct xfs_bmbt_irec	*imap,
 	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
 	int			startio,
@@ -750,7 +768,6 @@ xfs_convert_page(
 	xfs_off_t		end_offset;
 	unsigned long		p_offset;
 	unsigned int		type;
-	int			bbits = inode->i_blkbits;
 	int			len, page_dirty;
 	int			count = 0, done = 0, uptodate = 1;
 	xfs_off_t		offset = page_offset(page);
@@ -802,19 +819,19 @@
 
 		if (buffer_unwritten(bh) || buffer_delay(bh)) {
 			if (buffer_unwritten(bh))
-				type = IOMAP_UNWRITTEN;
+				type = IO_UNWRITTEN;
 			else
-				type = IOMAP_DELAY;
+				type = IO_DELAY;
 
-			if (!xfs_iomap_valid(mp, offset)) {
+			if (!xfs_imap_valid(inode, imap, offset)) {
 				done = 1;
 				continue;
 			}
 
-			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
-			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+			ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+			ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
-			xfs_map_at_offset(bh, offset, bbits, mp);
+			xfs_map_at_offset(inode, bh, imap, offset);
 			if (startio) {
 				xfs_add_to_ioend(inode, bh, offset,
 						type, ioendp, done);
@@ -826,7 +843,7 @@
 				page_dirty--;
 				count++;
 			} else {
-				type = IOMAP_NEW;
+				type = IO_NEW;
 				if (buffer_mapped(bh) && all_bh && startio) {
 					lock_buffer(bh);
 					xfs_add_to_ioend(inode, bh, offset,
@@ -866,7 +883,7 @@ STATIC void
 xfs_cluster_write(
 	struct inode		*inode,
 	pgoff_t			tindex,
-	xfs_iomap_t		*iomapp,
+	struct xfs_bmbt_irec	*imap,
 	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
 	int			startio,
@@ -885,7 +902,7 @@ xfs_cluster_write(
 
 	for (i = 0; i < pagevec_count(&pvec); i++) {
 		done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-					iomapp, ioendp, wbc, startio, all_bh);
+					imap, ioendp, wbc, startio, all_bh);
 		if (done)
 			break;
 	}
@@ -930,7 +947,7 @@ xfs_aops_discard_page(
 	loff_t			offset = page_offset(page);
 	ssize_t			len = 1 << inode->i_blkbits;
 
-	if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+	if (!xfs_is_delayed_page(page, IO_DELAY))
 		goto out_invalidate;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1042,15 +1059,15 @@ xfs_page_state_convert(
 	int		unmapped) /* also implies page uptodate */
 {
 	struct buffer_head	*bh, *head;
-	xfs_iomap_t		iomap;
+	struct xfs_bmbt_irec	imap;
 	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
 	loff_t			offset;
 	unsigned long		p_offset = 0;
 	unsigned int		type;
 	__uint64_t		end_offset;
-	pgoff_t			end_index, last_index, tlast;
+	pgoff_t			end_index, last_index;
 	ssize_t			size, len;
-	int			flags, err, iomap_valid = 0, uptodate = 1;
+	int			flags, err, imap_valid = 0, uptodate = 1;
 	int			page_dirty, count = 0;
 	int			trylock = 0;
 	int			all_bh = unmapped;
@@ -1097,7 +1114,7 @@
 	bh = head = page_buffers(page);
 	offset = page_offset(page);
 	flags = BMAPI_READ;
-	type = IOMAP_NEW;
+	type = IO_NEW;
 
 	/* TODO: cleanup count and page_dirty */
 
@@ -1111,12 +1128,12 @@
 			 * the iomap is actually still valid, but the ioend
 			 * isn't. shouldn't happen too often.
 			 */
-			iomap_valid = 0;
+			imap_valid = 0;
 			continue;
 		}
 
-		if (iomap_valid)
-			iomap_valid = xfs_iomap_valid(&iomap, offset);
+		if (imap_valid)
+			imap_valid = xfs_imap_valid(inode, &imap, offset);
 
 		/*
 		 * First case, map an unwritten extent and prepare for
@@ -1137,20 +1154,20 @@
 			 * Make sure we don't use a read-only iomap
 			 */
 			if (flags == BMAPI_READ)
-				iomap_valid = 0;
+				imap_valid = 0;
 
 			if (buffer_unwritten(bh)) {
-				type = IOMAP_UNWRITTEN;
+				type = IO_UNWRITTEN;
 				flags = BMAPI_WRITE | BMAPI_IGNSTATE;
 			} else if (buffer_delay(bh)) {
-				type = IOMAP_DELAY;
+				type = IO_DELAY;
 				flags = BMAPI_ALLOCATE | trylock;
 			} else {
-				type = IOMAP_NEW;
+				type = IO_NEW;
 				flags = BMAPI_WRITE | BMAPI_MMAP;
 			}
 
-			if (!iomap_valid) {
+			if (!imap_valid) {
 				/*
 				 * if we didn't have a valid mapping then we
 				 * need to ensure that we put the new mapping
@@ -1160,7 +1177,7 @@
 				 * for unwritten extent conversion.
 				 */
 				new_ioend = 1;
-				if (type == IOMAP_NEW) {
+				if (type == IO_NEW) {
 					size = xfs_probe_cluster(inode,
 							page, bh, head, 0);
 				} else {
@@ -1168,14 +1185,14 @@
 				}
 
 				err = xfs_map_blocks(inode, offset, size,
-						&iomap, flags);
+						&imap, flags);
 				if (err)
 					goto error;
-				iomap_valid = xfs_iomap_valid(&iomap, offset);
+				imap_valid = xfs_imap_valid(inode, &imap,
+							    offset);
 			}
-			if (iomap_valid) {
-				xfs_map_at_offset(bh, offset,
-						inode->i_blkbits, &iomap);
+			if (imap_valid) {
+				xfs_map_at_offset(inode, bh, &imap, offset);
 				if (startio) {
 					xfs_add_to_ioend(inode, bh, offset,
 							type, &ioend,
@@ -1194,40 +1211,41 @@
 			 * That means it must already have extents allocated
 			 * underneath it. Map the extent by reading it.
 			 */
-			if (!iomap_valid || flags != BMAPI_READ) {
+			if (!imap_valid || flags != BMAPI_READ) {
 				flags = BMAPI_READ;
 				size = xfs_probe_cluster(inode, page, bh,
 								head, 1);
 				err = xfs_map_blocks(inode, offset, size,
-						&iomap, flags);
+						&imap, flags);
 				if (err)
 					goto error;
-				iomap_valid = xfs_iomap_valid(&iomap, offset);
+				imap_valid = xfs_imap_valid(inode, &imap,
+							    offset);
 			}
 
 			/*
-			 * We set the type to IOMAP_NEW in case we are doing a
+			 * We set the type to IO_NEW in case we are doing a
 			 * small write at EOF that is extending the file but
 			 * without needing an allocation. We need to update the
 			 * file size on I/O completion in this case so it is
 			 * the same case as having just allocated a new extent
 			 * that we are writing into for the first time.
 			 */
-			type = IOMAP_NEW;
+			type = IO_NEW;
 			if (trylock_buffer(bh)) {
 				ASSERT(buffer_mapped(bh));
-				if (iomap_valid)
+				if (imap_valid)
 					all_bh = 1;
 				xfs_add_to_ioend(inode, bh, offset, type,
-						&ioend, !iomap_valid);
+						&ioend, !imap_valid);
 				page_dirty--;
 				count++;
 			} else {
-				iomap_valid = 0;
+				imap_valid = 0;
 			}
 		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
 			   (unmapped || startio)) {
-			iomap_valid = 0;
+			imap_valid = 0;
 		}
 
 		if (!iohead)
@@ -1241,12 +1259,23 @@
 	if (startio)
 		xfs_start_page_writeback(page, 1, count);
 
-	if (ioend && iomap_valid) {
-		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
-					PAGE_CACHE_SHIFT;
-		tlast = min_t(pgoff_t, offset, last_index);
-		xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
-					wbc, startio, all_bh, tlast);
+	if (ioend && imap_valid) {
+		xfs_off_t		end_index;
+
+		end_index = imap.br_startoff + imap.br_blockcount;
+
+		/* to bytes */
+		end_index <<= inode->i_blkbits;
+
+		/* to pages */
+		end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+		/* check against file size */
+		if (end_index > last_index)
+			end_index = last_index;
+
+		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+					wbc, startio, all_bh, end_index);
 	}
 
 	if (iohead)
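The conversions above walk one quantity through three units: the mapping end in filesystem blocks, then bytes, then the index of the last page the mapping touches, clamped to the last page of the file. A stand-alone sketch with a worked example; the shift values assume 4k blocks and 4k pages and are illustrative only:

#include <stdint.h>
#include <stdio.h>

static uint64_t cluster_end_page(uint64_t br_startoff, uint64_t br_blockcount,
				 unsigned blkbits, unsigned page_shift,
				 uint64_t last_index)
{
	uint64_t end_index = br_startoff + br_blockcount;	/* fs blocks */

	end_index <<= blkbits;				/* to bytes */
	end_index = (end_index - 1) >> page_shift;	/* to last page index */

	if (end_index > last_index)	/* check against file size */
		end_index = last_index;
	return end_index;
}

int main(void)
{
	/* blocks [8, 12) with 4k blocks and 4k pages end on page 11 */
	printf("%llu\n",
	       (unsigned long long)cluster_end_page(8, 4, 12, 12, 1000));
	return 0;
}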
@@ -1448,10 +1477,11 @@ __xfs_get_blocks(
 	int			direct,
 	bmapi_flags_t		flags)
 {
-	xfs_iomap_t		iomap;
+	struct xfs_bmbt_irec	imap;
 	xfs_off_t		offset;
 	ssize_t			size;
-	int			niomap = 1;
+	int			nimap = 1;
+	int			new = 0;
 	int			error;
 
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
@@ -1462,22 +1492,21 @@
 		return 0;
 
 	error = xfs_iomap(XFS_I(inode), offset, size,
-			  create ? flags : BMAPI_READ, &iomap, &niomap);
+			  create ? flags : BMAPI_READ, &imap, &nimap, &new);
 	if (error)
 		return -error;
-	if (niomap == 0)
+	if (nimap == 0)
 		return 0;
 
-	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
+	if (imap.br_startblock != HOLESTARTBLOCK &&
+	    imap.br_startblock != DELAYSTARTBLOCK) {
 		/*
 		 * For unwritten extents do not report a disk address on
 		 * the read case (treat as if we're reading into a hole).
 		 */
-		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
-			xfs_map_buffer(bh_result, &iomap, offset,
-				       inode->i_blkbits);
-		}
-		if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
+		if (create || !ISUNWRITTEN(&imap))
+			xfs_map_buffer(inode, bh_result, &imap, offset);
+		if (create && ISUNWRITTEN(&imap)) {
 			if (direct)
 				bh_result->b_private = inode;
 			set_buffer_unwritten(bh_result);
@@ -1488,7 +1517,7 @@
 	 * If this is a realtime file, data may be on a different device.
 	 * to that pointed to from the buffer_head b_bdev currently.
 	 */
-	bh_result->b_bdev = iomap.iomap_target->bt_bdev;
+	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
 
 	/*
 	 * If we previously allocated a block out beyond eof and we are now
@@ -1502,10 +1531,10 @@
 	if (create &&
 	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
 	     (offset >= i_size_read(inode)) ||
-	     (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
+	     (new || ISUNWRITTEN(&imap))))
 		set_buffer_new(bh_result);
 
-	if (iomap.iomap_flags & IOMAP_DELAY) {
+	if (imap.br_startblock == DELAYSTARTBLOCK) {
 		BUG_ON(direct);
 		if (create) {
 			set_buffer_uptodate(bh_result);
@@ -1514,11 +1543,23 @@
 		}
 	}
 
+	/*
+	 * If this is O_DIRECT or the mpage code calling tell them how large
+	 * the mapping is, so that we can avoid repeated get_blocks calls.
+	 */
 	if (direct || size > (1 << inode->i_blkbits)) {
-		ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
-		offset = min_t(xfs_off_t,
-				iomap.iomap_bsize - iomap.iomap_delta, size);
-		bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
+		xfs_off_t		mapping_size;
+
+		mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+		mapping_size <<= inode->i_blkbits;
+
+		ASSERT(mapping_size > 0);
+		if (mapping_size > size)
+			mapping_size = size;
+		if (mapping_size > LONG_MAX)
+			mapping_size = LONG_MAX;
+
+		bh_result->b_size = mapping_size;
 	}
 
 	return 0;
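The rewritten sizing code reports how much of the mapping remains past iblock so O_DIRECT and mpage callers can avoid repeated get_blocks calls, clamping to the requested size and to LONG_MAX because b_size is a long-sized field. A stand-alone sketch of the computation, with illustrative names:

#include <limits.h>
#include <stdint.h>

static long mapping_bytes_sketch(uint64_t br_startoff, uint64_t br_blockcount,
				 uint64_t iblock, unsigned blkbits,
				 int64_t size)
{
	int64_t mapping_size =
		(int64_t)(br_startoff + br_blockcount - iblock) << blkbits;

	if (mapping_size > size)	/* never report more than requested */
		mapping_size = size;
	if (mapping_size > LONG_MAX)	/* b_size must fit in a long */
		mapping_size = LONG_MAX;
	return (long)mapping_size;
}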
@@ -1576,7 +1617,7 @@ xfs_end_io_direct(
 	 */
 	ioend->io_offset = offset;
 	ioend->io_size = size;
-	if (ioend->io_type == IOMAP_READ) {
+	if (ioend->io_type == IO_READ) {
 		xfs_finish_ioend(ioend, 0);
 	} else if (private && size > 0) {
 		xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
@@ -1587,7 +1628,7 @@
 		 * didn't map an unwritten extent so switch it's completion
 		 * handler.
 		 */
-		ioend->io_type = IOMAP_NEW;
+		ioend->io_type = IO_NEW;
 		xfs_finish_ioend(ioend, 0);
 	}
 
@@ -1612,10 +1653,10 @@ xfs_vm_direct_IO(
 	struct block_device *bdev;
 	ssize_t		ret;
 
-	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
+	bdev = xfs_find_bdev_for_inode(inode);
 
 	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
-					IOMAP_UNWRITTEN : IOMAP_READ);
+					IO_UNWRITTEN : IO_READ);
 
 	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
 					    offset, nr_segs,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 44c2b0ef9a41..649ade8ef598 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -37,6 +37,7 @@
 
 #include "xfs_sb.h"
 #include "xfs_inum.h"
+#include "xfs_log.h"
 #include "xfs_ag.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
@@ -850,6 +851,12 @@ xfs_buf_lock_value(
  *	Note that this in no way locks the underlying pages, so it is only
  *	useful for synchronizing concurrent use of buffer objects, not for
  *	synchronizing independent access to the underlying pages.
+ *
+ *	If we come across a stale, pinned, locked buffer, we know that we
+ *	are being asked to lock a buffer that has been reallocated. Because
+ *	it is pinned, we know that the log has not been pushed to disk and
+ *	hence it will still be locked. Rather than sleeping until someone
+ *	else pushes the log, push it ourselves before trying to get the lock.
  */
 void
 xfs_buf_lock(
@@ -857,6 +864,8 @@ xfs_buf_lock(
 {
 	trace_xfs_buf_lock(bp, _RET_IP_);
 
+	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+		xfs_log_force(bp->b_mount, 0);
 	if (atomic_read(&bp->b_io_remaining))
 		blk_run_address_space(bp->b_target->bt_mapping);
 	down(&bp->b_sema);
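The added check avoids a pointless sleep: per the comment block above, a buffer that is both stale and pinned stays locked until the log is written, so the locker pushes the log itself rather than waiting for someone else to do it. A toy sketch of the control flow, with stub functions standing in for xfs_log_force() and the buffer semaphore:

#include <stdio.h>

struct buf_sketch {
	int	pin_count;	/* nonzero while the log holds the buffer */
	int	stale;		/* contents have been invalidated */
};

static void log_force_stub(void) { puts("push the log"); }
static void sema_down_stub(struct buf_sketch *bp) { (void)bp; puts("locked"); }

static void buf_lock_sketch(struct buf_sketch *bp)
{
	/* stale + pinned: the lock is held until the log commits,
	 * so push the log ourselves before blocking on the semaphore */
	if (bp->pin_count && bp->stale)
		log_force_stub();
	sema_down_stub(bp);
}

int main(void)
{
	struct buf_sketch bp = { .pin_count = 1, .stale = 1 };
	buf_lock_sketch(&bp);
	return 0;
}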
@@ -1007,25 +1016,20 @@ xfs_bwrite(
 	struct xfs_mount	*mp,
 	struct xfs_buf		*bp)
 {
-	int			iowait = (bp->b_flags & XBF_ASYNC) == 0;
-	int			error = 0;
+	int			error;
 
 	bp->b_strat = xfs_bdstrat_cb;
 	bp->b_mount = mp;
 	bp->b_flags |= XBF_WRITE;
-	if (!iowait)
-		bp->b_flags |= _XBF_RUN_QUEUES;
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
 
 	xfs_buf_delwri_dequeue(bp);
 	xfs_buf_iostrategy(bp);
 
-	if (iowait) {
-		error = xfs_buf_iowait(bp);
-		if (error)
-			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-		xfs_buf_relse(bp);
-	}
-
+	error = xfs_buf_iowait(bp);
+	if (error)
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+	xfs_buf_relse(bp);
 	return error;
 }
 
@@ -1614,7 +1618,8 @@ xfs_mapping_buftarg(
 
 STATIC int
 xfs_alloc_delwrite_queue(
-	xfs_buftarg_t		*btp)
+	xfs_buftarg_t		*btp,
+	const char		*fsname)
 {
 	int			error = 0;
 
@@ -1622,7 +1627,7 @@ xfs_alloc_delwrite_queue(
 	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
 	spin_lock_init(&btp->bt_delwrite_lock);
 	btp->bt_flags = 0;
-	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
+	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
 	if (IS_ERR(btp->bt_task)) {
 		error = PTR_ERR(btp->bt_task);
 		goto out_error;
@@ -1635,7 +1640,8 @@ out_error:
 xfs_buftarg_t *
 xfs_alloc_buftarg(
 	struct block_device	*bdev,
-	int			external)
+	int			external,
+	const char		*fsname)
 {
 	xfs_buftarg_t		*btp;
 
@@ -1647,7 +1653,7 @@ xfs_alloc_buftarg(
 		goto error;
 	if (xfs_mapping_buftarg(btp, bdev))
 		goto error;
-	if (xfs_alloc_delwrite_queue(btp))
+	if (xfs_alloc_delwrite_queue(btp, fsname))
 		goto error;
 	xfs_alloc_bufhash(btp, external);
 	return btp;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 386e7361e50e..5fbecefa5dfd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -390,7 +390,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
 /*
  *	Handling of buftargs.
  */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
 extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 42dd3bcfba6b..d8fb1b5d6cb5 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -115,6 +115,8 @@ xfs_file_fsync(
 
 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
 
+	xfs_ioend_wait(ip);
+
 	/*
 	 * We always need to make sure that the required inode state is safe on
 	 * disk. The inode might be clean but we still might need to force the
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 7b26cc2fd284..699b60cbab9c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -527,6 +527,10 @@ xfs_attrmulti_by_handle(
 	if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
 
+	/* overflow check */
+	if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
+		return -E2BIG;
+
 	dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
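The overflow check uses division so the dangerous multiply never happens: opcount is later multiplied by sizeof(xfs_attr_multiop_t) to size an allocation, and any opcount at or above INT_MAX divided by the element size would wrap. A stand-alone illustration; the 32-byte struct is a stand-in for the real element size:

#include <limits.h>
#include <stddef.h>
#include <stdio.h>

struct op_sketch { char pad[32]; };	/* stand-in element */

static int checked_bytes(unsigned int opcount, size_t *out)
{
	if (opcount >= INT_MAX / sizeof(struct op_sketch))
		return -1;	/* product would exceed INT_MAX: reject */
	*out = opcount * sizeof(struct op_sketch);
	return 0;
}

int main(void)
{
	size_t n;
	printf("%d\n", checked_bytes(100, &n));		/* 0: fits */
	printf("%d\n", checked_bytes(UINT_MAX, &n));	/* -1: rejected */
	return 0;
}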
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 593c05b4df8d..9287135e9bfc 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -420,6 +420,10 @@ xfs_compat_attrmulti_by_handle(
 			   sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
 
+	/* overflow check */
+	if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
+		return -E2BIG;
+
 	dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index e65a7937f3a4..9c8019c78c92 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,7 +673,10 @@ xfs_vn_fiemap(
 	bm.bmv_length = BTOBB(length);
 
 	/* We add one because in getbmap world count includes the header */
-	bm.bmv_count = fieinfo->fi_extents_max + 1;
+	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
+		       fieinfo->fi_extents_max + 1;
+	bm.bmv_count = min_t(__s32, bm.bmv_count,
+			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
 	bm.bmv_iflags = BMV_IF_PREALLOC;
 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
 		bm.bmv_iflags |= BMV_IF_ATTRFORK;
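The new bmv_count policy treats fi_extents_max == 0, a FIEMAP query for the extent count only, as a request for the maximum, then caps every request so the getbmapx output array stays within 16 pages. A sketch with illustrative constants; MAXEXTNUM_SKETCH and the struct size here are stand-ins, not the kernel's values:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_SKETCH	4096
#define MAXEXTNUM_SKETCH	INT32_MAX	/* stand-in for MAXEXTNUM */

struct getbmapx_sketch { int64_t fields[7]; };	/* stand-in, 56 bytes */

static int32_t fiemap_bmv_count(uint32_t fi_extents_max)
{
	/* 0 means "just count the extents": ask for as many as allowed */
	int64_t count = !fi_extents_max ? MAXEXTNUM_SKETCH
					: (int64_t)fi_extents_max + 1;
	int64_t cap = PAGE_SIZE_SKETCH * 16 / sizeof(struct getbmapx_sketch);

	return (int32_t)(count < cap ? count : cap);
}

int main(void)
{
	printf("%d\n", fiemap_bmv_count(0));	/* capped at the 16-page limit */
	printf("%d\n", fiemap_bmv_count(10));	/* 11: request plus header */
	return 0;
}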
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 1947514ce1ad..9ac8aea91529 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -19,6 +19,7 @@
 #include "xfs_dmapi.h"
 #include "xfs_sb.h"
 #include "xfs_inum.h"
+#include "xfs_log.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
 #include "xfs_quota.h"
@@ -97,7 +98,7 @@ xfs_fs_set_xstate(
 }
 
 STATIC int
-xfs_fs_get_xquota(
+xfs_fs_get_dqblk(
 	struct super_block	*sb,
 	int			type,
 	qid_t			id,
@@ -114,7 +115,7 @@ xfs_fs_get_xquota(
 }
 
 STATIC int
-xfs_fs_set_xquota(
+xfs_fs_set_dqblk(
 	struct super_block	*sb,
 	int			type,
 	qid_t			id,
@@ -135,6 +136,6 @@ xfs_fs_set_xquota(
 const struct quotactl_ops xfs_quotactl_operations = {
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
-	.get_xquota		= xfs_fs_get_xquota,
-	.set_xquota		= xfs_fs_set_xquota,
+	.get_dqblk		= xfs_fs_get_dqblk,
+	.set_dqblk		= xfs_fs_set_dqblk,
 };
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 29f1edca76de..f2d1718c9165 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -119,6 +119,8 @@ mempool_t *xfs_ioend_pool;
 #define MNTOPT_DMAPI	"dmapi"		/* DMI enabled (DMAPI / XDSM) */
 #define MNTOPT_XDSM	"xdsm"		/* DMI enabled (DMAPI / XDSM) */
 #define MNTOPT_DMI	"dmi"		/* DMI enabled (DMAPI / XDSM) */
+#define MNTOPT_DELAYLOG   "delaylog"	/* Delayed loging enabled */
+#define MNTOPT_NODELAYLOG "nodelaylog"	/* Delayed loging disabled */
 
 /*
  * Table driven mount option parser.
@@ -374,6 +376,13 @@ xfs_parseargs(
 			mp->m_flags |= XFS_MOUNT_DMAPI;
 		} else if (!strcmp(this_char, MNTOPT_DMI)) {
 			mp->m_flags |= XFS_MOUNT_DMAPI;
+		} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
+			mp->m_flags |= XFS_MOUNT_DELAYLOG;
+			cmn_err(CE_WARN,
+				"Enabling EXPERIMENTAL delayed logging feature "
+				"- use at your own risk.\n");
+		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
+			mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
 		} else if (!strcmp(this_char, "ihashsize")) {
 			cmn_err(CE_WARN,
 	"XFS: ihashsize no longer used, option is deprecated.");
@@ -535,6 +544,7 @@ xfs_showargs(
 		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
 		{ XFS_MOUNT_DMAPI,		"," MNTOPT_DMAPI },
 		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
+		{ XFS_MOUNT_DELAYLOG,		"," MNTOPT_DELAYLOG },
 		{ 0, NULL }
 	};
 	static struct proc_xfs_info xfs_info_unset[] = {
@@ -725,7 +735,8 @@ void
 xfs_blkdev_issue_flush(
 	xfs_buftarg_t		*buftarg)
 {
-	blkdev_issue_flush(buftarg->bt_bdev, NULL);
+	blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
+			   BLKDEV_IFL_WAIT);
 }
 
 STATIC void
@@ -789,18 +800,18 @@ xfs_open_devices(
 	 * Setup xfs_mount buffer target pointers
 	 */
 	error = ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
 	if (!mp->m_ddev_targp)
 		goto out_close_rtdev;
 
 	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
 		if (!mp->m_rtdev_targp)
 			goto out_free_ddev_targ;
 	}
 
 	if (logdev && logdev != ddev) {
-		mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+		mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
 		if (!mp->m_logdev_targp)
 			goto out_free_rtdev_targ;
 	} else {
@@ -902,7 +913,8 @@ xfsaild_start(
 	struct xfs_ail	*ailp)
 {
 	ailp->xa_target = 0;
-	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
+	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+				    ailp->xa_mount->m_fsname);
 	if (IS_ERR(ailp->xa_task))
 		return -PTR_ERR(ailp->xa_task);
 	return 0;
@@ -1092,6 +1104,7 @@ xfs_fs_write_inode(
 		 * the code will only flush the inode if it isn't already
 		 * being flushed.
 		 */
+		xfs_ioend_wait(ip);
 		xfs_ilock(ip, XFS_ILOCK_SHARED);
 		if (ip->i_update_core) {
 			error = xfs_log_inode(ip);
@@ -1752,7 +1765,7 @@ xfs_init_zones(void)
 	 * but it is much faster.
 	 */
 	xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
-				(((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
+				(((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
 				  NBWORD) * sizeof(int))), "xfs_buf_item");
 	if (!xfs_buf_item_zone)
 		goto out_destroy_trans_zone;
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 233d4b9881b1..519618e9279e 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -85,7 +85,7 @@ extern __uint64_t xfs_max_file_offset(unsigned int);
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
 extern const struct export_operations xfs_export_operations;
-extern struct xattr_handler *xfs_xattr_handlers[];
+extern const struct xattr_handler *xfs_xattr_handlers[];
 extern const struct quotactl_ops xfs_quotactl_operations;
 
 #define XFS_M(sb)		((struct xfs_mount *)((sb)->s_fs_info))
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a427c638d909..3884e20bc14e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -356,68 +356,23 @@ xfs_commit_dummy_trans(
 
 STATIC int
 xfs_sync_fsdata(
-	struct xfs_mount	*mp,
-	int			flags)
+	struct xfs_mount	*mp)
 {
 	struct xfs_buf		*bp;
-	struct xfs_buf_log_item	*bip;
-	int			error = 0;
 
 	/*
-	 * If this is xfssyncd() then only sync the superblock if we can
-	 * lock it without sleeping and it is not pinned.
+	 * If the buffer is pinned then push on the log so we won't get stuck
+	 * waiting in the write for someone, maybe ourselves, to flush the log.
+	 *
+	 * Even though we just pushed the log above, we did not have the
+	 * superblock buffer locked at that point so it can become pinned in
+	 * between there and here.
 	 */
-	if (flags & SYNC_TRYLOCK) {
-		ASSERT(!(flags & SYNC_WAIT));
-
-		bp = xfs_getsb(mp, XBF_TRYLOCK);
-		if (!bp)
-			goto out;
-
-		bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
-		if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
-			goto out_brelse;
-	} else {
-		bp = xfs_getsb(mp, 0);
-
-		/*
-		 * If the buffer is pinned then push on the log so we won't
-		 * get stuck waiting in the write for someone, maybe
-		 * ourselves, to flush the log.
-		 *
-		 * Even though we just pushed the log above, we did not have
-		 * the superblock buffer locked at that point so it can
-		 * become pinned in between there and here.
-		 */
-		if (XFS_BUF_ISPINNED(bp))
-			xfs_log_force(mp, 0);
-	}
-
-
-	if (flags & SYNC_WAIT)
-		XFS_BUF_UNASYNC(bp);
-	else
-		XFS_BUF_ASYNC(bp);
-
-	error = xfs_bwrite(mp, bp);
-	if (error)
-		return error;
-
-	/*
-	 * If this is a data integrity sync make sure all pending buffers
-	 * are flushed out for the log coverage check below.
-	 */
-	if (flags & SYNC_WAIT)
-		xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
-	if (xfs_log_need_covered(mp))
-		error = xfs_commit_dummy_trans(mp, flags);
-	return error;
+	bp = xfs_getsb(mp, 0);
+	if (XFS_BUF_ISPINNED(bp))
+		xfs_log_force(mp, 0);
 
-out_brelse:
-	xfs_buf_relse(bp);
-out:
-	return error;
+	return xfs_bwrite(mp, bp);
 }
 
 /*
@@ -441,7 +396,7 @@ int
 xfs_quiesce_data(
 	struct xfs_mount	*mp)
 {
-	int error;
+	int			error, error2 = 0;
 
 	/* push non-blocking */
 	xfs_sync_data(mp, 0);
@@ -452,13 +407,20 @@ xfs_quiesce_data(
 	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
-	error = xfs_sync_fsdata(mp, SYNC_WAIT);
+	error = xfs_sync_fsdata(mp);
+
+	/* make sure all delwri buffers are written out */
+	xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+	/* mark the log as covered if needed */
+	if (xfs_log_need_covered(mp))
+		error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
 
 	/* flush data-only devices */
 	if (mp->m_rtdev_targp)
 		XFS_bflush(mp->m_rtdev_targp);
 
-	return error;
+	return error ? error : error2;
 }
 
 STATIC void
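After the refactor, xfs_quiesce_data() performs the delwri flush and log-covering steps that previously lived inside xfs_sync_fsdata(), remembering the first error while still running every remaining step. A toy sketch of the error-aggregation pattern, with stubs in place of the XFS calls:

#include <stdio.h>

static int  write_superblock_stub(void)	{ return 0; }
static void flush_delwri_stub(void)	{ puts("flush delwri buffers"); }
static int  log_needs_cover_stub(void)	{ return 1; }
static int  cover_log_stub(void)	{ return 0; }

static int quiesce_data_sketch(void)
{
	int error, error2 = 0;

	error = write_superblock_stub();	/* first error is kept... */
	flush_delwri_stub();			/* ...later steps still run */
	if (log_needs_cover_stub())
		error2 = cover_log_stub();

	return error ? error : error2;		/* first error wins */
}

int main(void)
{
	return quiesce_data_sketch();
}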
@@ -581,9 +543,9 @@ xfs_flush_inodes(
 }
 
 /*
- * Every sync period we need to unpin all items, reclaim inodes, sync
- * quota and write out the superblock. We might need to cover the log
- * to indicate it is idle.
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas.  We might need to cover the log to indicate that the
+ * filesystem is idle.
  */
 STATIC void
 xfs_sync_worker(
@@ -597,7 +559,8 @@ xfs_sync_worker(
 		xfs_reclaim_inodes(mp, 0);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-		error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
+		if (xfs_log_need_covered(mp))
+			error = xfs_commit_dummy_trans(mp, 0);
 	}
 	mp->m_sync_seq++;
 	wake_up(&mp->m_wait_single_sync_task);
@@ -660,7 +623,7 @@ xfs_syncd_init(
 	mp->m_sync_work.w_syncer = xfs_sync_worker;
 	mp->m_sync_work.w_mount = mp;
 	mp->m_sync_work.w_completion = NULL;
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
 	if (IS_ERR(mp->m_sync_task))
 		return -PTR_ERR(mp->m_sync_task);
 	return 0;
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 5a107601e969..207fa77f63ae 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -41,7 +41,6 @@
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
-#include "xfs_attr_sf.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_log_priv.h"
 #include "xfs_buf_item.h"
@@ -50,6 +49,9 @@
 #include "xfs_aops.h"
 #include "quota/xfs_dquot_item.h"
 #include "quota/xfs_dquot.h"
+#include "xfs_log_recover.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
 
 /*
  * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index fcaa62f0799e..ff6bc797baf2 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -32,6 +32,10 @@ struct xfs_da_node_entry;
 struct xfs_dquot;
 struct xlog_ticket;
 struct log;
+struct xlog_recover;
+struct xlog_recover_item;
+struct xfs_buf_log_format;
+struct xfs_inode_log_format;
 
 DECLARE_EVENT_CLASS(xfs_attr_list_class,
 	TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -562,18 +566,21 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(int, count)
+		__field(int, pincount)
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->ino = ip->i_ino;
 		__entry->count = atomic_read(&VFS_I(ip)->i_count);
+		__entry->pincount = atomic_read(&ip->i_pincount);
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
+	TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->count,
+		  __entry->pincount,
 		  (char *)__entry->caller_ip)
 )
 
@@ -583,6 +590,10 @@ DEFINE_EVENT(xfs_inode_class, name, \
 	TP_ARGS(ip, caller_ip))
 DEFINE_INODE_EVENT(xfs_ihold);
 DEFINE_INODE_EVENT(xfs_irele);
+DEFINE_INODE_EVENT(xfs_inode_pin);
+DEFINE_INODE_EVENT(xfs_inode_unpin);
+DEFINE_INODE_EVENT(xfs_inode_unpin_nowait);
+
 /* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
 DEFINE_INODE_EVENT(xfs_inode);
 #define xfs_itrace_entry(ip)    \
@@ -642,8 +653,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \
 	TP_PROTO(struct xfs_dquot *dqp), \
 	TP_ARGS(dqp))
DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
@@ -658,7 +667,6 @@ DEFINE_DQUOT_EVENT(xfs_dqread_fail);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
 DEFINE_DQUOT_EVENT(xfs_dqget_hit);
 DEFINE_DQUOT_EVENT(xfs_dqget_miss);
@@ -1051,83 +1059,112 @@ TRACE_EVENT(xfs_bunmap,
 
 );
 
+#define XFS_BUSY_SYNC \
+	{ 0,	"async" }, \
+	{ 1,	"sync" }
+
 TRACE_EVENT(xfs_alloc_busy,
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
-		 xfs_extlen_t len, int slot),
-	TP_ARGS(mp, agno, agbno, len, slot),
+	TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len, int sync),
+	TP_ARGS(trans, agno, agbno, len, sync),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
+		__field(struct xfs_trans *, tp)
+		__field(int, tid)
 		__field(xfs_agnumber_t, agno)
 		__field(xfs_agblock_t, agbno)
 		__field(xfs_extlen_t, len)
-		__field(int, slot)
+		__field(int, sync)
 	),
 	TP_fast_assign(
-		__entry->dev = mp->m_super->s_dev;
+		__entry->dev = trans->t_mountp->m_super->s_dev;
+		__entry->tp = trans;
+		__entry->tid = trans->t_ticket->t_tid;
 		__entry->agno = agno;
 		__entry->agbno = agbno;
 		__entry->len = len;
-		__entry->slot = slot;
+		__entry->sync = sync;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u slot %d",
+	TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->tp,
+		  __entry->tid,
 		  __entry->agno,
 		  __entry->agbno,
 		  __entry->len,
1077 | __entry->slot) | 1095 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) |
1078 | 1096 | ||
1079 | ); | 1097 | ); |
1080 | 1098 | ||
1081 | #define XFS_BUSY_STATES \ | ||
1082 | { 0, "found" }, \ | ||
1083 | { 1, "missing" } | ||
1084 | |||
1085 | TRACE_EVENT(xfs_alloc_unbusy, | 1099 | TRACE_EVENT(xfs_alloc_unbusy, |
1086 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1100 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1087 | int slot, int found), | 1101 | xfs_agblock_t agbno, xfs_extlen_t len), |
1088 | TP_ARGS(mp, agno, slot, found), | 1102 | TP_ARGS(mp, agno, agbno, len), |
1089 | TP_STRUCT__entry( | 1103 | TP_STRUCT__entry( |
1090 | __field(dev_t, dev) | 1104 | __field(dev_t, dev) |
1091 | __field(xfs_agnumber_t, agno) | 1105 | __field(xfs_agnumber_t, agno) |
1092 | __field(int, slot) | 1106 | __field(xfs_agblock_t, agbno) |
1093 | __field(int, found) | 1107 | __field(xfs_extlen_t, len) |
1094 | ), | 1108 | ), |
1095 | TP_fast_assign( | 1109 | TP_fast_assign( |
1096 | __entry->dev = mp->m_super->s_dev; | 1110 | __entry->dev = mp->m_super->s_dev; |
1097 | __entry->agno = agno; | 1111 | __entry->agno = agno; |
1098 | __entry->slot = slot; | 1112 | __entry->agbno = agbno; |
1099 | __entry->found = found; | 1113 | __entry->len = len; |
1100 | ), | 1114 | ), |
1101 | TP_printk("dev %d:%d agno %u slot %d %s", | 1115 | TP_printk("dev %d:%d agno %u agbno %u len %u", |
1102 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1116 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1103 | __entry->agno, | 1117 | __entry->agno, |
1104 | __entry->slot, | 1118 | __entry->agbno, |
1105 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1119 | __entry->len) |
1106 | ); | 1120 | ); |
1107 | 1121 | ||
1122 | #define XFS_BUSY_STATES \ | ||
1123 | { 0, "missing" }, \ | ||
1124 | { 1, "found" } | ||
1125 | |||
1108 | TRACE_EVENT(xfs_alloc_busysearch, | 1126 | TRACE_EVENT(xfs_alloc_busysearch, |
1109 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1127 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1110 | xfs_extlen_t len, xfs_lsn_t lsn), | 1128 | xfs_agblock_t agbno, xfs_extlen_t len, int found), |
1111 | TP_ARGS(mp, agno, agbno, len, lsn), | 1129 | TP_ARGS(mp, agno, agbno, len, found), |
1112 | TP_STRUCT__entry( | 1130 | TP_STRUCT__entry( |
1113 | __field(dev_t, dev) | 1131 | __field(dev_t, dev) |
1114 | __field(xfs_agnumber_t, agno) | 1132 | __field(xfs_agnumber_t, agno) |
1115 | __field(xfs_agblock_t, agbno) | 1133 | __field(xfs_agblock_t, agbno) |
1116 | __field(xfs_extlen_t, len) | 1134 | __field(xfs_extlen_t, len) |
1117 | __field(xfs_lsn_t, lsn) | 1135 | __field(int, found) |
1118 | ), | 1136 | ), |
1119 | TP_fast_assign( | 1137 | TP_fast_assign( |
1120 | __entry->dev = mp->m_super->s_dev; | 1138 | __entry->dev = mp->m_super->s_dev; |
1121 | __entry->agno = agno; | 1139 | __entry->agno = agno; |
1122 | __entry->agbno = agbno; | 1140 | __entry->agbno = agbno; |
1123 | __entry->len = len; | 1141 | __entry->len = len; |
1124 | __entry->lsn = lsn; | 1142 | __entry->found = found; |
1125 | ), | 1143 | ), |
1126 | TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx", | 1144 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", |
1127 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1145 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1128 | __entry->agno, | 1146 | __entry->agno, |
1129 | __entry->agbno, | 1147 | __entry->agbno, |
1130 | __entry->len, | 1148 | __entry->len, |
1149 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | ||
1150 | ); | ||
1151 | |||
1152 | TRACE_EVENT(xfs_trans_commit_lsn, | ||
1153 | TP_PROTO(struct xfs_trans *trans), | ||
1154 | TP_ARGS(trans), | ||
1155 | TP_STRUCT__entry( | ||
1156 | __field(dev_t, dev) | ||
1157 | __field(struct xfs_trans *, tp) | ||
1158 | __field(xfs_lsn_t, lsn) | ||
1159 | ), | ||
1160 | TP_fast_assign( | ||
1161 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
1162 | __entry->tp = trans; | ||
1163 | __entry->lsn = trans->t_commit_lsn; | ||
1164 | ), | ||
1165 | TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", | ||
1166 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1167 | __entry->tp, | ||
1131 | __entry->lsn) | 1168 | __entry->lsn) |
1132 | ); | 1169 | ); |
1133 | 1170 | ||
@@ -1495,6 +1532,140 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \ | |||
1495 | DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); | 1532 | DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); |
1496 | DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); | 1533 | DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); |
1497 | 1534 | ||
1535 | DECLARE_EVENT_CLASS(xfs_log_recover_item_class, | ||
1536 | TP_PROTO(struct log *log, struct xlog_recover *trans, | ||
1537 | struct xlog_recover_item *item, int pass), | ||
1538 | TP_ARGS(log, trans, item, pass), | ||
1539 | TP_STRUCT__entry( | ||
1540 | __field(dev_t, dev) | ||
1541 | __field(unsigned long, item) | ||
1542 | __field(xlog_tid_t, tid) | ||
1543 | __field(int, type) | ||
1544 | __field(int, pass) | ||
1545 | __field(int, count) | ||
1546 | __field(int, total) | ||
1547 | ), | ||
1548 | TP_fast_assign( | ||
1549 | __entry->dev = log->l_mp->m_super->s_dev; | ||
1550 | __entry->item = (unsigned long)item; | ||
1551 | __entry->tid = trans->r_log_tid; | ||
1552 | __entry->type = ITEM_TYPE(item); | ||
1553 | __entry->pass = pass; | ||
1554 | __entry->count = item->ri_cnt; | ||
1555 | __entry->total = item->ri_total; | ||
1556 | ), | ||
1557 | TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s " | ||
1558 | "item region count/total %d/%d", | ||
1559 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1560 | __entry->tid, | ||
1561 | __entry->pass, | ||
1562 | (void *)__entry->item, | ||
1563 | __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), | ||
1564 | __entry->count, | ||
1565 | __entry->total) | ||
1566 | ) | ||
1567 | |||
1568 | #define DEFINE_LOG_RECOVER_ITEM(name) \ | ||
1569 | DEFINE_EVENT(xfs_log_recover_item_class, name, \ | ||
1570 | TP_PROTO(struct log *log, struct xlog_recover *trans, \ | ||
1571 | struct xlog_recover_item *item, int pass), \ | ||
1572 | TP_ARGS(log, trans, item, pass)) | ||
1573 | |||
1574 | DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add); | ||
1575 | DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont); | ||
1576 | DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head); | ||
1577 | DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); | ||
1578 | DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); | ||
1579 | |||
1580 | DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, | ||
1581 | TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), | ||
1582 | TP_ARGS(log, buf_f), | ||
1583 | TP_STRUCT__entry( | ||
1584 | __field(dev_t, dev) | ||
1585 | __field(__int64_t, blkno) | ||
1586 | __field(unsigned short, len) | ||
1587 | __field(unsigned short, flags) | ||
1588 | __field(unsigned short, size) | ||
1589 | __field(unsigned int, map_size) | ||
1590 | ), | ||
1591 | TP_fast_assign( | ||
1592 | __entry->dev = log->l_mp->m_super->s_dev; | ||
1593 | __entry->blkno = buf_f->blf_blkno; | ||
1594 | __entry->len = buf_f->blf_len; | ||
1595 | __entry->flags = buf_f->blf_flags; | ||
1596 | __entry->size = buf_f->blf_size; | ||
1597 | __entry->map_size = buf_f->blf_map_size; | ||
1598 | ), | ||
1599 | TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, " | ||
1600 | "map_size %d", | ||
1601 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1602 | __entry->blkno, | ||
1603 | __entry->len, | ||
1604 | __entry->flags, | ||
1605 | __entry->size, | ||
1606 | __entry->map_size) | ||
1607 | ) | ||
1608 | |||
1609 | #define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ | ||
1610 | DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ | ||
1611 | TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ | ||
1612 | TP_ARGS(log, buf_f)) | ||
1613 | |||
1614 | DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); | ||
1615 | DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); | ||
1616 | DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); | ||
1617 | DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); | ||
1618 | DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); | ||
1619 | DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); | ||
1620 | DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); | ||
1621 | DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); | ||
1622 | |||
1623 | DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, | ||
1624 | TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), | ||
1625 | TP_ARGS(log, in_f), | ||
1626 | TP_STRUCT__entry( | ||
1627 | __field(dev_t, dev) | ||
1628 | __field(xfs_ino_t, ino) | ||
1629 | __field(unsigned short, size) | ||
1630 | __field(int, fields) | ||
1631 | __field(unsigned short, asize) | ||
1632 | __field(unsigned short, dsize) | ||
1633 | __field(__int64_t, blkno) | ||
1634 | __field(int, len) | ||
1635 | __field(int, boffset) | ||
1636 | ), | ||
1637 | TP_fast_assign( | ||
1638 | __entry->dev = log->l_mp->m_super->s_dev; | ||
1639 | __entry->ino = in_f->ilf_ino; | ||
1640 | __entry->size = in_f->ilf_size; | ||
1641 | __entry->fields = in_f->ilf_fields; | ||
1642 | __entry->asize = in_f->ilf_asize; | ||
1643 | __entry->dsize = in_f->ilf_dsize; | ||
1644 | __entry->blkno = in_f->ilf_blkno; | ||
1645 | __entry->len = in_f->ilf_len; | ||
1646 | __entry->boffset = in_f->ilf_boffset; | ||
1647 | ), | ||
1648 | TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " | ||
1649 | "dsize %d, blkno 0x%llx, len %d, boffset %d", | ||
1650 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1651 | __entry->ino, | ||
1652 | __entry->size, | ||
1653 | __entry->fields, | ||
1654 | __entry->asize, | ||
1655 | __entry->dsize, | ||
1656 | __entry->blkno, | ||
1657 | __entry->len, | ||
1658 | __entry->boffset) | ||
1659 | ) | ||
1660 | #define DEFINE_LOG_RECOVER_INO_ITEM(name) \ | ||
1661 | DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ | ||
1662 | TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ | ||
1663 | TP_ARGS(log, in_f)) | ||
1664 | |||
1665 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); | ||
1666 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); | ||
1667 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); | ||
1668 | |||
1498 | #endif /* _TRACE_XFS_H */ | 1669 | #endif /* _TRACE_XFS_H */ |
1499 | 1670 | ||
1500 | #undef TRACE_INCLUDE_PATH | 1671 | #undef TRACE_INCLUDE_PATH |
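Each DEFINE_LOG_RECOVER_* wrapper above is a DEFINE_EVENT against its class, so the recovery code gains one trace_<name>() function per event, all sharing the class field layout. Hedged sketch of the call shapes, following the TP_PROTOs (the actual call sites are added to fs/xfs/xfs_log_recover.c elsewhere in this series):

        trace_xfs_log_recover_item_recover(log, trans, item, pass);
        trace_xfs_log_recover_buf_recover(log, buf_f);
        trace_xfs_log_recover_inode_recover(log, in_f);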
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index fa01b9daba6b..87d3e03878c8 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c | |||
@@ -72,28 +72,28 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, | |||
72 | (void *)value, size, xflags); | 72 | (void *)value, size, xflags); |
73 | } | 73 | } |
74 | 74 | ||
75 | static struct xattr_handler xfs_xattr_user_handler = { | 75 | static const struct xattr_handler xfs_xattr_user_handler = { |
76 | .prefix = XATTR_USER_PREFIX, | 76 | .prefix = XATTR_USER_PREFIX, |
77 | .flags = 0, /* no flags implies user namespace */ | 77 | .flags = 0, /* no flags implies user namespace */ |
78 | .get = xfs_xattr_get, | 78 | .get = xfs_xattr_get, |
79 | .set = xfs_xattr_set, | 79 | .set = xfs_xattr_set, |
80 | }; | 80 | }; |
81 | 81 | ||
82 | static struct xattr_handler xfs_xattr_trusted_handler = { | 82 | static const struct xattr_handler xfs_xattr_trusted_handler = { |
83 | .prefix = XATTR_TRUSTED_PREFIX, | 83 | .prefix = XATTR_TRUSTED_PREFIX, |
84 | .flags = ATTR_ROOT, | 84 | .flags = ATTR_ROOT, |
85 | .get = xfs_xattr_get, | 85 | .get = xfs_xattr_get, |
86 | .set = xfs_xattr_set, | 86 | .set = xfs_xattr_set, |
87 | }; | 87 | }; |
88 | 88 | ||
89 | static struct xattr_handler xfs_xattr_security_handler = { | 89 | static const struct xattr_handler xfs_xattr_security_handler = { |
90 | .prefix = XATTR_SECURITY_PREFIX, | 90 | .prefix = XATTR_SECURITY_PREFIX, |
91 | .flags = ATTR_SECURE, | 91 | .flags = ATTR_SECURE, |
92 | .get = xfs_xattr_get, | 92 | .get = xfs_xattr_get, |
93 | .set = xfs_xattr_set, | 93 | .set = xfs_xattr_set, |
94 | }; | 94 | }; |
95 | 95 | ||
96 | struct xattr_handler *xfs_xattr_handlers[] = { | 96 | const struct xattr_handler *xfs_xattr_handlers[] = { |
97 | &xfs_xattr_user_handler, | 97 | &xfs_xattr_user_handler, |
98 | &xfs_xattr_trusted_handler, | 98 | &xfs_xattr_trusted_handler, |
99 | &xfs_xattr_security_handler, | 99 | &xfs_xattr_security_handler, |
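Constifying the handlers lets the whole table live in read-only data; the xfs_xattr_handlers array is what the VFS walks when resolving an attribute name prefix. The hookup is the usual superblock assignment (illustrative only; this diff does not show the mount path):

        /* at mount time, in xfs_fs_fill_super() or equivalent: */
        sb->s_xattr = xfs_xattr_handlers;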
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 5f79dd78626b..585e7633dfc7 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -101,7 +101,7 @@ xfs_qm_dqinit( | |||
101 | * No need to re-initialize these if this is a reclaimed dquot. | 101 | * No need to re-initialize these if this is a reclaimed dquot. |
102 | */ | 102 | */ |
103 | if (brandnewdquot) { | 103 | if (brandnewdquot) { |
104 | dqp->dq_flnext = dqp->dq_flprev = dqp; | 104 | INIT_LIST_HEAD(&dqp->q_freelist); |
105 | mutex_init(&dqp->q_qlock); | 105 | mutex_init(&dqp->q_qlock); |
106 | init_waitqueue_head(&dqp->q_pinwait); | 106 | init_waitqueue_head(&dqp->q_pinwait); |
107 | 107 | ||
@@ -119,20 +119,20 @@ xfs_qm_dqinit( | |||
119 | * Only the q_core portion was zeroed in dqreclaim_one(). | 119 | * Only the q_core portion was zeroed in dqreclaim_one(). |
120 | * So, we need to reset others. | 120 | * So, we need to reset others. |
121 | */ | 121 | */ |
122 | dqp->q_nrefs = 0; | 122 | dqp->q_nrefs = 0; |
123 | dqp->q_blkno = 0; | 123 | dqp->q_blkno = 0; |
124 | dqp->MPL_NEXT = dqp->HL_NEXT = NULL; | 124 | INIT_LIST_HEAD(&dqp->q_mplist); |
125 | dqp->HL_PREVP = dqp->MPL_PREVP = NULL; | 125 | INIT_LIST_HEAD(&dqp->q_hashlist); |
126 | dqp->q_bufoffset = 0; | 126 | dqp->q_bufoffset = 0; |
127 | dqp->q_fileoffset = 0; | 127 | dqp->q_fileoffset = 0; |
128 | dqp->q_transp = NULL; | 128 | dqp->q_transp = NULL; |
129 | dqp->q_gdquot = NULL; | 129 | dqp->q_gdquot = NULL; |
130 | dqp->q_res_bcount = 0; | 130 | dqp->q_res_bcount = 0; |
131 | dqp->q_res_icount = 0; | 131 | dqp->q_res_icount = 0; |
132 | dqp->q_res_rtbcount = 0; | 132 | dqp->q_res_rtbcount = 0; |
133 | atomic_set(&dqp->q_pincount, 0); | 133 | atomic_set(&dqp->q_pincount, 0); |
134 | dqp->q_hash = NULL; | 134 | dqp->q_hash = NULL; |
135 | ASSERT(dqp->dq_flnext == dqp->dq_flprev); | 135 | ASSERT(list_empty(&dqp->q_freelist)); |
136 | 136 | ||
137 | trace_xfs_dqreuse(dqp); | 137 | trace_xfs_dqreuse(dqp); |
138 | } | 138 | } |
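The open-coded self-referencing freelist pointers give way to the standard <linux/list.h> embedded-node API used throughout the rest of this patch. The three idioms relied on here, as a minimal sketch:

        INIT_LIST_HEAD(&dqp->q_freelist);       /* node points at itself */
        ASSERT(list_empty(&dqp->q_freelist));   /* true until a list_add() */
        list_del_init(&dqp->q_freelist);        /* unlink and re-self-point */

Using list_del_init() rather than plain list_del() is what keeps the list_empty() ASSERTs in this file valid after an entry has been removed.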
@@ -158,7 +158,7 @@ void | |||
158 | xfs_qm_dqdestroy( | 158 | xfs_qm_dqdestroy( |
159 | xfs_dquot_t *dqp) | 159 | xfs_dquot_t *dqp) |
160 | { | 160 | { |
161 | ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); | 161 | ASSERT(list_empty(&dqp->q_freelist)); |
162 | 162 | ||
163 | mutex_destroy(&dqp->q_qlock); | 163 | mutex_destroy(&dqp->q_qlock); |
164 | sv_destroy(&dqp->q_pinwait); | 164 | sv_destroy(&dqp->q_pinwait); |
@@ -252,7 +252,7 @@ xfs_qm_adjust_dqtimers( | |||
252 | (be64_to_cpu(d->d_bcount) >= | 252 | (be64_to_cpu(d->d_bcount) >= |
253 | be64_to_cpu(d->d_blk_hardlimit)))) { | 253 | be64_to_cpu(d->d_blk_hardlimit)))) { |
254 | d->d_btimer = cpu_to_be32(get_seconds() + | 254 | d->d_btimer = cpu_to_be32(get_seconds() + |
255 | XFS_QI_BTIMELIMIT(mp)); | 255 | mp->m_quotainfo->qi_btimelimit); |
256 | } else { | 256 | } else { |
257 | d->d_bwarns = 0; | 257 | d->d_bwarns = 0; |
258 | } | 258 | } |
@@ -275,7 +275,7 @@ xfs_qm_adjust_dqtimers( | |||
275 | (be64_to_cpu(d->d_icount) >= | 275 | (be64_to_cpu(d->d_icount) >= |
276 | be64_to_cpu(d->d_ino_hardlimit)))) { | 276 | be64_to_cpu(d->d_ino_hardlimit)))) { |
277 | d->d_itimer = cpu_to_be32(get_seconds() + | 277 | d->d_itimer = cpu_to_be32(get_seconds() + |
278 | XFS_QI_ITIMELIMIT(mp)); | 278 | mp->m_quotainfo->qi_itimelimit); |
279 | } else { | 279 | } else { |
280 | d->d_iwarns = 0; | 280 | d->d_iwarns = 0; |
281 | } | 281 | } |
@@ -298,7 +298,7 @@ xfs_qm_adjust_dqtimers( | |||
298 | (be64_to_cpu(d->d_rtbcount) >= | 298 | (be64_to_cpu(d->d_rtbcount) >= |
299 | be64_to_cpu(d->d_rtb_hardlimit)))) { | 299 | be64_to_cpu(d->d_rtb_hardlimit)))) { |
300 | d->d_rtbtimer = cpu_to_be32(get_seconds() + | 300 | d->d_rtbtimer = cpu_to_be32(get_seconds() + |
301 | XFS_QI_RTBTIMELIMIT(mp)); | 301 | mp->m_quotainfo->qi_rtbtimelimit); |
302 | } else { | 302 | } else { |
303 | d->d_rtbwarns = 0; | 303 | d->d_rtbwarns = 0; |
304 | } | 304 | } |
@@ -325,6 +325,7 @@ xfs_qm_init_dquot_blk( | |||
325 | uint type, | 325 | uint type, |
326 | xfs_buf_t *bp) | 326 | xfs_buf_t *bp) |
327 | { | 327 | { |
328 | struct xfs_quotainfo *q = mp->m_quotainfo; | ||
328 | xfs_dqblk_t *d; | 329 | xfs_dqblk_t *d; |
329 | int curid, i; | 330 | int curid, i; |
330 | 331 | ||
@@ -337,16 +338,16 @@ xfs_qm_init_dquot_blk( | |||
337 | /* | 338 | /* |
338 | * ID of the first dquot in the block - id's are zero based. | 339 | * ID of the first dquot in the block - id's are zero based. |
339 | */ | 340 | */ |
340 | curid = id - (id % XFS_QM_DQPERBLK(mp)); | 341 | curid = id - (id % q->qi_dqperchunk); |
341 | ASSERT(curid >= 0); | 342 | ASSERT(curid >= 0); |
342 | memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp))); | 343 | memset(d, 0, BBTOB(q->qi_dqchunklen)); |
343 | for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++) | 344 | for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) |
344 | xfs_qm_dqinit_core(curid, type, d); | 345 | xfs_qm_dqinit_core(curid, type, d); |
345 | xfs_trans_dquot_buf(tp, bp, | 346 | xfs_trans_dquot_buf(tp, bp, |
346 | (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF : | 347 | (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : |
347 | ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF : | 348 | ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : |
348 | XFS_BLI_GDQUOT_BUF))); | 349 | XFS_BLF_GDQUOT_BUF))); |
349 | xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1); | 350 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); |
350 | } | 351 | } |
351 | 352 | ||
352 | 353 | ||
@@ -419,7 +420,7 @@ xfs_qm_dqalloc( | |||
419 | /* now we can just get the buffer (there's nothing to read yet) */ | 420 | /* now we can just get the buffer (there's nothing to read yet) */ |
420 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, | 421 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, |
421 | dqp->q_blkno, | 422 | dqp->q_blkno, |
422 | XFS_QI_DQCHUNKLEN(mp), | 423 | mp->m_quotainfo->qi_dqchunklen, |
423 | 0); | 424 | 0); |
424 | if (!bp || (error = XFS_BUF_GETERROR(bp))) | 425 | if (!bp || (error = XFS_BUF_GETERROR(bp))) |
425 | goto error1; | 426 | goto error1; |
@@ -500,7 +501,8 @@ xfs_qm_dqtobp( | |||
500 | */ | 501 | */ |
501 | if (dqp->q_blkno == (xfs_daddr_t) 0) { | 502 | if (dqp->q_blkno == (xfs_daddr_t) 0) { |
502 | /* We use the id as an index */ | 503 | /* We use the id as an index */ |
503 | dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp); | 504 | dqp->q_fileoffset = (xfs_fileoff_t)id / |
505 | mp->m_quotainfo->qi_dqperchunk; | ||
504 | nmaps = 1; | 506 | nmaps = 1; |
505 | quotip = XFS_DQ_TO_QIP(dqp); | 507 | quotip = XFS_DQ_TO_QIP(dqp); |
506 | xfs_ilock(quotip, XFS_ILOCK_SHARED); | 508 | xfs_ilock(quotip, XFS_ILOCK_SHARED); |
@@ -529,7 +531,7 @@ xfs_qm_dqtobp( | |||
529 | /* | 531 | /* |
530 | * offset of dquot in the (fixed sized) dquot chunk. | 532 | * offset of dquot in the (fixed sized) dquot chunk. |
531 | */ | 533 | */ |
532 | dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) * | 534 | dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * |
533 | sizeof(xfs_dqblk_t); | 535 | sizeof(xfs_dqblk_t); |
534 | if (map.br_startblock == HOLESTARTBLOCK) { | 536 | if (map.br_startblock == HOLESTARTBLOCK) { |
535 | /* | 537 | /* |
@@ -559,15 +561,13 @@ xfs_qm_dqtobp( | |||
559 | * Read in the buffer, unless we've just done the allocation | 561 | * Read in the buffer, unless we've just done the allocation |
560 | * (in which case we already have the buf). | 562 | * (in which case we already have the buf). |
561 | */ | 563 | */ |
562 | if (! newdquot) { | 564 | if (!newdquot) { |
563 | trace_xfs_dqtobp_read(dqp); | 565 | trace_xfs_dqtobp_read(dqp); |
564 | 566 | ||
565 | if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 567 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
566 | dqp->q_blkno, | 568 | dqp->q_blkno, |
567 | XFS_QI_DQCHUNKLEN(mp), | 569 | mp->m_quotainfo->qi_dqchunklen, |
568 | 0, &bp))) { | 570 | 0, &bp); |
569 | return (error); | ||
570 | } | ||
571 | if (error || !bp) | 571 | if (error || !bp) |
572 | return XFS_ERROR(error); | 572 | return XFS_ERROR(error); |
573 | } | 573 | } |
@@ -689,14 +689,14 @@ xfs_qm_idtodq( | |||
689 | tp = NULL; | 689 | tp = NULL; |
690 | if (flags & XFS_QMOPT_DQALLOC) { | 690 | if (flags & XFS_QMOPT_DQALLOC) { |
691 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); | 691 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); |
692 | if ((error = xfs_trans_reserve(tp, | 692 | error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), |
693 | XFS_QM_DQALLOC_SPACE_RES(mp), | 693 | XFS_WRITE_LOG_RES(mp) + |
694 | XFS_WRITE_LOG_RES(mp) + | 694 | BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + |
695 | BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 + | 695 | 128, |
696 | 128, | 696 | 0, |
697 | 0, | 697 | XFS_TRANS_PERM_LOG_RES, |
698 | XFS_TRANS_PERM_LOG_RES, | 698 | XFS_WRITE_LOG_COUNT); |
699 | XFS_WRITE_LOG_COUNT))) { | 699 | if (error) { |
700 | cancelflags = 0; | 700 | cancelflags = 0; |
701 | goto error0; | 701 | goto error0; |
702 | } | 702 | } |
@@ -751,7 +751,6 @@ xfs_qm_dqlookup( | |||
751 | { | 751 | { |
752 | xfs_dquot_t *dqp; | 752 | xfs_dquot_t *dqp; |
753 | uint flist_locked; | 753 | uint flist_locked; |
754 | xfs_dquot_t *d; | ||
755 | 754 | ||
756 | ASSERT(mutex_is_locked(&qh->qh_lock)); | 755 | ASSERT(mutex_is_locked(&qh->qh_lock)); |
757 | 756 | ||
@@ -760,7 +759,7 @@ xfs_qm_dqlookup( | |||
760 | /* | 759 | /* |
761 | * Traverse the hashchain looking for a match | 760 | * Traverse the hashchain looking for a match |
762 | */ | 761 | */ |
763 | for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) { | 762 | list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { |
764 | /* | 763 | /* |
765 | * We already have the hashlock. We don't need the | 764 | * We already have the hashlock. We don't need the |
766 | * dqlock to look at the id field of the dquot, since the | 765 | * dqlock to look at the id field of the dquot, since the |
@@ -772,12 +771,12 @@ xfs_qm_dqlookup( | |||
772 | /* | 771 | /* |
773 | * All in core dquots must be on the dqlist of mp | 772 | * All in core dquots must be on the dqlist of mp |
774 | */ | 773 | */ |
775 | ASSERT(dqp->MPL_PREVP != NULL); | 774 | ASSERT(!list_empty(&dqp->q_mplist)); |
776 | 775 | ||
777 | xfs_dqlock(dqp); | 776 | xfs_dqlock(dqp); |
778 | if (dqp->q_nrefs == 0) { | 777 | if (dqp->q_nrefs == 0) { |
779 | ASSERT (XFS_DQ_IS_ON_FREELIST(dqp)); | 778 | ASSERT(!list_empty(&dqp->q_freelist)); |
780 | if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { | 779 | if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { |
781 | trace_xfs_dqlookup_want(dqp); | 780 | trace_xfs_dqlookup_want(dqp); |
782 | 781 | ||
783 | /* | 782 | /* |
@@ -787,7 +786,7 @@ xfs_qm_dqlookup( | |||
787 | */ | 786 | */ |
788 | dqp->dq_flags |= XFS_DQ_WANT; | 787 | dqp->dq_flags |= XFS_DQ_WANT; |
789 | xfs_dqunlock(dqp); | 788 | xfs_dqunlock(dqp); |
790 | xfs_qm_freelist_lock(xfs_Gqm); | 789 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); |
791 | xfs_dqlock(dqp); | 790 | xfs_dqlock(dqp); |
792 | dqp->dq_flags &= ~(XFS_DQ_WANT); | 791 | dqp->dq_flags &= ~(XFS_DQ_WANT); |
793 | } | 792 | } |
@@ -802,46 +801,28 @@ xfs_qm_dqlookup( | |||
802 | 801 | ||
803 | if (flist_locked) { | 802 | if (flist_locked) { |
804 | if (dqp->q_nrefs != 0) { | 803 | if (dqp->q_nrefs != 0) { |
805 | xfs_qm_freelist_unlock(xfs_Gqm); | 804 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
806 | flist_locked = B_FALSE; | 805 | flist_locked = B_FALSE; |
807 | } else { | 806 | } else { |
808 | /* | 807 | /* take it off the freelist */ |
809 | * take it off the freelist | ||
810 | */ | ||
811 | trace_xfs_dqlookup_freelist(dqp); | 808 | trace_xfs_dqlookup_freelist(dqp); |
812 | XQM_FREELIST_REMOVE(dqp); | 809 | list_del_init(&dqp->q_freelist); |
813 | /* xfs_qm_freelist_print(&(xfs_Gqm-> | 810 | xfs_Gqm->qm_dqfrlist_cnt--; |
814 | qm_dqfreelist), | ||
815 | "after removal"); */ | ||
816 | } | 811 | } |
817 | } | 812 | } |
818 | 813 | ||
819 | /* | ||
820 | * grab a reference | ||
821 | */ | ||
822 | XFS_DQHOLD(dqp); | 814 | XFS_DQHOLD(dqp); |
823 | 815 | ||
824 | if (flist_locked) | 816 | if (flist_locked) |
825 | xfs_qm_freelist_unlock(xfs_Gqm); | 817 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
826 | /* | 818 | /* |
827 | * move the dquot to the front of the hashchain | 819 | * move the dquot to the front of the hashchain |
828 | */ | 820 | */ |
829 | ASSERT(mutex_is_locked(&qh->qh_lock)); | 821 | ASSERT(mutex_is_locked(&qh->qh_lock)); |
830 | if (dqp->HL_PREVP != &qh->qh_next) { | 822 | list_move(&dqp->q_hashlist, &qh->qh_list); |
831 | trace_xfs_dqlookup_move(dqp); | ||
832 | if ((d = dqp->HL_NEXT)) | ||
833 | d->HL_PREVP = dqp->HL_PREVP; | ||
834 | *(dqp->HL_PREVP) = d; | ||
835 | d = qh->qh_next; | ||
836 | d->HL_PREVP = &dqp->HL_NEXT; | ||
837 | dqp->HL_NEXT = d; | ||
838 | dqp->HL_PREVP = &qh->qh_next; | ||
839 | qh->qh_next = dqp; | ||
840 | } | ||
841 | trace_xfs_dqlookup_done(dqp); | 823 | trace_xfs_dqlookup_done(dqp); |
842 | *O_dqpp = dqp; | 824 | *O_dqpp = dqp; |
843 | ASSERT(mutex_is_locked(&qh->qh_lock)); | 825 | return 0; |
844 | return (0); | ||
845 | } | 826 | } |
846 | } | 827 | } |
847 | 828 | ||
@@ -975,16 +956,17 @@ xfs_qm_dqget( | |||
975 | */ | 956 | */ |
976 | if (ip) { | 957 | if (ip) { |
977 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 958 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
978 | if (! XFS_IS_DQTYPE_ON(mp, type)) { | 959 | |
979 | /* inode stays locked on return */ | ||
980 | xfs_qm_dqdestroy(dqp); | ||
981 | return XFS_ERROR(ESRCH); | ||
982 | } | ||
983 | /* | 960 | /* |
984 | * A dquot could be attached to this inode by now, since | 961 | * A dquot could be attached to this inode by now, since |
985 | * we had dropped the ilock. | 962 | * we had dropped the ilock. |
986 | */ | 963 | */ |
987 | if (type == XFS_DQ_USER) { | 964 | if (type == XFS_DQ_USER) { |
965 | if (!XFS_IS_UQUOTA_ON(mp)) { | ||
966 | /* inode stays locked on return */ | ||
967 | xfs_qm_dqdestroy(dqp); | ||
968 | return XFS_ERROR(ESRCH); | ||
969 | } | ||
988 | if (ip->i_udquot) { | 970 | if (ip->i_udquot) { |
989 | xfs_qm_dqdestroy(dqp); | 971 | xfs_qm_dqdestroy(dqp); |
990 | dqp = ip->i_udquot; | 972 | dqp = ip->i_udquot; |
@@ -992,6 +974,11 @@ xfs_qm_dqget( | |||
992 | goto dqret; | 974 | goto dqret; |
993 | } | 975 | } |
994 | } else { | 976 | } else { |
977 | if (!XFS_IS_OQUOTA_ON(mp)) { | ||
978 | /* inode stays locked on return */ | ||
979 | xfs_qm_dqdestroy(dqp); | ||
980 | return XFS_ERROR(ESRCH); | ||
981 | } | ||
995 | if (ip->i_gdquot) { | 982 | if (ip->i_gdquot) { |
996 | xfs_qm_dqdestroy(dqp); | 983 | xfs_qm_dqdestroy(dqp); |
997 | dqp = ip->i_gdquot; | 984 | dqp = ip->i_gdquot; |
@@ -1033,13 +1020,14 @@ xfs_qm_dqget( | |||
1033 | */ | 1020 | */ |
1034 | ASSERT(mutex_is_locked(&h->qh_lock)); | 1021 | ASSERT(mutex_is_locked(&h->qh_lock)); |
1035 | dqp->q_hash = h; | 1022 | dqp->q_hash = h; |
1036 | XQM_HASHLIST_INSERT(h, dqp); | 1023 | list_add(&dqp->q_hashlist, &h->qh_list); |
1024 | h->qh_version++; | ||
1037 | 1025 | ||
1038 | /* | 1026 | /* |
1039 | * Attach this dquot to this filesystem's list of all dquots, | 1027 | * Attach this dquot to this filesystem's list of all dquots, |
1040 | * kept inside the mount structure in m_quotainfo field | 1028 | * kept inside the mount structure in m_quotainfo field |
1041 | */ | 1029 | */ |
1042 | xfs_qm_mplist_lock(mp); | 1030 | mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); |
1043 | 1031 | ||
1044 | /* | 1032 | /* |
1045 | * We return a locked dquot to the caller, with a reference taken | 1033 | * We return a locked dquot to the caller, with a reference taken |
@@ -1047,9 +1035,9 @@ xfs_qm_dqget( | |||
1047 | xfs_dqlock(dqp); | 1035 | xfs_dqlock(dqp); |
1048 | dqp->q_nrefs = 1; | 1036 | dqp->q_nrefs = 1; |
1049 | 1037 | ||
1050 | XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp); | 1038 | list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); |
1051 | 1039 | mp->m_quotainfo->qi_dquots++; | |
1052 | xfs_qm_mplist_unlock(mp); | 1040 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); |
1053 | mutex_unlock(&h->qh_lock); | 1041 | mutex_unlock(&h->qh_lock); |
1054 | dqret: | 1042 | dqret: |
1055 | ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 1043 | ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
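Bumping qh_version on every hash-chain insertion gives restart semantics to callers that must drop qh_lock mid-lookup. A hedged sketch of how the counter is consumed (the dqget slow path does something close to this around its allocation and disk read; not shown in this hunk):

        version = h->qh_version;
        mutex_unlock(&h->qh_lock);
        /* ... allocate the dquot, possibly read it from disk ... */
        mutex_lock(&h->qh_lock);
        if (version != h->qh_version) {
                /* chain changed underneath us: redo the hash lookup */
        }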
@@ -1086,10 +1074,10 @@ xfs_qm_dqput( | |||
1086 | * drop the dqlock and acquire the freelist and dqlock | 1074 | * drop the dqlock and acquire the freelist and dqlock |
1087 | * in the right order; but try to get it out-of-order first | 1075 | * in the right order; but try to get it out-of-order first |
1088 | */ | 1076 | */ |
1089 | if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { | 1077 | if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { |
1090 | trace_xfs_dqput_wait(dqp); | 1078 | trace_xfs_dqput_wait(dqp); |
1091 | xfs_dqunlock(dqp); | 1079 | xfs_dqunlock(dqp); |
1092 | xfs_qm_freelist_lock(xfs_Gqm); | 1080 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); |
1093 | xfs_dqlock(dqp); | 1081 | xfs_dqlock(dqp); |
1094 | } | 1082 | } |
1095 | 1083 | ||
@@ -1100,10 +1088,8 @@ xfs_qm_dqput( | |||
1100 | if (--dqp->q_nrefs == 0) { | 1088 | if (--dqp->q_nrefs == 0) { |
1101 | trace_xfs_dqput_free(dqp); | 1089 | trace_xfs_dqput_free(dqp); |
1102 | 1090 | ||
1103 | /* | 1091 | list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); |
1104 | * insert at end of the freelist. | 1092 | xfs_Gqm->qm_dqfrlist_cnt++; |
1105 | */ | ||
1106 | XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp); | ||
1107 | 1093 | ||
1108 | /* | 1094 | /* |
1109 | * If we just added a udquot to the freelist, then | 1095 | * If we just added a udquot to the freelist, then |
@@ -1118,10 +1104,6 @@ xfs_qm_dqput( | |||
1118 | xfs_dqlock(gdqp); | 1104 | xfs_dqlock(gdqp); |
1119 | dqp->q_gdquot = NULL; | 1105 | dqp->q_gdquot = NULL; |
1120 | } | 1106 | } |
1121 | |||
1122 | /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist), | ||
1123 | "@@@@@++ Free list (after append) @@@@@+"); | ||
1124 | */ | ||
1125 | } | 1107 | } |
1126 | xfs_dqunlock(dqp); | 1108 | xfs_dqunlock(dqp); |
1127 | 1109 | ||
@@ -1133,7 +1115,7 @@ xfs_qm_dqput( | |||
1133 | break; | 1115 | break; |
1134 | dqp = gdqp; | 1116 | dqp = gdqp; |
1135 | } | 1117 | } |
1136 | xfs_qm_freelist_unlock(xfs_Gqm); | 1118 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
1137 | } | 1119 | } |
1138 | 1120 | ||
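The trylock dance above encodes the lock ordering rule: the global freelist lock nests outside the dquot lock. Holding only the dquot lock, the code first tries the freelist lock opportunistically, and only on contention pays for the unlock/relock cycle. The idiom, distilled from the hunk:

        if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
                xfs_dqunlock(dqp);                       /* back off inner lock */
                mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);  /* take outer lock first */
                xfs_dqlock(dqp);                         /* retake inner lock */
        }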
1139 | /* | 1121 | /* |
@@ -1386,10 +1368,10 @@ int | |||
1386 | xfs_qm_dqpurge( | 1368 | xfs_qm_dqpurge( |
1387 | xfs_dquot_t *dqp) | 1369 | xfs_dquot_t *dqp) |
1388 | { | 1370 | { |
1389 | xfs_dqhash_t *thishash; | 1371 | xfs_dqhash_t *qh = dqp->q_hash; |
1390 | xfs_mount_t *mp = dqp->q_mount; | 1372 | xfs_mount_t *mp = dqp->q_mount; |
1391 | 1373 | ||
1392 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); | 1374 | ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock)); |
1393 | ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); | 1375 | ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); |
1394 | 1376 | ||
1395 | xfs_dqlock(dqp); | 1377 | xfs_dqlock(dqp); |
@@ -1407,7 +1389,7 @@ xfs_qm_dqpurge( | |||
1407 | return (1); | 1389 | return (1); |
1408 | } | 1390 | } |
1409 | 1391 | ||
1410 | ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); | 1392 | ASSERT(!list_empty(&dqp->q_freelist)); |
1411 | 1393 | ||
1412 | /* | 1394 | /* |
1413 | * If we're turning off quotas, we have to make sure that, for | 1395 | * If we're turning off quotas, we have to make sure that, for |
@@ -1452,14 +1434,16 @@ xfs_qm_dqpurge( | |||
1452 | ASSERT(XFS_FORCED_SHUTDOWN(mp) || | 1434 | ASSERT(XFS_FORCED_SHUTDOWN(mp) || |
1453 | !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); | 1435 | !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); |
1454 | 1436 | ||
1455 | thishash = dqp->q_hash; | 1437 | list_del_init(&dqp->q_hashlist); |
1456 | XQM_HASHLIST_REMOVE(thishash, dqp); | 1438 | qh->qh_version++; |
1457 | XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp); | 1439 | list_del_init(&dqp->q_mplist); |
1440 | mp->m_quotainfo->qi_dqreclaims++; | ||
1441 | mp->m_quotainfo->qi_dquots--; | ||
1458 | /* | 1442 | /* |
1459 | * XXX Move this to the front of the freelist, if we can get the | 1443 | * XXX Move this to the front of the freelist, if we can get the |
1460 | * freelist lock. | 1444 | * freelist lock. |
1461 | */ | 1445 | */ |
1462 | ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); | 1446 | ASSERT(!list_empty(&dqp->q_freelist)); |
1463 | 1447 | ||
1464 | dqp->q_mount = NULL; | 1448 | dqp->q_mount = NULL; |
1465 | dqp->q_hash = NULL; | 1449 | dqp->q_hash = NULL; |
@@ -1467,7 +1451,7 @@ xfs_qm_dqpurge( | |||
1467 | memset(&dqp->q_core, 0, sizeof(dqp->q_core)); | 1451 | memset(&dqp->q_core, 0, sizeof(dqp->q_core)); |
1468 | xfs_dqfunlock(dqp); | 1452 | xfs_dqfunlock(dqp); |
1469 | xfs_dqunlock(dqp); | 1453 | xfs_dqunlock(dqp); |
1470 | mutex_unlock(&thishash->qh_lock); | 1454 | mutex_unlock(&qh->qh_lock); |
1471 | return (0); | 1455 | return (0); |
1472 | } | 1456 | } |
1473 | 1457 | ||
@@ -1517,6 +1501,7 @@ void | |||
1517 | xfs_qm_dqflock_pushbuf_wait( | 1501 | xfs_qm_dqflock_pushbuf_wait( |
1518 | xfs_dquot_t *dqp) | 1502 | xfs_dquot_t *dqp) |
1519 | { | 1503 | { |
1504 | xfs_mount_t *mp = dqp->q_mount; | ||
1520 | xfs_buf_t *bp; | 1505 | xfs_buf_t *bp; |
1521 | 1506 | ||
1522 | /* | 1507 | /* |
@@ -1525,14 +1510,14 @@ xfs_qm_dqflock_pushbuf_wait( | |||
1525 | * out immediately. We'll be able to acquire | 1510 | * out immediately. We'll be able to acquire |
1526 | * the flush lock when the I/O completes. | 1511 | * the flush lock when the I/O completes. |
1527 | */ | 1512 | */ |
1528 | bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, | 1513 | bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno, |
1529 | XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK); | 1514 | mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); |
1530 | if (!bp) | 1515 | if (!bp) |
1531 | goto out_lock; | 1516 | goto out_lock; |
1532 | 1517 | ||
1533 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 1518 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
1534 | if (XFS_BUF_ISPINNED(bp)) | 1519 | if (XFS_BUF_ISPINNED(bp)) |
1535 | xfs_log_force(dqp->q_mount, 0); | 1520 | xfs_log_force(mp, 0); |
1536 | xfs_buf_delwri_promote(bp); | 1521 | xfs_buf_delwri_promote(bp); |
1537 | wake_up_process(bp->b_target->bt_task); | 1522 | wake_up_process(bp->b_target->bt_task); |
1538 | } | 1523 | } |
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index a0f7da586d1b..5da3a23b820d 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h | |||
@@ -33,40 +33,23 @@ | |||
33 | * The hash chain headers (hash buckets) | 33 | * The hash chain headers (hash buckets) |
34 | */ | 34 | */ |
35 | typedef struct xfs_dqhash { | 35 | typedef struct xfs_dqhash { |
36 | struct xfs_dquot *qh_next; | 36 | struct list_head qh_list; |
37 | struct mutex qh_lock; | 37 | struct mutex qh_lock; |
38 | uint qh_version; /* ever increasing version */ | 38 | uint qh_version; /* ever increasing version */ |
39 | uint qh_nelems; /* number of dquots on the list */ | 39 | uint qh_nelems; /* number of dquots on the list */ |
40 | } xfs_dqhash_t; | 40 | } xfs_dqhash_t; |
41 | 41 | ||
42 | typedef struct xfs_dqlink { | ||
43 | struct xfs_dquot *ql_next; /* forward link */ | ||
44 | struct xfs_dquot **ql_prevp; /* pointer to prev ql_next */ | ||
45 | } xfs_dqlink_t; | ||
46 | |||
47 | struct xfs_mount; | 42 | struct xfs_mount; |
48 | struct xfs_trans; | 43 | struct xfs_trans; |
49 | 44 | ||
50 | /* | 45 | /* |
51 | * This is the marker which is designed to occupy the first few | ||
52 | * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers | ||
53 | * must come first. | ||
54 | * This serves as the marker ("sentinel") when we have to restart list | ||
55 | * iterations because of locking considerations. | ||
56 | */ | ||
57 | typedef struct xfs_dqmarker { | ||
58 | struct xfs_dquot*dqm_flnext; /* link to freelist: must be first */ | ||
59 | struct xfs_dquot*dqm_flprev; | ||
60 | xfs_dqlink_t dqm_mplist; /* link to mount's list of dquots */ | ||
61 | xfs_dqlink_t dqm_hashlist; /* link to the hash chain */ | ||
62 | uint dqm_flags; /* various flags (XFS_DQ_*) */ | ||
63 | } xfs_dqmarker_t; | ||
64 | |||
65 | /* | ||
66 | * The incore dquot structure | 46 | * The incore dquot structure |
67 | */ | 47 | */ |
68 | typedef struct xfs_dquot { | 48 | typedef struct xfs_dquot { |
69 | xfs_dqmarker_t q_lists; /* list ptrs, q_flags (marker) */ | 49 | uint dq_flags; /* various flags (XFS_DQ_*) */ |
50 | struct list_head q_freelist; /* global free list of dquots */ | ||
51 | struct list_head q_mplist; /* mount's list of dquots */ | ||
52 | struct list_head q_hashlist; /* global hash list of dquots */ ||
70 | xfs_dqhash_t *q_hash; /* the hashchain header */ | 53 | xfs_dqhash_t *q_hash; /* the hashchain header */ |
71 | struct xfs_mount*q_mount; /* filesystem this relates to */ | 54 | struct xfs_mount*q_mount; /* filesystem this relates to */ |
72 | struct xfs_trans*q_transp; /* trans this belongs to currently */ | 55 | struct xfs_trans*q_transp; /* trans this belongs to currently */ |
@@ -87,13 +70,6 @@ typedef struct xfs_dquot { | |||
87 | wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ | 70 | wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ |
88 | } xfs_dquot_t; | 71 | } xfs_dquot_t; |
89 | 72 | ||
90 | |||
91 | #define dq_flnext q_lists.dqm_flnext | ||
92 | #define dq_flprev q_lists.dqm_flprev | ||
93 | #define dq_mplist q_lists.dqm_mplist | ||
94 | #define dq_hashlist q_lists.dqm_hashlist | ||
95 | #define dq_flags q_lists.dqm_flags | ||
96 | |||
97 | /* | 73 | /* |
98 | * Lock hierarchy for q_qlock: | 74 | * Lock hierarchy for q_qlock: |
99 | * XFS_QLOCK_NORMAL is the implicit default, | 75 | * XFS_QLOCK_NORMAL is the implicit default, |
@@ -127,7 +103,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp) | |||
127 | } | 103 | } |
128 | 104 | ||
129 | #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) | 105 | #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) |
130 | #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) | ||
131 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) | 106 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) |
132 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) | 107 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) |
133 | #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) | 108 | #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) |
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 4e4ee9a57194..8d89a24ae324 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c | |||
@@ -107,8 +107,7 @@ xfs_qm_dquot_logitem_pin( | |||
107 | /* ARGSUSED */ | 107 | /* ARGSUSED */ |
108 | STATIC void | 108 | STATIC void |
109 | xfs_qm_dquot_logitem_unpin( | 109 | xfs_qm_dquot_logitem_unpin( |
110 | xfs_dq_logitem_t *logitem, | 110 | xfs_dq_logitem_t *logitem) |
111 | int stale) | ||
112 | { | 111 | { |
113 | xfs_dquot_t *dqp = logitem->qli_dquot; | 112 | xfs_dquot_t *dqp = logitem->qli_dquot; |
114 | 113 | ||
@@ -123,7 +122,7 @@ xfs_qm_dquot_logitem_unpin_remove( | |||
123 | xfs_dq_logitem_t *logitem, | 122 | xfs_dq_logitem_t *logitem, |
124 | xfs_trans_t *tp) | 123 | xfs_trans_t *tp) |
125 | { | 124 | { |
126 | xfs_qm_dquot_logitem_unpin(logitem, 0); | 125 | xfs_qm_dquot_logitem_unpin(logitem); |
127 | } | 126 | } |
128 | 127 | ||
129 | /* | 128 | /* |
@@ -228,7 +227,7 @@ xfs_qm_dquot_logitem_pushbuf( | |||
228 | } | 227 | } |
229 | mp = dqp->q_mount; | 228 | mp = dqp->q_mount; |
230 | bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, | 229 | bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, |
231 | XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK); | 230 | mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); |
232 | xfs_dqunlock(dqp); | 231 | xfs_dqunlock(dqp); |
233 | if (!bp) | 232 | if (!bp) |
234 | return; | 233 | return; |
@@ -329,8 +328,7 @@ static struct xfs_item_ops xfs_dquot_item_ops = { | |||
329 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) | 328 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) |
330 | xfs_qm_dquot_logitem_format, | 329 | xfs_qm_dquot_logitem_format, |
331 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, | 330 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, |
332 | .iop_unpin = (void(*)(xfs_log_item_t*, int)) | 331 | .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin, |
333 | xfs_qm_dquot_logitem_unpin, | ||
334 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) | 332 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) |
335 | xfs_qm_dquot_logitem_unpin_remove, | 333 | xfs_qm_dquot_logitem_unpin_remove, |
336 | .iop_trylock = (uint(*)(xfs_log_item_t*)) | 334 | .iop_trylock = (uint(*)(xfs_log_item_t*)) |
@@ -357,9 +355,8 @@ xfs_qm_dquot_logitem_init( | |||
357 | xfs_dq_logitem_t *lp; | 355 | xfs_dq_logitem_t *lp; |
358 | lp = &dqp->q_logitem; | 356 | lp = &dqp->q_logitem; |
359 | 357 | ||
360 | lp->qli_item.li_type = XFS_LI_DQUOT; | 358 | xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, |
361 | lp->qli_item.li_ops = &xfs_dquot_item_ops; | 359 | &xfs_dquot_item_ops); |
362 | lp->qli_item.li_mountp = dqp->q_mount; | ||
363 | lp->qli_dquot = dqp; | 360 | lp->qli_dquot = dqp; |
364 | lp->qli_format.qlf_type = XFS_LI_DQUOT; | 361 | lp->qli_format.qlf_type = XFS_LI_DQUOT; |
365 | lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); | 362 | lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); |
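xfs_log_item_init() replaces the three hand-rolled field assignments. Its presumed shape, hedged since the helper itself is introduced elsewhere in this series:

        void
        xfs_log_item_init(
                struct xfs_mount        *mp,
                struct xfs_log_item     *item,
                int                     type,
                struct xfs_item_ops     *ops)
        {
                item->li_mountp = mp;
                item->li_ailp = mp->m_ail;  /* assumption: AIL pointer wired here */
                item->li_type = type;
                item->li_ops = ops;
        }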
@@ -426,7 +423,7 @@ xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf) | |||
426 | */ | 423 | */ |
427 | /*ARGSUSED*/ | 424 | /*ARGSUSED*/ |
428 | STATIC void | 425 | STATIC void |
429 | xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale) | 426 | xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf) |
430 | { | 427 | { |
431 | return; | 428 | return; |
432 | } | 429 | } |
@@ -537,8 +534,7 @@ static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { | |||
537 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) | 534 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) |
538 | xfs_qm_qoff_logitem_format, | 535 | xfs_qm_qoff_logitem_format, |
539 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, | 536 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, |
540 | .iop_unpin = (void(*)(xfs_log_item_t* ,int)) | 537 | .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, |
541 | xfs_qm_qoff_logitem_unpin, | ||
542 | .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) | 538 | .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) |
543 | xfs_qm_qoff_logitem_unpin_remove, | 539 | xfs_qm_qoff_logitem_unpin_remove, |
544 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, | 540 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, |
@@ -559,8 +555,7 @@ static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { | |||
559 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) | 555 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) |
560 | xfs_qm_qoff_logitem_format, | 556 | xfs_qm_qoff_logitem_format, |
561 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, | 557 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, |
562 | .iop_unpin = (void(*)(xfs_log_item_t*, int)) | 558 | .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, |
563 | xfs_qm_qoff_logitem_unpin, | ||
564 | .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) | 559 | .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) |
565 | xfs_qm_qoff_logitem_unpin_remove, | 560 | xfs_qm_qoff_logitem_unpin_remove, |
566 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, | 561 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, |
@@ -586,11 +581,8 @@ xfs_qm_qoff_logitem_init( | |||
586 | 581 | ||
587 | qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); | 582 | qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); |
588 | 583 | ||
589 | qf->qql_item.li_type = XFS_LI_QUOTAOFF; | 584 | xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? |
590 | if (start) | 585 | &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); |
591 | qf->qql_item.li_ops = &xfs_qm_qoffend_logitem_ops; | ||
592 | else | ||
593 | qf->qql_item.li_ops = &xfs_qm_qoff_logitem_ops; | ||
594 | qf->qql_item.li_mountp = mp; | 586 | qf->qql_item.li_mountp = mp; |
595 | qf->qql_format.qf_type = XFS_LI_QUOTAOFF; | 587 | qf->qql_format.qf_type = XFS_LI_QUOTAOFF; |
596 | qf->qql_format.qf_flags = flags; | 588 | qf->qql_format.qf_flags = flags; |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 417e61e3d9dd..38e764146644 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -67,9 +67,6 @@ static cred_t xfs_zerocr; | |||
67 | STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); | 67 | STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); |
68 | STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); | 68 | STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); |
69 | 69 | ||
70 | STATIC void xfs_qm_freelist_init(xfs_frlist_t *); | ||
71 | STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *); | ||
72 | |||
73 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 70 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
74 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 71 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
75 | STATIC int xfs_qm_shake(int, gfp_t); | 72 | STATIC int xfs_qm_shake(int, gfp_t); |
@@ -84,21 +81,25 @@ extern struct mutex qcheck_lock; | |||
84 | #endif | 81 | #endif |
85 | 82 | ||
86 | #ifdef QUOTADEBUG | 83 | #ifdef QUOTADEBUG |
87 | #define XQM_LIST_PRINT(l, NXT, title) \ | 84 | static void |
88 | { \ | 85 | xfs_qm_dquot_list_print( |
89 | xfs_dquot_t *dqp; int i = 0; \ | 86 | struct xfs_mount *mp) |
90 | cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ | 87 | { |
91 | for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \ | 88 | xfs_dquot_t *dqp; |
92 | cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " \ | 89 | int i = 0; |
93 | "bcnt = %d, icnt = %d, refs = %d", \ | 90 | |
94 | ++i, (int) be32_to_cpu(dqp->q_core.d_id), \ | 91 | list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { |
95 | DQFLAGTO_TYPESTR(dqp), \ | 92 | cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " |
96 | (int) be64_to_cpu(dqp->q_core.d_bcount), \ | 93 | "bcnt = %lld, icnt = %lld, refs = %d", |
97 | (int) be64_to_cpu(dqp->q_core.d_icount), \ | 94 | i++, be32_to_cpu(dqp->q_core.d_id), |
98 | (int) dqp->q_nrefs); } \ | 95 | DQFLAGTO_TYPESTR(dqp), |
96 | (long long)be64_to_cpu(dqp->q_core.d_bcount), | ||
97 | (long long)be64_to_cpu(dqp->q_core.d_icount), | ||
98 | dqp->q_nrefs); | ||
99 | } | ||
99 | } | 100 | } |
100 | #else | 101 | #else |
101 | #define XQM_LIST_PRINT(l, NXT, title) do { } while (0) | 102 | static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { } |
102 | #endif | 103 | #endif |
103 | 104 | ||
104 | /* | 105 | /* |
@@ -144,7 +145,9 @@ xfs_Gqm_init(void) | |||
144 | /* | 145 | /* |
145 | * Freelist of all dquots of all file systems | 146 | * Freelist of all dquots of all file systems |
146 | */ | 147 | */ |
147 | xfs_qm_freelist_init(&(xqm->qm_dqfreelist)); | 148 | INIT_LIST_HEAD(&xqm->qm_dqfrlist); |
149 | xqm->qm_dqfrlist_cnt = 0; | ||
150 | mutex_init(&xqm->qm_dqfrlist_lock); | ||
148 | 151 | ||
149 | /* | 152 | /* |
150 | * dquot zone. we register our own low-memory callback. | 153 | * dquot zone. we register our own low-memory callback. |
@@ -189,6 +192,7 @@ STATIC void | |||
189 | xfs_qm_destroy( | 192 | xfs_qm_destroy( |
190 | struct xfs_qm *xqm) | 193 | struct xfs_qm *xqm) |
191 | { | 194 | { |
195 | struct xfs_dquot *dqp, *n; | ||
192 | int hsize, i; | 196 | int hsize, i; |
193 | 197 | ||
194 | ASSERT(xqm != NULL); | 198 | ASSERT(xqm != NULL); |
@@ -204,7 +208,21 @@ xfs_qm_destroy( | |||
204 | xqm->qm_usr_dqhtable = NULL; | 208 | xqm->qm_usr_dqhtable = NULL; |
205 | xqm->qm_grp_dqhtable = NULL; | 209 | xqm->qm_grp_dqhtable = NULL; |
206 | xqm->qm_dqhashmask = 0; | 210 | xqm->qm_dqhashmask = 0; |
207 | xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist)); | 211 | |
212 | /* frlist cleanup */ | ||
213 | mutex_lock(&xqm->qm_dqfrlist_lock); | ||
214 | list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { | ||
215 | xfs_dqlock(dqp); | ||
216 | #ifdef QUOTADEBUG | ||
217 | cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); | ||
218 | #endif | ||
219 | list_del_init(&dqp->q_freelist); | ||
220 | xfs_Gqm->qm_dqfrlist_cnt--; | ||
221 | xfs_dqunlock(dqp); | ||
222 | xfs_qm_dqdestroy(dqp); | ||
223 | } | ||
224 | mutex_unlock(&xqm->qm_dqfrlist_lock); | ||
225 | mutex_destroy(&xqm->qm_dqfrlist_lock); | ||
208 | #ifdef DEBUG | 226 | #ifdef DEBUG |
209 | mutex_destroy(&qcheck_lock); | 227 | mutex_destroy(&qcheck_lock); |
210 | #endif | 228 | #endif |
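Tearing down the freelist uses the _safe iterator, which caches the next node before the loop body runs, so deleting the current entry is legal. The destroy loop reduces to this pattern:

        struct xfs_dquot *dqp, *n;

        list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
                list_del_init(&dqp->q_freelist);  /* safe: 'n' already saved */
                xqm->qm_dqfrlist_cnt--;
                xfs_qm_dqdestroy(dqp);
        }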
@@ -256,7 +274,7 @@ STATIC void | |||
256 | xfs_qm_rele_quotafs_ref( | 274 | xfs_qm_rele_quotafs_ref( |
257 | struct xfs_mount *mp) | 275 | struct xfs_mount *mp) |
258 | { | 276 | { |
259 | xfs_dquot_t *dqp, *nextdqp; | 277 | xfs_dquot_t *dqp, *n; |
260 | 278 | ||
261 | ASSERT(xfs_Gqm); | 279 | ASSERT(xfs_Gqm); |
262 | ASSERT(xfs_Gqm->qm_nrefs > 0); | 280 | ASSERT(xfs_Gqm->qm_nrefs > 0); |
@@ -264,26 +282,24 @@ xfs_qm_rele_quotafs_ref( | |||
264 | /* | 282 | /* |
265 | * Go thru the freelist and destroy all inactive dquots. | 283 | * Go thru the freelist and destroy all inactive dquots. |
266 | */ | 284 | */ |
267 | xfs_qm_freelist_lock(xfs_Gqm); | 285 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); |
268 | 286 | ||
269 | for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; | 287 | list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) { |
270 | dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) { | ||
271 | xfs_dqlock(dqp); | 288 | xfs_dqlock(dqp); |
272 | nextdqp = dqp->dq_flnext; | ||
273 | if (dqp->dq_flags & XFS_DQ_INACTIVE) { | 289 | if (dqp->dq_flags & XFS_DQ_INACTIVE) { |
274 | ASSERT(dqp->q_mount == NULL); | 290 | ASSERT(dqp->q_mount == NULL); |
275 | ASSERT(! XFS_DQ_IS_DIRTY(dqp)); | 291 | ASSERT(! XFS_DQ_IS_DIRTY(dqp)); |
276 | ASSERT(dqp->HL_PREVP == NULL); | 292 | ASSERT(list_empty(&dqp->q_hashlist)); |
277 | ASSERT(dqp->MPL_PREVP == NULL); | 293 | ASSERT(list_empty(&dqp->q_mplist)); |
278 | XQM_FREELIST_REMOVE(dqp); | 294 | list_del_init(&dqp->q_freelist); |
295 | xfs_Gqm->qm_dqfrlist_cnt--; | ||
279 | xfs_dqunlock(dqp); | 296 | xfs_dqunlock(dqp); |
280 | xfs_qm_dqdestroy(dqp); | 297 | xfs_qm_dqdestroy(dqp); |
281 | } else { | 298 | } else { |
282 | xfs_dqunlock(dqp); | 299 | xfs_dqunlock(dqp); |
283 | } | 300 | } |
284 | dqp = nextdqp; | ||
285 | } | 301 | } |
286 | xfs_qm_freelist_unlock(xfs_Gqm); | 302 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
287 | 303 | ||
288 | /* | 304 | /* |
289 | * Destroy the entire XQM. If somebody mounts with quotaon, this'll | 305 | * Destroy the entire XQM. If somebody mounts with quotaon, this'll |
@@ -305,7 +321,7 @@ xfs_qm_unmount( | |||
305 | struct xfs_mount *mp) | 321 | struct xfs_mount *mp) |
306 | { | 322 | { |
307 | if (mp->m_quotainfo) { | 323 | if (mp->m_quotainfo) { |
308 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); | 324 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); |
309 | xfs_qm_destroy_quotainfo(mp); | 325 | xfs_qm_destroy_quotainfo(mp); |
310 | } | 326 | } |
311 | } | 327 | } |
@@ -449,20 +465,21 @@ xfs_qm_unmount_quotas( | |||
449 | */ | 465 | */ |
450 | STATIC int | 466 | STATIC int |
451 | xfs_qm_dqflush_all( | 467 | xfs_qm_dqflush_all( |
452 | xfs_mount_t *mp, | 468 | struct xfs_mount *mp, |
453 | int sync_mode) | 469 | int sync_mode) |
454 | { | 470 | { |
455 | int recl; | 471 | struct xfs_quotainfo *q = mp->m_quotainfo; |
456 | xfs_dquot_t *dqp; | 472 | int recl; |
457 | int niters; | 473 | struct xfs_dquot *dqp; |
458 | int error; | 474 | int niters; |
475 | int error; | ||
459 | 476 | ||
460 | if (mp->m_quotainfo == NULL) | 477 | if (!q) |
461 | return 0; | 478 | return 0; |
462 | niters = 0; | 479 | niters = 0; |
463 | again: | 480 | again: |
464 | xfs_qm_mplist_lock(mp); | 481 | mutex_lock(&q->qi_dqlist_lock); |
465 | FOREACH_DQUOT_IN_MP(dqp, mp) { | 482 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { |
466 | xfs_dqlock(dqp); | 483 | xfs_dqlock(dqp); |
467 | if (! XFS_DQ_IS_DIRTY(dqp)) { | 484 | if (! XFS_DQ_IS_DIRTY(dqp)) { |
468 | xfs_dqunlock(dqp); | 485 | xfs_dqunlock(dqp); |
@@ -470,7 +487,7 @@ again: | |||
470 | } | 487 | } |
471 | 488 | ||
472 | /* XXX a sentinel would be better */ | 489 | /* XXX a sentinel would be better */ |
473 | recl = XFS_QI_MPLRECLAIMS(mp); | 490 | recl = q->qi_dqreclaims; |
474 | if (!xfs_dqflock_nowait(dqp)) { | 491 | if (!xfs_dqflock_nowait(dqp)) { |
475 | /* | 492 | /* |
476 | * If we can't grab the flush lock then check | 493 | * If we can't grab the flush lock then check |
@@ -485,21 +502,21 @@ again: | |||
485 | * Let go of the mplist lock. We don't want to hold it | 502 | * Let go of the mplist lock. We don't want to hold it |
486 | * across a disk write. | 503 | * across a disk write. |
487 | */ | 504 | */ |
488 | xfs_qm_mplist_unlock(mp); | 505 | mutex_unlock(&q->qi_dqlist_lock); |
489 | error = xfs_qm_dqflush(dqp, sync_mode); | 506 | error = xfs_qm_dqflush(dqp, sync_mode); |
490 | xfs_dqunlock(dqp); | 507 | xfs_dqunlock(dqp); |
491 | if (error) | 508 | if (error) |
492 | return error; | 509 | return error; |
493 | 510 | ||
494 | xfs_qm_mplist_lock(mp); | 511 | mutex_lock(&q->qi_dqlist_lock); |
495 | if (recl != XFS_QI_MPLRECLAIMS(mp)) { | 512 | if (recl != q->qi_dqreclaims) { |
496 | xfs_qm_mplist_unlock(mp); | 513 | mutex_unlock(&q->qi_dqlist_lock); |
497 | /* XXX restart limit */ | 514 | /* XXX restart limit */ |
498 | goto again; | 515 | goto again; |
499 | } | 516 | } |
500 | } | 517 | } |
501 | 518 | ||
502 | xfs_qm_mplist_unlock(mp); | 519 | mutex_unlock(&q->qi_dqlist_lock); |
503 | /* return ! busy */ | 520 | /* return ! busy */ |
504 | return 0; | 521 | return 0; |
505 | } | 522 | } |
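[Note] `xfs_qm_dqflush_all()` must not hold `qi_dqlist_lock` across `xfs_qm_dqflush()`, which does disk I/O, so it snapshots `qi_dqreclaims` before dropping the mutex and restarts the whole scan if the counter moved, meaning an entry was removed behind its back. A stripped-down sketch of that generation-counter idiom, with hypothetical `my_list`/`entry` types standing in for the quota structures:

    #include <linux/list.h>
    #include <linux/mutex.h>

    struct my_list {
            struct list_head head;
            struct mutex     lock;
            int              gen;   /* bumped whenever an entry is removed */
    };

    struct entry {
            struct list_head node;
            bool             dirty;
    };

    static void entry_flush(struct entry *e)        /* stand-in for dqflush */
    {
            e->dirty = false;
    }

    static int flush_all(struct my_list *l)
    {
            struct entry *e;
            int snap;

    again:
            mutex_lock(&l->lock);
            list_for_each_entry(e, &l->head, node) {
                    if (!e->dirty)
                            continue;
                    snap = l->gen;            /* remember list generation */
                    mutex_unlock(&l->lock);   /* never do I/O under the lock */
                    entry_flush(e);
                    mutex_lock(&l->lock);
                    if (snap != l->gen) {     /* an entry was removed */
                            mutex_unlock(&l->lock);
                            goto again;       /* 'e' may be stale: restart */
                    }
            }
            mutex_unlock(&l->lock);
            return 0;
    }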
@@ -509,15 +526,15 @@ again: | |||
509 | */ | 526 | */ |
510 | STATIC void | 527 | STATIC void |
511 | xfs_qm_detach_gdquots( | 528 | xfs_qm_detach_gdquots( |
512 | xfs_mount_t *mp) | 529 | struct xfs_mount *mp) |
513 | { | 530 | { |
514 | xfs_dquot_t *dqp, *gdqp; | 531 | struct xfs_quotainfo *q = mp->m_quotainfo; |
515 | int nrecl; | 532 | struct xfs_dquot *dqp, *gdqp; |
533 | int nrecl; | ||
516 | 534 | ||
517 | again: | 535 | again: |
518 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); | 536 | ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); |
519 | dqp = XFS_QI_MPLNEXT(mp); | 537 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { |
520 | while (dqp) { | ||
521 | xfs_dqlock(dqp); | 538 | xfs_dqlock(dqp); |
522 | if ((gdqp = dqp->q_gdquot)) { | 539 | if ((gdqp = dqp->q_gdquot)) { |
523 | xfs_dqlock(gdqp); | 540 | xfs_dqlock(gdqp); |
@@ -530,15 +547,14 @@ xfs_qm_detach_gdquots( | |||
530 | * Can't hold the mplist lock across a dqput. | 547 | * Can't hold the mplist lock across a dqput. |
531 | * XXXmust convert to marker based iterations here. | 548 | * XXXmust convert to marker based iterations here. |
532 | */ | 549 | */ |
533 | nrecl = XFS_QI_MPLRECLAIMS(mp); | 550 | nrecl = q->qi_dqreclaims; |
534 | xfs_qm_mplist_unlock(mp); | 551 | mutex_unlock(&q->qi_dqlist_lock); |
535 | xfs_qm_dqput(gdqp); | 552 | xfs_qm_dqput(gdqp); |
536 | 553 | ||
537 | xfs_qm_mplist_lock(mp); | 554 | mutex_lock(&q->qi_dqlist_lock); |
538 | if (nrecl != XFS_QI_MPLRECLAIMS(mp)) | 555 | if (nrecl != q->qi_dqreclaims) |
539 | goto again; | 556 | goto again; |
540 | } | 557 | } |
541 | dqp = dqp->MPL_NEXT; | ||
542 | } | 558 | } |
543 | } | 559 | } |
544 | 560 | ||
@@ -550,23 +566,23 @@ xfs_qm_detach_gdquots( | |||
550 | */ | 566 | */ |
551 | STATIC int | 567 | STATIC int |
552 | xfs_qm_dqpurge_int( | 568 | xfs_qm_dqpurge_int( |
553 | xfs_mount_t *mp, | 569 | struct xfs_mount *mp, |
554 | uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */ | 570 | uint flags) |
555 | { | 571 | { |
556 | xfs_dquot_t *dqp; | 572 | struct xfs_quotainfo *q = mp->m_quotainfo; |
557 | uint dqtype; | 573 | struct xfs_dquot *dqp, *n; |
558 | int nrecl; | 574 | uint dqtype; |
559 | xfs_dquot_t *nextdqp; | 575 | int nrecl; |
560 | int nmisses; | 576 | int nmisses; |
561 | 577 | ||
562 | if (mp->m_quotainfo == NULL) | 578 | if (!q) |
563 | return 0; | 579 | return 0; |
564 | 580 | ||
565 | dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; | 581 | dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; |
566 | dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; | 582 | dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; |
567 | dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; | 583 | dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; |
568 | 584 | ||
569 | xfs_qm_mplist_lock(mp); | 585 | mutex_lock(&q->qi_dqlist_lock); |
570 | 586 | ||
571 | /* | 587 | /* |
572 | * In the first pass through all incore dquots of this filesystem, | 588 | * In the first pass through all incore dquots of this filesystem, |
@@ -578,28 +594,25 @@ xfs_qm_dqpurge_int( | |||
578 | 594 | ||
579 | again: | 595 | again: |
580 | nmisses = 0; | 596 | nmisses = 0; |
581 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); | 597 | ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); |
582 | /* | 598 | /* |
583 | * Try to get rid of all of the unwanted dquots. The idea is to | 599 | * Try to get rid of all of the unwanted dquots. The idea is to |
584 | * get them off mplist and hashlist, but leave them on freelist. | 600 | * get them off mplist and hashlist, but leave them on freelist. |
585 | */ | 601 | */ |
586 | dqp = XFS_QI_MPLNEXT(mp); | 602 | list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { |
587 | while (dqp) { | ||
588 | /* | 603 | /* |
589 | * It's OK to look at the type without taking dqlock here. | 604 | * It's OK to look at the type without taking dqlock here. |
590 | * We're holding the mplist lock here, and that's needed for | 605 | * We're holding the mplist lock here, and that's needed for |
591 | * a dqreclaim. | 606 | * a dqreclaim. |
592 | */ | 607 | */ |
593 | if ((dqp->dq_flags & dqtype) == 0) { | 608 | if ((dqp->dq_flags & dqtype) == 0) |
594 | dqp = dqp->MPL_NEXT; | ||
595 | continue; | 609 | continue; |
596 | } | ||
597 | 610 | ||
598 | if (!mutex_trylock(&dqp->q_hash->qh_lock)) { | 611 | if (!mutex_trylock(&dqp->q_hash->qh_lock)) { |
599 | nrecl = XFS_QI_MPLRECLAIMS(mp); | 612 | nrecl = q->qi_dqreclaims; |
600 | xfs_qm_mplist_unlock(mp); | 613 | mutex_unlock(&q->qi_dqlist_lock); |
601 | mutex_lock(&dqp->q_hash->qh_lock); | 614 | mutex_lock(&dqp->q_hash->qh_lock); |
602 | xfs_qm_mplist_lock(mp); | 615 | mutex_lock(&q->qi_dqlist_lock); |
603 | 616 | ||
604 | /* | 617 | /* |
605 | * XXXTheoretically, we can get into a very long | 618 | * XXXTheoretically, we can get into a very long |
@@ -607,7 +620,7 @@ xfs_qm_dqpurge_int( | |||
607 | * No one can be adding dquots to the mplist at | 620 | * No one can be adding dquots to the mplist at |
608 | * this point, but somebody might be taking things off. | 621 | * this point, but somebody might be taking things off. |
609 | */ | 622 | */ |
610 | if (nrecl != XFS_QI_MPLRECLAIMS(mp)) { | 623 | if (nrecl != q->qi_dqreclaims) { |
611 | mutex_unlock(&dqp->q_hash->qh_lock); | 624 | mutex_unlock(&dqp->q_hash->qh_lock); |
612 | goto again; | 625 | goto again; |
613 | } | 626 | } |
@@ -617,11 +630,9 @@ xfs_qm_dqpurge_int( | |||
617 | * Take the dquot off the mplist and hashlist. It may remain on | 630 | * Take the dquot off the mplist and hashlist. It may remain on |
618 | * freelist in INACTIVE state. | 631 | * freelist in INACTIVE state. |
619 | */ | 632 | */ |
620 | nextdqp = dqp->MPL_NEXT; | ||
621 | nmisses += xfs_qm_dqpurge(dqp); | 633 | nmisses += xfs_qm_dqpurge(dqp); |
622 | dqp = nextdqp; | ||
623 | } | 634 | } |
624 | xfs_qm_mplist_unlock(mp); | 635 | mutex_unlock(&q->qi_dqlist_lock); |
625 | return nmisses; | 636 | return nmisses; |
626 | } | 637 | } |
627 | 638 | ||
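[Note] `xfs_qm_dqpurge_int()` wants the hash-chain lock while already holding the mplist lock, which is backwards relative to the documented order (hashlock before freelist before mplist). Hence the `mutex_trylock()`: on failure it backs out, takes both locks in the legal order, and uses the reclaim counter to decide whether the scan is still valid. A compact sketch of that trylock-or-back-out idiom, with generic lock names:

    #include <linux/mutex.h>
    #include <linux/types.h>

    /*
     * Sketch only.  Legal lock order is A then B; the caller arrives
     * holding only B and wants A as well.  'gen' is a counter bumped
     * whenever the B-protected list shrinks.
     */
    static bool lock_a_while_holding_b(struct mutex *a, struct mutex *b,
                                       int *gen)
    {
            int snap;

            if (mutex_trylock(a))
                    return true;            /* got it out of order, fine */

            snap = *gen;
            mutex_unlock(b);                /* back out to the legal order */
            mutex_lock(a);
            mutex_lock(b);
            return snap == *gen;            /* false: caller must rescan */
    }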
@@ -921,12 +932,13 @@ xfs_qm_dqdetach( | |||
921 | 932 | ||
922 | int | 933 | int |
923 | xfs_qm_sync( | 934 | xfs_qm_sync( |
924 | xfs_mount_t *mp, | 935 | struct xfs_mount *mp, |
925 | int flags) | 936 | int flags) |
926 | { | 937 | { |
927 | int recl, restarts; | 938 | struct xfs_quotainfo *q = mp->m_quotainfo; |
928 | xfs_dquot_t *dqp; | 939 | int recl, restarts; |
929 | int error; | 940 | struct xfs_dquot *dqp; |
941 | int error; | ||
930 | 942 | ||
931 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) | 943 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) |
932 | return 0; | 944 | return 0; |
@@ -934,18 +946,19 @@ xfs_qm_sync( | |||
934 | restarts = 0; | 946 | restarts = 0; |
935 | 947 | ||
936 | again: | 948 | again: |
937 | xfs_qm_mplist_lock(mp); | 949 | mutex_lock(&q->qi_dqlist_lock); |
938 | /* | 950 | /* |
939 | * dqpurge_all() also takes the mplist lock and iterate thru all dquots | 951 | * dqpurge_all() also takes the mplist lock and iterate thru all dquots |
940 | * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared | 952 | * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared |
941 | * when we have the mplist lock, we know that dquots will be consistent | 953 | * when we have the mplist lock, we know that dquots will be consistent |
942 | * as long as we have it locked. | 954 | * as long as we have it locked. |
943 | */ | 955 | */ |
944 | if (! XFS_IS_QUOTA_ON(mp)) { | 956 | if (!XFS_IS_QUOTA_ON(mp)) { |
945 | xfs_qm_mplist_unlock(mp); | 957 | mutex_unlock(&q->qi_dqlist_lock); |
946 | return 0; | 958 | return 0; |
947 | } | 959 | } |
948 | FOREACH_DQUOT_IN_MP(dqp, mp) { | 960 | ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); |
961 | list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { | ||
949 | /* | 962 | /* |
950 | * If this is vfs_sync calling, then skip the dquots that | 963 | * If this is vfs_sync calling, then skip the dquots that |
951 | * don't 'seem' to be dirty. ie. don't acquire dqlock. | 964 | * don't 'seem' to be dirty. ie. don't acquire dqlock. |
@@ -969,7 +982,7 @@ xfs_qm_sync( | |||
969 | } | 982 | } |
970 | 983 | ||
971 | /* XXX a sentinel would be better */ | 984 | /* XXX a sentinel would be better */ |
972 | recl = XFS_QI_MPLRECLAIMS(mp); | 985 | recl = q->qi_dqreclaims; |
973 | if (!xfs_dqflock_nowait(dqp)) { | 986 | if (!xfs_dqflock_nowait(dqp)) { |
974 | if (flags & SYNC_TRYLOCK) { | 987 | if (flags & SYNC_TRYLOCK) { |
975 | xfs_dqunlock(dqp); | 988 | xfs_dqunlock(dqp); |
@@ -989,7 +1002,7 @@ xfs_qm_sync( | |||
989 | * Let go of the mplist lock. We don't want to hold it | 1002 | * Let go of the mplist lock. We don't want to hold it |
990 | * across a disk write | 1003 | * across a disk write |
991 | */ | 1004 | */ |
992 | xfs_qm_mplist_unlock(mp); | 1005 | mutex_unlock(&q->qi_dqlist_lock); |
993 | error = xfs_qm_dqflush(dqp, flags); | 1006 | error = xfs_qm_dqflush(dqp, flags); |
994 | xfs_dqunlock(dqp); | 1007 | xfs_dqunlock(dqp); |
995 | if (error && XFS_FORCED_SHUTDOWN(mp)) | 1008 | if (error && XFS_FORCED_SHUTDOWN(mp)) |
@@ -997,17 +1010,17 @@ xfs_qm_sync( | |||
997 | else if (error) | 1010 | else if (error) |
998 | return error; | 1011 | return error; |
999 | 1012 | ||
1000 | xfs_qm_mplist_lock(mp); | 1013 | mutex_lock(&q->qi_dqlist_lock); |
1001 | if (recl != XFS_QI_MPLRECLAIMS(mp)) { | 1014 | if (recl != q->qi_dqreclaims) { |
1002 | if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) | 1015 | if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) |
1003 | break; | 1016 | break; |
1004 | 1017 | ||
1005 | xfs_qm_mplist_unlock(mp); | 1018 | mutex_unlock(&q->qi_dqlist_lock); |
1006 | goto again; | 1019 | goto again; |
1007 | } | 1020 | } |
1008 | } | 1021 | } |
1009 | 1022 | ||
1010 | xfs_qm_mplist_unlock(mp); | 1023 | mutex_unlock(&q->qi_dqlist_lock); |
1011 | return 0; | 1024 | return 0; |
1012 | } | 1025 | } |
1013 | 1026 | ||
@@ -1052,8 +1065,9 @@ xfs_qm_init_quotainfo( | |||
1052 | return error; | 1065 | return error; |
1053 | } | 1066 | } |
1054 | 1067 | ||
1055 | xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); | 1068 | INIT_LIST_HEAD(&qinf->qi_dqlist); |
1056 | lockdep_set_class(&qinf->qi_dqlist.qh_lock, &xfs_quota_mplist_class); | 1069 | mutex_init(&qinf->qi_dqlist_lock); |
1070 | lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); | ||
1057 | 1071 | ||
1058 | qinf->qi_dqreclaims = 0; | 1072 | qinf->qi_dqreclaims = 0; |
1059 | 1073 | ||
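[Note] With the old `xfs_dqlist_t` wrapper gone, the per-mount dquot list becomes a bare `list_head` guarded by its own mutex, and the lockdep class key keeps that mutex distinguishable from the hash-chain mutexes it nests inside. A minimal init/teardown sketch under those assumptions (`qinfo` is an illustrative reduction of `xfs_quotainfo`, not the real layout):

    #include <linux/list.h>
    #include <linux/mutex.h>
    #include <linux/lockdep.h>
    #include <linux/bug.h>

    static struct lock_class_key my_list_class;     /* illustrative key */

    struct qinfo {
            struct list_head dqlist;        /* all dquots for this mount */
            struct mutex     dqlist_lock;
            int              reclaims;
    };

    static void qinfo_init(struct qinfo *qi)
    {
            INIT_LIST_HEAD(&qi->dqlist);
            mutex_init(&qi->dqlist_lock);
            /* separate lockdep class: nests inside hash-chain locks */
            lockdep_set_class(&qi->dqlist_lock, &my_list_class);
            qi->reclaims = 0;
    }

    static void qinfo_destroy(struct qinfo *qi)
    {
            WARN_ON(!list_empty(&qi->dqlist));      /* drain it first */
            mutex_destroy(&qi->dqlist_lock);
    }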
@@ -1150,7 +1164,8 @@ xfs_qm_destroy_quotainfo( | |||
1150 | */ | 1164 | */ |
1151 | xfs_qm_rele_quotafs_ref(mp); | 1165 | xfs_qm_rele_quotafs_ref(mp); |
1152 | 1166 | ||
1153 | xfs_qm_list_destroy(&qi->qi_dqlist); | 1167 | ASSERT(list_empty(&qi->qi_dqlist)); |
1168 | mutex_destroy(&qi->qi_dqlist_lock); | ||
1154 | 1169 | ||
1155 | if (qi->qi_uquotaip) { | 1170 | if (qi->qi_uquotaip) { |
1156 | IRELE(qi->qi_uquotaip); | 1171 | IRELE(qi->qi_uquotaip); |
@@ -1177,7 +1192,7 @@ xfs_qm_list_init( | |||
1177 | int n) | 1192 | int n) |
1178 | { | 1193 | { |
1179 | mutex_init(&list->qh_lock); | 1194 | mutex_init(&list->qh_lock); |
1180 | list->qh_next = NULL; | 1195 | INIT_LIST_HEAD(&list->qh_list); |
1181 | list->qh_version = 0; | 1196 | list->qh_version = 0; |
1182 | list->qh_nelems = 0; | 1197 | list->qh_nelems = 0; |
1183 | } | 1198 | } |
@@ -1316,9 +1331,6 @@ xfs_qm_qino_alloc( | |||
1316 | */ | 1331 | */ |
1317 | spin_lock(&mp->m_sb_lock); | 1332 | spin_lock(&mp->m_sb_lock); |
1318 | if (flags & XFS_QMOPT_SBVERSION) { | 1333 | if (flags & XFS_QMOPT_SBVERSION) { |
1319 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) | ||
1320 | unsigned oldv = mp->m_sb.sb_versionnum; | ||
1321 | #endif | ||
1322 | ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); | 1334 | ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); |
1323 | ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | | 1335 | ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | |
1324 | XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == | 1336 | XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == |
@@ -1331,11 +1343,6 @@ xfs_qm_qino_alloc( | |||
1331 | 1343 | ||
1332 | /* qflags will get updated _after_ quotacheck */ | 1344 | /* qflags will get updated _after_ quotacheck */ |
1333 | mp->m_sb.sb_qflags = 0; | 1345 | mp->m_sb.sb_qflags = 0; |
1334 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) | ||
1335 | cmn_err(CE_NOTE, | ||
1336 | "Old superblock version %x, converting to %x.", | ||
1337 | oldv, mp->m_sb.sb_versionnum); | ||
1338 | #endif | ||
1339 | } | 1346 | } |
1340 | if (flags & XFS_QMOPT_UQUOTA) | 1347 | if (flags & XFS_QMOPT_UQUOTA) |
1341 | mp->m_sb.sb_uquotino = (*ip)->i_ino; | 1348 | mp->m_sb.sb_uquotino = (*ip)->i_ino; |
@@ -1371,10 +1378,10 @@ xfs_qm_reset_dqcounts( | |||
1371 | #ifdef DEBUG | 1378 | #ifdef DEBUG |
1372 | j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); | 1379 | j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); |
1373 | do_div(j, sizeof(xfs_dqblk_t)); | 1380 | do_div(j, sizeof(xfs_dqblk_t)); |
1374 | ASSERT(XFS_QM_DQPERBLK(mp) == j); | 1381 | ASSERT(mp->m_quotainfo->qi_dqperchunk == j); |
1375 | #endif | 1382 | #endif |
1376 | ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp); | 1383 | ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp); |
1377 | for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) { | 1384 | for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { |
1378 | /* | 1385 | /* |
1379 | * Do a sanity check, and if needed, repair the dqblk. Don't | 1386 | * Do a sanity check, and if needed, repair the dqblk. Don't |
1380 | * output any warnings because it's perfectly possible to | 1387 | * output any warnings because it's perfectly possible to |
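[Note] The DEBUG block above cross-checks `qi_dqperchunk` by recomputing it: the cluster size in bytes divided by the on-disk dquot size, via `do_div()` because plain 64-bit division is not available natively on 32-bit kernels. A standalone sketch of that arithmetic; both sizes are passed in rather than guessed:

    #include <asm/div64.h>          /* do_div(): 64-by-32 division helper */
    #include <linux/types.h>

    /*
     * How many on-disk dquots fit in one dquot cluster.  do_div()
     * stores the quotient back into its first argument and returns
     * the remainder, which is why the real code cannot simply use
     * '/' on a 64-bit value.
     */
    static u64 dquots_per_chunk(u64 chunk_bytes, u32 dqblk_size)
    {
            do_div(chunk_bytes, dqblk_size);  /* quotient in chunk_bytes */
            return chunk_bytes;
    }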
@@ -1429,7 +1436,7 @@ xfs_qm_dqiter_bufs( | |||
1429 | while (blkcnt--) { | 1436 | while (blkcnt--) { |
1430 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, | 1437 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, |
1431 | XFS_FSB_TO_DADDR(mp, bno), | 1438 | XFS_FSB_TO_DADDR(mp, bno), |
1432 | (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp); | 1439 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); |
1433 | if (error) | 1440 | if (error) |
1434 | break; | 1441 | break; |
1435 | 1442 | ||
@@ -1439,7 +1446,7 @@ xfs_qm_dqiter_bufs( | |||
1439 | * goto the next block. | 1446 | * goto the next block. |
1440 | */ | 1447 | */ |
1441 | bno++; | 1448 | bno++; |
1442 | firstid += XFS_QM_DQPERBLK(mp); | 1449 | firstid += mp->m_quotainfo->qi_dqperchunk; |
1443 | } | 1450 | } |
1444 | return error; | 1451 | return error; |
1445 | } | 1452 | } |
@@ -1505,7 +1512,7 @@ xfs_qm_dqiterate( | |||
1505 | continue; | 1512 | continue; |
1506 | 1513 | ||
1507 | firstid = (xfs_dqid_t) map[i].br_startoff * | 1514 | firstid = (xfs_dqid_t) map[i].br_startoff * |
1508 | XFS_QM_DQPERBLK(mp); | 1515 | mp->m_quotainfo->qi_dqperchunk; |
1509 | /* | 1516 | /* |
1510 | * Do a read-ahead on the next extent. | 1517 | * Do a read-ahead on the next extent. |
1511 | */ | 1518 | */ |
@@ -1516,7 +1523,7 @@ xfs_qm_dqiterate( | |||
1516 | while (rablkcnt--) { | 1523 | while (rablkcnt--) { |
1517 | xfs_baread(mp->m_ddev_targp, | 1524 | xfs_baread(mp->m_ddev_targp, |
1518 | XFS_FSB_TO_DADDR(mp, rablkno), | 1525 | XFS_FSB_TO_DADDR(mp, rablkno), |
1519 | (int)XFS_QI_DQCHUNKLEN(mp)); | 1526 | mp->m_quotainfo->qi_dqchunklen); |
1520 | rablkno++; | 1527 | rablkno++; |
1521 | } | 1528 | } |
1522 | } | 1529 | } |
@@ -1576,8 +1583,10 @@ xfs_qm_quotacheck_dqadjust( | |||
1576 | 1583 | ||
1577 | /* | 1584 | /* |
1578 | * Set default limits, adjust timers (since we changed usages) | 1585 | * Set default limits, adjust timers (since we changed usages) |
1586 | * | ||
1587 | * There are no timers for the default values set in the root dquot. | ||
1579 | */ | 1588 | */ |
1580 | if (! XFS_IS_SUSER_DQUOT(dqp)) { | 1589 | if (dqp->q_core.d_id) { |
1581 | xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); | 1590 | xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); |
1582 | xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); | 1591 | xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); |
1583 | } | 1592 | } |
@@ -1747,14 +1756,14 @@ xfs_qm_quotacheck( | |||
1747 | lastino = 0; | 1756 | lastino = 0; |
1748 | flags = 0; | 1757 | flags = 0; |
1749 | 1758 | ||
1750 | ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp)); | 1759 | ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); |
1751 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1760 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
1752 | 1761 | ||
1753 | /* | 1762 | /* |
1754 | * There should be no cached dquots. The (simplistic) quotacheck | 1763 | * There should be no cached dquots. The (simplistic) quotacheck |
1755 | * algorithm doesn't like that. | 1764 | * algorithm doesn't like that. |
1756 | */ | 1765 | */ |
1757 | ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0); | 1766 | ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); |
1758 | 1767 | ||
1759 | cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); | 1768 | cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); |
1760 | 1769 | ||
@@ -1763,15 +1772,19 @@ xfs_qm_quotacheck( | |||
1763 | * their counters to zero. We need a clean slate. | 1772 | * their counters to zero. We need a clean slate. |
1764 | * We don't log our changes till later. | 1773 | * We don't log our changes till later. |
1765 | */ | 1774 | */ |
1766 | if ((uip = XFS_QI_UQIP(mp))) { | 1775 | uip = mp->m_quotainfo->qi_uquotaip; |
1767 | if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA))) | 1776 | if (uip) { |
1777 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA); | ||
1778 | if (error) | ||
1768 | goto error_return; | 1779 | goto error_return; |
1769 | flags |= XFS_UQUOTA_CHKD; | 1780 | flags |= XFS_UQUOTA_CHKD; |
1770 | } | 1781 | } |
1771 | 1782 | ||
1772 | if ((gip = XFS_QI_GQIP(mp))) { | 1783 | gip = mp->m_quotainfo->qi_gquotaip; |
1773 | if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? | 1784 | if (gip) { |
1774 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA))) | 1785 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? |
1786 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); | ||
1787 | if (error) | ||
1775 | goto error_return; | 1788 | goto error_return; |
1776 | flags |= XFS_OQUOTA_CHKD; | 1789 | flags |= XFS_OQUOTA_CHKD; |
1777 | } | 1790 | } |
@@ -1804,7 +1817,7 @@ xfs_qm_quotacheck( | |||
1804 | * at this point (because we intentionally didn't in dqget_noattach). | 1817 | * at this point (because we intentionally didn't in dqget_noattach). |
1805 | */ | 1818 | */ |
1806 | if (error) { | 1819 | if (error) { |
1807 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); | 1820 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); |
1808 | goto error_return; | 1821 | goto error_return; |
1809 | } | 1822 | } |
1810 | 1823 | ||
@@ -1825,7 +1838,7 @@ xfs_qm_quotacheck( | |||
1825 | mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); | 1838 | mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); |
1826 | mp->m_qflags |= flags; | 1839 | mp->m_qflags |= flags; |
1827 | 1840 | ||
1828 | XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++"); | 1841 | xfs_qm_dquot_list_print(mp); |
1829 | 1842 | ||
1830 | error_return: | 1843 | error_return: |
1831 | if (error) { | 1844 | if (error) { |
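[Note] The quotacheck hunk also untangles the old assignment-inside-condition style (`if ((error = ...))`) into one action per statement with explicit error checks. A sketch of the resulting shape with placeholder helpers; the old form is shown in the comment:

    #include <linux/errno.h>

    struct mount;                                   /* opaque stand-ins */
    static struct mount *get_inode(void) { return NULL; }
    static int iterate(struct mount *ip) { return 0; }

    /*
     * Old shape:  if ((ip = get_inode())) { if ((error = iterate(ip))) ... }
     * New shape below: the assignment, NULL test and error test each
     * get their own statement, matching the converted hunk above.
     */
    static int check_one(unsigned int *flags)
    {
            struct mount *ip;
            int error;

            ip = get_inode();
            if (ip) {
                    error = iterate(ip);
                    if (error)
                            return error;
                    *flags |= 0x1;          /* illustrative CHKD flag */
            }
            return 0;
    }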
@@ -1920,59 +1933,53 @@ xfs_qm_init_quotainos( | |||
1920 | } | 1933 | } |
1921 | } | 1934 | } |
1922 | 1935 | ||
1923 | XFS_QI_UQIP(mp) = uip; | 1936 | mp->m_quotainfo->qi_uquotaip = uip; |
1924 | XFS_QI_GQIP(mp) = gip; | 1937 | mp->m_quotainfo->qi_gquotaip = gip; |
1925 | 1938 | ||
1926 | return 0; | 1939 | return 0; |
1927 | } | 1940 | } |
1928 | 1941 | ||
1929 | 1942 | ||
1943 | |||
1930 | /* | 1944 | /* |
1931 | * Traverse the freelist of dquots and attempt to reclaim a maximum of | 1945 | * Just pop the least recently used dquot off the freelist and |
1932 | * 'howmany' dquots. This operation races with dqlookup(), and attempts to | 1946 | * recycle it. The returned dquot is locked. |
1933 | * favor the lookup function ... | ||
1934 | * XXXsup merge this with qm_reclaim_one(). | ||
1935 | */ | 1947 | */ |
1936 | STATIC int | 1948 | STATIC xfs_dquot_t * |
1937 | xfs_qm_shake_freelist( | 1949 | xfs_qm_dqreclaim_one(void) |
1938 | int howmany) | ||
1939 | { | 1950 | { |
1940 | int nreclaimed; | 1951 | xfs_dquot_t *dqpout; |
1941 | xfs_dqhash_t *hash; | 1952 | xfs_dquot_t *dqp; |
1942 | xfs_dquot_t *dqp, *nextdqp; | ||
1943 | int restarts; | 1953 | int restarts; |
1944 | int nflushes; | ||
1945 | |||
1946 | if (howmany <= 0) | ||
1947 | return 0; | ||
1948 | 1954 | ||
1949 | nreclaimed = 0; | ||
1950 | restarts = 0; | 1955 | restarts = 0; |
1951 | nflushes = 0; | 1956 | dqpout = NULL; |
1952 | 1957 | ||
1953 | #ifdef QUOTADEBUG | 1958 | /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ |
1954 | cmn_err(CE_DEBUG, "Shake free 0x%x", howmany); | 1959 | startagain: |
1955 | #endif | 1960 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); |
1956 | /* lock order is : hashchainlock, freelistlock, mplistlock */ | ||
1957 | tryagain: | ||
1958 | xfs_qm_freelist_lock(xfs_Gqm); | ||
1959 | 1961 | ||
1960 | for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; | 1962 | list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { |
1961 | ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) && | 1963 | struct xfs_mount *mp = dqp->q_mount; |
1962 | nreclaimed < howmany); ) { | ||
1963 | xfs_dqlock(dqp); | 1964 | xfs_dqlock(dqp); |
1964 | 1965 | ||
1965 | /* | 1966 | /* |
1966 | * We are racing with dqlookup here. Naturally we don't | 1967 | * We are racing with dqlookup here. Naturally we don't |
1967 | * want to reclaim a dquot that lookup wants. | 1968 | * want to reclaim a dquot that lookup wants. We release the |
1969 | * freelist lock and start over, so that lookup will grab | ||
1970 | * both the dquot and the freelistlock. | ||
1968 | */ | 1971 | */ |
1969 | if (dqp->dq_flags & XFS_DQ_WANT) { | 1972 | if (dqp->dq_flags & XFS_DQ_WANT) { |
1973 | ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); | ||
1974 | |||
1975 | trace_xfs_dqreclaim_want(dqp); | ||
1976 | |||
1970 | xfs_dqunlock(dqp); | 1977 | xfs_dqunlock(dqp); |
1971 | xfs_qm_freelist_unlock(xfs_Gqm); | 1978 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
1972 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | 1979 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) |
1973 | return nreclaimed; | 1980 | return NULL; |
1974 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); | 1981 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); |
1975 | goto tryagain; | 1982 | goto startagain; |
1976 | } | 1983 | } |
1977 | 1984 | ||
1978 | /* | 1985 | /* |
@@ -1981,23 +1988,27 @@ xfs_qm_shake_freelist( | |||
1981 | * life easier. | 1988 | * life easier. |
1982 | */ | 1989 | */ |
1983 | if (dqp->dq_flags & XFS_DQ_INACTIVE) { | 1990 | if (dqp->dq_flags & XFS_DQ_INACTIVE) { |
1984 | ASSERT(dqp->q_mount == NULL); | 1991 | ASSERT(mp == NULL); |
1985 | ASSERT(! XFS_DQ_IS_DIRTY(dqp)); | 1992 | ASSERT(! XFS_DQ_IS_DIRTY(dqp)); |
1986 | ASSERT(dqp->HL_PREVP == NULL); | 1993 | ASSERT(list_empty(&dqp->q_hashlist)); |
1987 | ASSERT(dqp->MPL_PREVP == NULL); | 1994 | ASSERT(list_empty(&dqp->q_mplist)); |
1995 | list_del_init(&dqp->q_freelist); | ||
1996 | xfs_Gqm->qm_dqfrlist_cnt--; | ||
1997 | xfs_dqunlock(dqp); | ||
1998 | dqpout = dqp; | ||
1988 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); | 1999 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); |
1989 | nextdqp = dqp->dq_flnext; | 2000 | break; |
1990 | goto off_freelist; | ||
1991 | } | 2001 | } |
1992 | 2002 | ||
1993 | ASSERT(dqp->MPL_PREVP); | 2003 | ASSERT(dqp->q_hash); |
2004 | ASSERT(!list_empty(&dqp->q_mplist)); | ||
2005 | |||
1994 | /* | 2006 | /* |
1995 | * Try to grab the flush lock. If this dquot is in the process of | 2007 | * Try to grab the flush lock. If this dquot is in the process of |
1996 | * getting flushed to disk, we don't want to reclaim it. | 2008 | * getting flushed to disk, we don't want to reclaim it. |
1997 | */ | 2009 | */ |
1998 | if (!xfs_dqflock_nowait(dqp)) { | 2010 | if (!xfs_dqflock_nowait(dqp)) { |
1999 | xfs_dqunlock(dqp); | 2011 | xfs_dqunlock(dqp); |
2000 | dqp = dqp->dq_flnext; | ||
2001 | continue; | 2012 | continue; |
2002 | } | 2013 | } |
2003 | 2014 | ||
@@ -2010,21 +2021,21 @@ xfs_qm_shake_freelist( | |||
2010 | if (XFS_DQ_IS_DIRTY(dqp)) { | 2021 | if (XFS_DQ_IS_DIRTY(dqp)) { |
2011 | int error; | 2022 | int error; |
2012 | 2023 | ||
2013 | trace_xfs_dqshake_dirty(dqp); | 2024 | trace_xfs_dqreclaim_dirty(dqp); |
2014 | 2025 | ||
2015 | /* | 2026 | /* |
2016 | * We flush it delayed write, so don't bother | 2027 | * We flush it delayed write, so don't bother |
2017 | * releasing the mplock. | 2028 | * releasing the freelist lock. |
2018 | */ | 2029 | */ |
2019 | error = xfs_qm_dqflush(dqp, 0); | 2030 | error = xfs_qm_dqflush(dqp, 0); |
2020 | if (error) { | 2031 | if (error) { |
2021 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | 2032 | xfs_fs_cmn_err(CE_WARN, mp, |
2022 | "xfs_qm_dqflush_all: dquot %p flush failed", dqp); | 2033 | "xfs_qm_dqreclaim: dquot %p flush failed", dqp); |
2023 | } | 2034 | } |
2024 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | 2035 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ |
2025 | dqp = dqp->dq_flnext; | ||
2026 | continue; | 2036 | continue; |
2027 | } | 2037 | } |
2038 | |||
2028 | /* | 2039 | /* |
2029 | * We're trying to get the hashlock out of order. This races | 2040 | * We're trying to get the hashlock out of order. This races |
2030 | * with dqlookup; so, we giveup and goto the next dquot if | 2041 | * with dqlookup; so, we giveup and goto the next dquot if |
@@ -2033,56 +2044,74 @@ xfs_qm_shake_freelist( | |||
2033 | * waiting for the freelist lock. | 2044 | * waiting for the freelist lock. |
2034 | */ | 2045 | */ |
2035 | if (!mutex_trylock(&dqp->q_hash->qh_lock)) { | 2046 | if (!mutex_trylock(&dqp->q_hash->qh_lock)) { |
2036 | xfs_dqfunlock(dqp); | 2047 | restarts++; |
2037 | xfs_dqunlock(dqp); | 2048 | goto dqfunlock; |
2038 | dqp = dqp->dq_flnext; | ||
2039 | continue; | ||
2040 | } | 2049 | } |
2050 | |||
2041 | /* | 2051 | /* |
2042 | * This races with dquot allocation code as well as dqflush_all | 2052 | * This races with dquot allocation code as well as dqflush_all |
2043 | * and reclaim code. So, if we failed to grab the mplist lock, | 2053 | * and reclaim code. So, if we failed to grab the mplist lock, |
2044 | * giveup everything and start over. | 2054 | * giveup everything and start over. |
2045 | */ | 2055 | */ |
2046 | hash = dqp->q_hash; | 2056 | if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { |
2047 | ASSERT(hash); | 2057 | restarts++; |
2048 | if (! xfs_qm_mplist_nowait(dqp->q_mount)) { | 2058 | mutex_unlock(&dqp->q_hash->qh_lock); |
2049 | /* XXX put a sentinel so that we can come back here */ | ||
2050 | xfs_dqfunlock(dqp); | 2059 | xfs_dqfunlock(dqp); |
2051 | xfs_dqunlock(dqp); | 2060 | xfs_dqunlock(dqp); |
2052 | mutex_unlock(&hash->qh_lock); | 2061 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
2053 | xfs_qm_freelist_unlock(xfs_Gqm); | 2062 | if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS) |
2054 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | 2063 | return NULL; |
2055 | return nreclaimed; | 2064 | goto startagain; |
2056 | goto tryagain; | ||
2057 | } | 2065 | } |
2058 | 2066 | ||
2059 | trace_xfs_dqshake_unlink(dqp); | ||
2060 | |||
2061 | #ifdef QUOTADEBUG | ||
2062 | cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n", | ||
2063 | dqp, be32_to_cpu(dqp->q_core.d_id)); | ||
2064 | #endif | ||
2065 | ASSERT(dqp->q_nrefs == 0); | 2067 | ASSERT(dqp->q_nrefs == 0); |
2066 | nextdqp = dqp->dq_flnext; | 2068 | list_del_init(&dqp->q_mplist); |
2067 | XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); | 2069 | mp->m_quotainfo->qi_dquots--; |
2068 | XQM_HASHLIST_REMOVE(hash, dqp); | 2070 | mp->m_quotainfo->qi_dqreclaims++; |
2071 | list_del_init(&dqp->q_hashlist); | ||
2072 | dqp->q_hash->qh_version++; | ||
2073 | list_del_init(&dqp->q_freelist); | ||
2074 | xfs_Gqm->qm_dqfrlist_cnt--; | ||
2075 | dqpout = dqp; | ||
2076 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); | ||
2077 | mutex_unlock(&dqp->q_hash->qh_lock); | ||
2078 | dqfunlock: | ||
2069 | xfs_dqfunlock(dqp); | 2079 | xfs_dqfunlock(dqp); |
2070 | xfs_qm_mplist_unlock(dqp->q_mount); | ||
2071 | mutex_unlock(&hash->qh_lock); | ||
2072 | |||
2073 | off_freelist: | ||
2074 | XQM_FREELIST_REMOVE(dqp); | ||
2075 | xfs_dqunlock(dqp); | 2080 | xfs_dqunlock(dqp); |
2076 | nreclaimed++; | 2081 | if (dqpout) |
2077 | XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims); | 2082 | break; |
2083 | if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | ||
2084 | return NULL; | ||
2085 | } | ||
2086 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
2087 | return dqpout; | ||
2088 | } | ||
2089 | |||
2090 | /* | ||
2091 | * Traverse the freelist of dquots and attempt to reclaim a maximum of | ||
2092 | * 'howmany' dquots. This operation races with dqlookup(), and attempts to | ||
2093 | * favor the lookup function ... | ||
2094 | */ | ||
2095 | STATIC int | ||
2096 | xfs_qm_shake_freelist( | ||
2097 | int howmany) | ||
2098 | { | ||
2099 | int nreclaimed = 0; | ||
2100 | xfs_dquot_t *dqp; | ||
2101 | |||
2102 | if (howmany <= 0) | ||
2103 | return 0; | ||
2104 | |||
2105 | while (nreclaimed < howmany) { | ||
2106 | dqp = xfs_qm_dqreclaim_one(); | ||
2107 | if (!dqp) | ||
2108 | return nreclaimed; | ||
2078 | xfs_qm_dqdestroy(dqp); | 2109 | xfs_qm_dqdestroy(dqp); |
2079 | dqp = nextdqp; | 2110 | nreclaimed++; |
2080 | } | 2111 | } |
2081 | xfs_qm_freelist_unlock(xfs_Gqm); | ||
2082 | return nreclaimed; | 2112 | return nreclaimed; |
2083 | } | 2113 | } |
2084 | 2114 | ||
2085 | |||
2086 | /* | 2115 | /* |
2087 | * The kmem_shake interface is invoked when memory is running low. | 2116 | * The kmem_shake interface is invoked when memory is running low. |
2088 | */ | 2117 | */ |
@@ -2097,7 +2126,7 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) | |||
2097 | if (!xfs_Gqm) | 2126 | if (!xfs_Gqm) |
2098 | return 0; | 2127 | return 0; |
2099 | 2128 | ||
2100 | nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */ | 2129 | nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ |
2101 | /* incore dquots in all f/s's */ | 2130 | /* incore dquots in all f/s's */ |
2102 | ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; | 2131 | ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; |
2103 | 2132 | ||
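[Note] `xfs_qm_shake()` now reads the free count straight from `qm_dqfrlist_cnt` instead of the removed list header's `qh_nelems`; the used count is total incore dquots minus the free ones. A sketch of that accounting with the ratio policy simplified (the real heuristic has extra clamping, and `free_ratio` is assumed nonzero):

    #include <linux/atomic.h>

    struct qm {
            atomic_t total;         /* all incore dquots */
            int      free_cnt;      /* entries on the freelist */
            int      free_ratio;    /* target free:used ratio, > 0 */
    };

    /* Returns how many dquots the shaker should try to reclaim. */
    static int shake_target(struct qm *qm)
    {
            int nfree = qm->free_cnt;
            int nused = atomic_read(&qm->total) - nfree;

            /* keep roughly one free dquot per 'free_ratio' used ones */
            if (nfree <= nused / qm->free_ratio)
                    return 0;
            return nfree - nused / qm->free_ratio;
    }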
@@ -2113,131 +2142,6 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) | |||
2113 | } | 2142 | } |
2114 | 2143 | ||
2115 | 2144 | ||
2116 | /* | ||
2117 | * Just pop the least recently used dquot off the freelist and | ||
2118 | * recycle it. The returned dquot is locked. | ||
2119 | */ | ||
2120 | STATIC xfs_dquot_t * | ||
2121 | xfs_qm_dqreclaim_one(void) | ||
2122 | { | ||
2123 | xfs_dquot_t *dqpout; | ||
2124 | xfs_dquot_t *dqp; | ||
2125 | int restarts; | ||
2126 | int nflushes; | ||
2127 | |||
2128 | restarts = 0; | ||
2129 | dqpout = NULL; | ||
2130 | nflushes = 0; | ||
2131 | |||
2132 | /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ | ||
2133 | startagain: | ||
2134 | xfs_qm_freelist_lock(xfs_Gqm); | ||
2135 | |||
2136 | FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) { | ||
2137 | xfs_dqlock(dqp); | ||
2138 | |||
2139 | /* | ||
2140 | * We are racing with dqlookup here. Naturally we don't | ||
2141 | * want to reclaim a dquot that lookup wants. We release the | ||
2142 | * freelist lock and start over, so that lookup will grab | ||
2143 | * both the dquot and the freelistlock. | ||
2144 | */ | ||
2145 | if (dqp->dq_flags & XFS_DQ_WANT) { | ||
2146 | ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); | ||
2147 | |||
2148 | trace_xfs_dqreclaim_want(dqp); | ||
2149 | |||
2150 | xfs_dqunlock(dqp); | ||
2151 | xfs_qm_freelist_unlock(xfs_Gqm); | ||
2152 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | ||
2153 | return NULL; | ||
2154 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); | ||
2155 | goto startagain; | ||
2156 | } | ||
2157 | |||
2158 | /* | ||
2159 | * If the dquot is inactive, we are assured that it is | ||
2160 | * not on the mplist or the hashlist, and that makes our | ||
2161 | * life easier. | ||
2162 | */ | ||
2163 | if (dqp->dq_flags & XFS_DQ_INACTIVE) { | ||
2164 | ASSERT(dqp->q_mount == NULL); | ||
2165 | ASSERT(! XFS_DQ_IS_DIRTY(dqp)); | ||
2166 | ASSERT(dqp->HL_PREVP == NULL); | ||
2167 | ASSERT(dqp->MPL_PREVP == NULL); | ||
2168 | XQM_FREELIST_REMOVE(dqp); | ||
2169 | xfs_dqunlock(dqp); | ||
2170 | dqpout = dqp; | ||
2171 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); | ||
2172 | break; | ||
2173 | } | ||
2174 | |||
2175 | ASSERT(dqp->q_hash); | ||
2176 | ASSERT(dqp->MPL_PREVP); | ||
2177 | |||
2178 | /* | ||
2179 | * Try to grab the flush lock. If this dquot is in the process of | ||
2180 | * getting flushed to disk, we don't want to reclaim it. | ||
2181 | */ | ||
2182 | if (!xfs_dqflock_nowait(dqp)) { | ||
2183 | xfs_dqunlock(dqp); | ||
2184 | continue; | ||
2185 | } | ||
2186 | |||
2187 | /* | ||
2188 | * We have the flush lock so we know that this is not in the | ||
2189 | * process of being flushed. So, if this is dirty, flush it | ||
2190 | * DELWRI so that we don't get a freelist infested with | ||
2191 | * dirty dquots. | ||
2192 | */ | ||
2193 | if (XFS_DQ_IS_DIRTY(dqp)) { | ||
2194 | int error; | ||
2195 | |||
2196 | trace_xfs_dqreclaim_dirty(dqp); | ||
2197 | |||
2198 | /* | ||
2199 | * We flush it delayed write, so don't bother | ||
2200 | * releasing the freelist lock. | ||
2201 | */ | ||
2202 | error = xfs_qm_dqflush(dqp, 0); | ||
2203 | if (error) { | ||
2204 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
2205 | "xfs_qm_dqreclaim: dquot %p flush failed", dqp); | ||
2206 | } | ||
2207 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | ||
2208 | continue; | ||
2209 | } | ||
2210 | |||
2211 | if (! xfs_qm_mplist_nowait(dqp->q_mount)) { | ||
2212 | xfs_dqfunlock(dqp); | ||
2213 | xfs_dqunlock(dqp); | ||
2214 | continue; | ||
2215 | } | ||
2216 | |||
2217 | if (!mutex_trylock(&dqp->q_hash->qh_lock)) | ||
2218 | goto mplistunlock; | ||
2219 | |||
2220 | trace_xfs_dqreclaim_unlink(dqp); | ||
2221 | |||
2222 | ASSERT(dqp->q_nrefs == 0); | ||
2223 | XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); | ||
2224 | XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); | ||
2225 | XQM_FREELIST_REMOVE(dqp); | ||
2226 | dqpout = dqp; | ||
2227 | mutex_unlock(&dqp->q_hash->qh_lock); | ||
2228 | mplistunlock: | ||
2229 | xfs_qm_mplist_unlock(dqp->q_mount); | ||
2230 | xfs_dqfunlock(dqp); | ||
2231 | xfs_dqunlock(dqp); | ||
2232 | if (dqpout) | ||
2233 | break; | ||
2234 | } | ||
2235 | |||
2236 | xfs_qm_freelist_unlock(xfs_Gqm); | ||
2237 | return dqpout; | ||
2238 | } | ||
2239 | |||
2240 | |||
2241 | /*------------------------------------------------------------------*/ | 2145 | /*------------------------------------------------------------------*/ |
2242 | 2146 | ||
2243 | /* | 2147 | /* |
@@ -2662,66 +2566,3 @@ xfs_qm_vop_create_dqattach( | |||
2662 | } | 2566 | } |
2663 | } | 2567 | } |
2664 | 2568 | ||
2665 | /* ------------- list stuff -----------------*/ | ||
2666 | STATIC void | ||
2667 | xfs_qm_freelist_init(xfs_frlist_t *ql) | ||
2668 | { | ||
2669 | ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql; | ||
2670 | mutex_init(&ql->qh_lock); | ||
2671 | ql->qh_version = 0; | ||
2672 | ql->qh_nelems = 0; | ||
2673 | } | ||
2674 | |||
2675 | STATIC void | ||
2676 | xfs_qm_freelist_destroy(xfs_frlist_t *ql) | ||
2677 | { | ||
2678 | xfs_dquot_t *dqp, *nextdqp; | ||
2679 | |||
2680 | mutex_lock(&ql->qh_lock); | ||
2681 | for (dqp = ql->qh_next; | ||
2682 | dqp != (xfs_dquot_t *)ql; ) { | ||
2683 | xfs_dqlock(dqp); | ||
2684 | nextdqp = dqp->dq_flnext; | ||
2685 | #ifdef QUOTADEBUG | ||
2686 | cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); | ||
2687 | #endif | ||
2688 | XQM_FREELIST_REMOVE(dqp); | ||
2689 | xfs_dqunlock(dqp); | ||
2690 | xfs_qm_dqdestroy(dqp); | ||
2691 | dqp = nextdqp; | ||
2692 | } | ||
2693 | mutex_unlock(&ql->qh_lock); | ||
2694 | mutex_destroy(&ql->qh_lock); | ||
2695 | |||
2696 | ASSERT(ql->qh_nelems == 0); | ||
2697 | } | ||
2698 | |||
2699 | STATIC void | ||
2700 | xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq) | ||
2701 | { | ||
2702 | dq->dq_flnext = ql->qh_next; | ||
2703 | dq->dq_flprev = (xfs_dquot_t *)ql; | ||
2704 | ql->qh_next = dq; | ||
2705 | dq->dq_flnext->dq_flprev = dq; | ||
2706 | xfs_Gqm->qm_dqfreelist.qh_nelems++; | ||
2707 | xfs_Gqm->qm_dqfreelist.qh_version++; | ||
2708 | } | ||
2709 | |||
2710 | void | ||
2711 | xfs_qm_freelist_unlink(xfs_dquot_t *dq) | ||
2712 | { | ||
2713 | xfs_dquot_t *next = dq->dq_flnext; | ||
2714 | xfs_dquot_t *prev = dq->dq_flprev; | ||
2715 | |||
2716 | next->dq_flprev = prev; | ||
2717 | prev->dq_flnext = next; | ||
2718 | dq->dq_flnext = dq->dq_flprev = dq; | ||
2719 | xfs_Gqm->qm_dqfreelist.qh_nelems--; | ||
2720 | xfs_Gqm->qm_dqfreelist.qh_version++; | ||
2721 | } | ||
2722 | |||
2723 | void | ||
2724 | xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq) | ||
2725 | { | ||
2726 | xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq); | ||
2727 | } | ||
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 495564b8af38..c9446f1c726d 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
@@ -72,17 +72,6 @@ extern kmem_zone_t *qm_dqtrxzone; | |||
72 | #define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 | 72 | #define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 |
73 | 73 | ||
74 | typedef xfs_dqhash_t xfs_dqlist_t; | 74 | typedef xfs_dqhash_t xfs_dqlist_t; |
75 | /* | ||
76 | * The freelist head. The first two fields match the first two in the | ||
77 | * xfs_dquot_t structure (in xfs_dqmarker_t) | ||
78 | */ | ||
79 | typedef struct xfs_frlist { | ||
80 | struct xfs_dquot *qh_next; | ||
81 | struct xfs_dquot *qh_prev; | ||
82 | struct mutex qh_lock; | ||
83 | uint qh_version; | ||
84 | uint qh_nelems; | ||
85 | } xfs_frlist_t; | ||
86 | 75 | ||
87 | /* | 76 | /* |
88 | * Quota Manager (global) structure. Lives only in core. | 77 | * Quota Manager (global) structure. Lives only in core. |
@@ -91,7 +80,9 @@ typedef struct xfs_qm { | |||
91 | xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ | 80 | xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ |
92 | xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ | 81 | xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ |
93 | uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ | 82 | uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ |
94 | xfs_frlist_t qm_dqfreelist; /* freelist of dquots */ | 83 | struct list_head qm_dqfrlist; /* freelist of dquots */ |
84 | struct mutex qm_dqfrlist_lock; | ||
85 | int qm_dqfrlist_cnt; | ||
95 | atomic_t qm_totaldquots; /* total incore dquots */ | 86 | atomic_t qm_totaldquots; /* total incore dquots */ |
96 | uint qm_nrefs; /* file systems with quota on */ | 87 | uint qm_nrefs; /* file systems with quota on */ |
97 | int qm_dqfree_ratio;/* ratio of free to inuse dquots */ | 88 | int qm_dqfree_ratio;/* ratio of free to inuse dquots */ |
@@ -106,7 +97,9 @@ typedef struct xfs_qm { | |||
106 | typedef struct xfs_quotainfo { | 97 | typedef struct xfs_quotainfo { |
107 | xfs_inode_t *qi_uquotaip; /* user quota inode */ | 98 | xfs_inode_t *qi_uquotaip; /* user quota inode */ |
108 | xfs_inode_t *qi_gquotaip; /* group quota inode */ | 99 | xfs_inode_t *qi_gquotaip; /* group quota inode */ |
109 | xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ | 100 | struct list_head qi_dqlist; /* all dquots in filesys */ |
101 | struct mutex qi_dqlist_lock; | ||
102 | int qi_dquots; | ||
110 | int qi_dqreclaims; /* a change here indicates | 103 | int qi_dqreclaims; /* a change here indicates |
111 | a removal in the dqlist */ | 104 | a removal in the dqlist */ |
112 | time_t qi_btimelimit; /* limit for blks timer */ | 105 | time_t qi_btimelimit; /* limit for blks timer */ |
@@ -175,10 +168,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); | |||
175 | extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); | 168 | extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); |
176 | extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); | 169 | extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); |
177 | 170 | ||
178 | /* list stuff */ | ||
179 | extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); | ||
180 | extern void xfs_qm_freelist_unlink(xfs_dquot_t *); | ||
181 | |||
182 | #ifdef DEBUG | 171 | #ifdef DEBUG |
183 | extern int xfs_qm_internalqcheck(xfs_mount_t *); | 172 | extern int xfs_qm_internalqcheck(xfs_mount_t *); |
184 | #else | 173 | #else |
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 83e7ea3e25fa..3d1fc79532e2 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c | |||
@@ -55,7 +55,7 @@ static int xqm_proc_show(struct seq_file *m, void *v) | |||
55 | ndquot, | 55 | ndquot, |
56 | xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, | 56 | xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, |
57 | xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, | 57 | xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, |
58 | xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); | 58 | xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); |
59 | return 0; | 59 | return 0; |
60 | } | 60 | } |
61 | 61 | ||
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 50bee07d6b0e..92b002f1805f 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -79,6 +79,7 @@ xfs_qm_scall_quotaoff( | |||
79 | xfs_mount_t *mp, | 79 | xfs_mount_t *mp, |
80 | uint flags) | 80 | uint flags) |
81 | { | 81 | { |
82 | struct xfs_quotainfo *q = mp->m_quotainfo; | ||
82 | uint dqtype; | 83 | uint dqtype; |
83 | int error; | 84 | int error; |
84 | uint inactivate_flags; | 85 | uint inactivate_flags; |
@@ -102,11 +103,8 @@ xfs_qm_scall_quotaoff( | |||
102 | * critical thing. | 103 | * critical thing. |
103 | * If quotaoff, then we must be dealing with the root filesystem. | 104 | * If quotaoff, then we must be dealing with the root filesystem. |
104 | */ | 105 | */ |
105 | ASSERT(mp->m_quotainfo); | 106 | ASSERT(q); |
106 | if (mp->m_quotainfo) | 107 | mutex_lock(&q->qi_quotaofflock); |
107 | mutex_lock(&(XFS_QI_QOFFLOCK(mp))); | ||
108 | |||
109 | ASSERT(mp->m_quotainfo); | ||
110 | 108 | ||
111 | /* | 109 | /* |
112 | * If we're just turning off quota enforcement, change mp and go. | 110 | * If we're just turning off quota enforcement, change mp and go. |
@@ -117,7 +115,7 @@ xfs_qm_scall_quotaoff( | |||
117 | spin_lock(&mp->m_sb_lock); | 115 | spin_lock(&mp->m_sb_lock); |
118 | mp->m_sb.sb_qflags = mp->m_qflags; | 116 | mp->m_sb.sb_qflags = mp->m_qflags; |
119 | spin_unlock(&mp->m_sb_lock); | 117 | spin_unlock(&mp->m_sb_lock); |
120 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | 118 | mutex_unlock(&q->qi_quotaofflock); |
121 | 119 | ||
122 | /* XXX what to do if error ? Revert back to old vals incore ? */ | 120 | /* XXX what to do if error ? Revert back to old vals incore ? */ |
123 | error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); | 121 | error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); |
@@ -150,10 +148,8 @@ xfs_qm_scall_quotaoff( | |||
150 | * Nothing to do? Don't complain. This happens when we're just | 148 | * Nothing to do? Don't complain. This happens when we're just |
151 | * turning off quota enforcement. | 149 | * turning off quota enforcement. |
152 | */ | 150 | */ |
153 | if ((mp->m_qflags & flags) == 0) { | 151 | if ((mp->m_qflags & flags) == 0) |
154 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | 152 | goto out_unlock; |
155 | return (0); | ||
156 | } | ||
157 | 153 | ||
158 | /* | 154 | /* |
159 | * Write the LI_QUOTAOFF log record, and do SB changes atomically, | 155 | * Write the LI_QUOTAOFF log record, and do SB changes atomically, |
@@ -162,7 +158,7 @@ xfs_qm_scall_quotaoff( | |||
162 | */ | 158 | */ |
163 | error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); | 159 | error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); |
164 | if (error) | 160 | if (error) |
165 | goto out_error; | 161 | goto out_unlock; |
166 | 162 | ||
167 | /* | 163 | /* |
168 | * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct | 164 | * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct |
@@ -204,7 +200,7 @@ xfs_qm_scall_quotaoff( | |||
204 | * So, if we couldn't purge all the dquots from the filesystem, | 200 | * So, if we couldn't purge all the dquots from the filesystem, |
205 | * we can't get rid of the incore data structures. | 201 | * we can't get rid of the incore data structures. |
206 | */ | 202 | */ |
207 | while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF))) | 203 | while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) |
208 | delay(10 * nculprits); | 204 | delay(10 * nculprits); |
209 | 205 | ||
210 | /* | 206 | /* |
@@ -222,7 +218,7 @@ xfs_qm_scall_quotaoff( | |||
222 | if (error) { | 218 | if (error) { |
223 | /* We're screwed now. Shutdown is the only option. */ | 219 | /* We're screwed now. Shutdown is the only option. */ |
224 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 220 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
225 | goto out_error; | 221 | goto out_unlock; |
226 | } | 222 | } |
227 | 223 | ||
228 | /* | 224 | /* |
@@ -230,27 +226,26 @@ xfs_qm_scall_quotaoff( | |||
230 | */ | 226 | */ |
231 | if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || | 227 | if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || |
232 | ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { | 228 | ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { |
233 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | 229 | mutex_unlock(&q->qi_quotaofflock); |
234 | xfs_qm_destroy_quotainfo(mp); | 230 | xfs_qm_destroy_quotainfo(mp); |
235 | return (0); | 231 | return (0); |
236 | } | 232 | } |
237 | 233 | ||
238 | /* | 234 | /* |
239 | * Release our quotainode references, and vn_purge them, | 235 | * Release our quotainode references if we don't need them anymore. |
240 | * if we don't need them anymore. | ||
241 | */ | 236 | */ |
242 | if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) { | 237 | if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) { |
243 | IRELE(XFS_QI_UQIP(mp)); | 238 | IRELE(q->qi_uquotaip); |
244 | XFS_QI_UQIP(mp) = NULL; | 239 | q->qi_uquotaip = NULL; |
245 | } | 240 | } |
246 | if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { | 241 | if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) { |
247 | IRELE(XFS_QI_GQIP(mp)); | 242 | IRELE(q->qi_gquotaip); |
248 | XFS_QI_GQIP(mp) = NULL; | 243 | q->qi_gquotaip = NULL; |
249 | } | 244 | } |
250 | out_error: | ||
251 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | ||
252 | 245 | ||
253 | return (error); | 246 | out_unlock: |
247 | mutex_unlock(&q->qi_quotaofflock); | ||
248 | return error; | ||
254 | } | 249 | } |
255 | 250 | ||
256 | int | 251 | int |
@@ -379,9 +374,9 @@ xfs_qm_scall_quotaon( | |||
379 | /* | 374 | /* |
380 | * Switch on quota enforcement in core. | 375 | * Switch on quota enforcement in core. |
381 | */ | 376 | */ |
382 | mutex_lock(&(XFS_QI_QOFFLOCK(mp))); | 377 | mutex_lock(&mp->m_quotainfo->qi_quotaofflock); |
383 | mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); | 378 | mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); |
384 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | 379 | mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); |
385 | 380 | ||
386 | return (0); | 381 | return (0); |
387 | } | 382 | } |
@@ -392,11 +387,12 @@ xfs_qm_scall_quotaon( | |||
392 | */ | 387 | */ |
393 | int | 388 | int |
394 | xfs_qm_scall_getqstat( | 389 | xfs_qm_scall_getqstat( |
395 | xfs_mount_t *mp, | 390 | struct xfs_mount *mp, |
396 | fs_quota_stat_t *out) | 391 | struct fs_quota_stat *out) |
397 | { | 392 | { |
398 | xfs_inode_t *uip, *gip; | 393 | struct xfs_quotainfo *q = mp->m_quotainfo; |
399 | boolean_t tempuqip, tempgqip; | 394 | struct xfs_inode *uip, *gip; |
395 | boolean_t tempuqip, tempgqip; | ||
400 | 396 | ||
401 | uip = gip = NULL; | 397 | uip = gip = NULL; |
402 | tempuqip = tempgqip = B_FALSE; | 398 | tempuqip = tempgqip = B_FALSE; |
@@ -415,9 +411,9 @@ xfs_qm_scall_getqstat( | |||
415 | out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; | 411 | out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; |
416 | out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; | 412 | out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; |
417 | 413 | ||
418 | if (mp->m_quotainfo) { | 414 | if (q) { |
419 | uip = mp->m_quotainfo->qi_uquotaip; | 415 | uip = q->qi_uquotaip; |
420 | gip = mp->m_quotainfo->qi_gquotaip; | 416 | gip = q->qi_gquotaip; |
421 | } | 417 | } |
422 | if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { | 418 | if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { |
423 | if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, | 419 | if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, |
@@ -441,17 +437,20 @@ xfs_qm_scall_getqstat( | |||
441 | if (tempgqip) | 437 | if (tempgqip) |
442 | IRELE(gip); | 438 | IRELE(gip); |
443 | } | 439 | } |
444 | if (mp->m_quotainfo) { | 440 | if (q) { |
445 | out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); | 441 | out->qs_incoredqs = q->qi_dquots; |
446 | out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp); | 442 | out->qs_btimelimit = q->qi_btimelimit; |
447 | out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp); | 443 | out->qs_itimelimit = q->qi_itimelimit; |
448 | out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp); | 444 | out->qs_rtbtimelimit = q->qi_rtbtimelimit; |
449 | out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp); | 445 | out->qs_bwarnlimit = q->qi_bwarnlimit; |
450 | out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp); | 446 | out->qs_iwarnlimit = q->qi_iwarnlimit; |
451 | } | 447 | } |
452 | return (0); | 448 | return 0; |
453 | } | 449 | } |
454 | 450 | ||
451 | #define XFS_DQ_MASK \ | ||
452 | (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) | ||
453 | |||
455 | /* | 454 | /* |
456 | * Adjust quota limits, and start/stop timers accordingly. | 455 | * Adjust quota limits, and start/stop timers accordingly. |
457 | */ | 456 | */ |
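[Note] `xfs_qm_scall_setqlim()` used to silently ignore unknown bits in `d_fieldmask`; the new `XFS_DQ_MASK` test rejects them with `EINVAL` before the existing nothing-requested check. A sketch of that validation with illustrative flag values (XFS of this era returns positive errnos internally):

    #include <linux/errno.h>

    #define F_LIMIT 0x1
    #define F_TIMER 0x2
    #define F_WARNS 0x4
    #define F_VALID (F_LIMIT | F_TIMER | F_WARNS)

    /* 0: nothing to do, EINVAL: bad mask, 1: proceed with the update. */
    static int check_fieldmask(unsigned int mask)
    {
            if (mask & ~F_VALID)
                    return EINVAL;  /* caller set a bit we don't understand */
            if (!(mask & F_VALID))
                    return 0;       /* silent no-op, as before */
            return 1;
    }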
@@ -462,15 +461,17 @@ xfs_qm_scall_setqlim( | |||
462 | uint type, | 461 | uint type, |
463 | fs_disk_quota_t *newlim) | 462 | fs_disk_quota_t *newlim) |
464 | { | 463 | { |
464 | struct xfs_quotainfo *q = mp->m_quotainfo; | ||
465 | xfs_disk_dquot_t *ddq; | 465 | xfs_disk_dquot_t *ddq; |
466 | xfs_dquot_t *dqp; | 466 | xfs_dquot_t *dqp; |
467 | xfs_trans_t *tp; | 467 | xfs_trans_t *tp; |
468 | int error; | 468 | int error; |
469 | xfs_qcnt_t hard, soft; | 469 | xfs_qcnt_t hard, soft; |
470 | 470 | ||
471 | if ((newlim->d_fieldmask & | 471 | if (newlim->d_fieldmask & ~XFS_DQ_MASK) |
472 | (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) | 472 | return EINVAL; |
473 | return (0); | 473 | if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) |
474 | return 0; | ||
474 | 475 | ||
475 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); | 476 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); |
476 | if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, | 477 | if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, |
@@ -485,7 +486,7 @@ xfs_qm_scall_setqlim( | |||
485 | * a quotaoff from happening). (XXXThis doesn't currently happen | 486 | * a quotaoff from happening). (XXXThis doesn't currently happen |
486 | * because we take the vfslock before calling xfs_qm_sysent). | 487 | * because we take the vfslock before calling xfs_qm_sysent). |
487 | */ | 488 | */ |
488 | mutex_lock(&(XFS_QI_QOFFLOCK(mp))); | 489 | mutex_lock(&q->qi_quotaofflock); |
489 | 490 | ||
490 | /* | 491 | /* |
491 | * Get the dquot (locked), and join it to the transaction. | 492 | * Get the dquot (locked), and join it to the transaction. |
@@ -493,9 +494,8 @@ xfs_qm_scall_setqlim( | |||
493 | */ | 494 | */ |
494 | if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { | 495 | if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { |
495 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 496 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); |
496 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | ||
497 | ASSERT(error != ENOENT); | 497 | ASSERT(error != ENOENT); |
498 | return (error); | 498 | goto out_unlock; |
499 | } | 499 | } |
500 | xfs_trans_dqjoin(tp, dqp); | 500 | xfs_trans_dqjoin(tp, dqp); |
501 | ddq = &dqp->q_core; | 501 | ddq = &dqp->q_core; |
@@ -513,8 +513,8 @@ xfs_qm_scall_setqlim( | |||
513 | ddq->d_blk_hardlimit = cpu_to_be64(hard); | 513 | ddq->d_blk_hardlimit = cpu_to_be64(hard); |
514 | ddq->d_blk_softlimit = cpu_to_be64(soft); | 514 | ddq->d_blk_softlimit = cpu_to_be64(soft); |
515 | if (id == 0) { | 515 | if (id == 0) { |
516 | mp->m_quotainfo->qi_bhardlimit = hard; | 516 | q->qi_bhardlimit = hard; |
517 | mp->m_quotainfo->qi_bsoftlimit = soft; | 517 | q->qi_bsoftlimit = soft; |
518 | } | 518 | } |
519 | } else { | 519 | } else { |
520 | qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); | 520 | qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); |
@@ -529,8 +529,8 @@ xfs_qm_scall_setqlim( | |||
529 | ddq->d_rtb_hardlimit = cpu_to_be64(hard); | 529 | ddq->d_rtb_hardlimit = cpu_to_be64(hard); |
530 | ddq->d_rtb_softlimit = cpu_to_be64(soft); | 530 | ddq->d_rtb_softlimit = cpu_to_be64(soft); |
531 | if (id == 0) { | 531 | if (id == 0) { |
532 | mp->m_quotainfo->qi_rtbhardlimit = hard; | 532 | q->qi_rtbhardlimit = hard; |
533 | mp->m_quotainfo->qi_rtbsoftlimit = soft; | 533 | q->qi_rtbsoftlimit = soft; |
534 | } | 534 | } |
535 | } else { | 535 | } else { |
536 | qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); | 536 | qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); |
@@ -546,8 +546,8 @@ xfs_qm_scall_setqlim( | |||
546 | ddq->d_ino_hardlimit = cpu_to_be64(hard); | 546 | ddq->d_ino_hardlimit = cpu_to_be64(hard); |
547 | ddq->d_ino_softlimit = cpu_to_be64(soft); | 547 | ddq->d_ino_softlimit = cpu_to_be64(soft); |
548 | if (id == 0) { | 548 | if (id == 0) { |
549 | mp->m_quotainfo->qi_ihardlimit = hard; | 549 | q->qi_ihardlimit = hard; |
550 | mp->m_quotainfo->qi_isoftlimit = soft; | 550 | q->qi_isoftlimit = soft; |
551 | } | 551 | } |
552 | } else { | 552 | } else { |
553 | qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); | 553 | qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); |
@@ -572,23 +572,23 @@ xfs_qm_scall_setqlim( | |||
572 | * for warnings. | 572 | * for warnings. |
573 | */ | 573 | */ |
574 | if (newlim->d_fieldmask & FS_DQ_BTIMER) { | 574 | if (newlim->d_fieldmask & FS_DQ_BTIMER) { |
575 | mp->m_quotainfo->qi_btimelimit = newlim->d_btimer; | 575 | q->qi_btimelimit = newlim->d_btimer; |
576 | ddq->d_btimer = cpu_to_be32(newlim->d_btimer); | 576 | ddq->d_btimer = cpu_to_be32(newlim->d_btimer); |
577 | } | 577 | } |
578 | if (newlim->d_fieldmask & FS_DQ_ITIMER) { | 578 | if (newlim->d_fieldmask & FS_DQ_ITIMER) { |
579 | mp->m_quotainfo->qi_itimelimit = newlim->d_itimer; | 579 | q->qi_itimelimit = newlim->d_itimer; |
580 | ddq->d_itimer = cpu_to_be32(newlim->d_itimer); | 580 | ddq->d_itimer = cpu_to_be32(newlim->d_itimer); |
581 | } | 581 | } |
582 | if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { | 582 | if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { |
583 | mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer; | 583 | q->qi_rtbtimelimit = newlim->d_rtbtimer; |
584 | ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); | 584 | ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); |
585 | } | 585 | } |
586 | if (newlim->d_fieldmask & FS_DQ_BWARNS) | 586 | if (newlim->d_fieldmask & FS_DQ_BWARNS) |
587 | mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns; | 587 | q->qi_bwarnlimit = newlim->d_bwarns; |
588 | if (newlim->d_fieldmask & FS_DQ_IWARNS) | 588 | if (newlim->d_fieldmask & FS_DQ_IWARNS) |
589 | mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns; | 589 | q->qi_iwarnlimit = newlim->d_iwarns; |
590 | if (newlim->d_fieldmask & FS_DQ_RTBWARNS) | 590 | if (newlim->d_fieldmask & FS_DQ_RTBWARNS) |
591 | mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns; | 591 | q->qi_rtbwarnlimit = newlim->d_rtbwarns; |
592 | } else { | 592 | } else { |
593 | /* | 593 | /* |
594 | * If the user is now over quota, start the timelimit. | 594 | * If the user is now over quota, start the timelimit. |
@@ -605,8 +605,9 @@ xfs_qm_scall_setqlim( | |||
605 | error = xfs_trans_commit(tp, 0); | 605 | error = xfs_trans_commit(tp, 0); |
606 | xfs_qm_dqprint(dqp); | 606 | xfs_qm_dqprint(dqp); |
607 | xfs_qm_dqrele(dqp); | 607 | xfs_qm_dqrele(dqp); |
608 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | ||
609 | 608 | ||
609 | out_unlock: | ||
610 | mutex_unlock(&q->qi_quotaofflock); | ||
610 | return error; | 611 | return error; |
611 | } | 612 | } |
612 | 613 | ||
@@ -853,7 +854,8 @@ xfs_dqrele_inode( | |||
853 | int error; | 854 | int error; |
854 | 855 | ||
855 | /* skip quota inodes */ | 856 | /* skip quota inodes */ |
856 | if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) { | 857 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || |
858 | ip == ip->i_mount->m_quotainfo->qi_gquotaip) { | ||
857 | ASSERT(ip->i_udquot == NULL); | 859 | ASSERT(ip->i_udquot == NULL); |
858 | ASSERT(ip->i_gdquot == NULL); | 860 | ASSERT(ip->i_gdquot == NULL); |
859 | read_unlock(&pag->pag_ici_lock); | 861 | read_unlock(&pag->pag_ici_lock); |
@@ -931,7 +933,8 @@ struct mutex qcheck_lock; | |||
931 | } | 933 | } |
932 | 934 | ||
933 | typedef struct dqtest { | 935 | typedef struct dqtest { |
934 | xfs_dqmarker_t q_lists; | 936 | uint dq_flags; /* various flags (XFS_DQ_*) */ |
937 | struct list_head q_hashlist; | ||
935 | xfs_dqhash_t *q_hash; /* the hashchain header */ | 938 | xfs_dqhash_t *q_hash; /* the hashchain header */ |
936 | xfs_mount_t *q_mount; /* filesystem this relates to */ | 939 | xfs_mount_t *q_mount; /* filesystem this relates to */ |
937 | xfs_dqid_t d_id; /* user id or group id */ | 940 | xfs_dqid_t d_id; /* user id or group id */ |
@@ -942,14 +945,9 @@ typedef struct dqtest { | |||
942 | STATIC void | 945 | STATIC void |
943 | xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) | 946 | xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) |
944 | { | 947 | { |
945 | xfs_dquot_t *d; | 948 | list_add(&dqp->q_hashlist, &h->qh_list); |
946 | if (((d) = (h)->qh_next)) | 949 | h->qh_version++; |
947 | (d)->HL_PREVP = &((dqp)->HL_NEXT); | 950 | h->qh_nelems++; |
948 | (dqp)->HL_NEXT = d; | ||
949 | (dqp)->HL_PREVP = &((h)->qh_next); | ||
950 | (h)->qh_next = (xfs_dquot_t *)dqp; | ||
951 | (h)->qh_version++; | ||
952 | (h)->qh_nelems++; | ||
953 | } | 951 | } |
954 | STATIC void | 952 | STATIC void |
955 | xfs_qm_dqtest_print( | 953 | xfs_qm_dqtest_print( |
@@ -1061,9 +1059,7 @@ xfs_qm_internalqcheck_dqget( | |||
1061 | xfs_dqhash_t *h; | 1059 | xfs_dqhash_t *h; |
1062 | 1060 | ||
1063 | h = DQTEST_HASH(mp, id, type); | 1061 | h = DQTEST_HASH(mp, id, type); |
1064 | for (d = (xfs_dqtest_t *) h->qh_next; d != NULL; | 1062 | list_for_each_entry(d, &h->qh_list, q_hashlist) { |
1065 | d = (xfs_dqtest_t *) d->HL_NEXT) { | ||
1066 | /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */ | ||
1067 | if (d->d_id == id && mp == d->q_mount) { | 1063 | if (d->d_id == id && mp == d->q_mount) { |
1068 | *O_dq = d; | 1064 | *O_dq = d; |
1069 | return (0); | 1065 | return (0); |
@@ -1074,6 +1070,7 @@ xfs_qm_internalqcheck_dqget( | |||
1074 | d->d_id = id; | 1070 | d->d_id = id; |
1075 | d->q_mount = mp; | 1071 | d->q_mount = mp; |
1076 | d->q_hash = h; | 1072 | d->q_hash = h; |
1073 | INIT_LIST_HEAD(&d->q_hashlist); | ||
1077 | xfs_qm_hashinsert(h, d); | 1074 | xfs_qm_hashinsert(h, d); |
1078 | *O_dq = d; | 1075 | *O_dq = d; |
1079 | return (0); | 1076 | return (0); |
@@ -1180,8 +1177,6 @@ xfs_qm_internalqcheck( | |||
1180 | xfs_ino_t lastino; | 1177 | xfs_ino_t lastino; |
1181 | int done, count; | 1178 | int done, count; |
1182 | int i; | 1179 | int i; |
1183 | xfs_dqtest_t *d, *e; | ||
1184 | xfs_dqhash_t *h1; | ||
1185 | int error; | 1180 | int error; |
1186 | 1181 | ||
1187 | lastino = 0; | 1182 | lastino = 0; |
@@ -1221,19 +1216,18 @@ xfs_qm_internalqcheck( | |||
1221 | } | 1216 | } |
1222 | cmn_err(CE_DEBUG, "Checking results against system dquots"); | 1217 | cmn_err(CE_DEBUG, "Checking results against system dquots"); |
1223 | for (i = 0; i < qmtest_hashmask; i++) { | 1218 | for (i = 0; i < qmtest_hashmask; i++) { |
1224 | h1 = &qmtest_udqtab[i]; | 1219 | xfs_dqtest_t *d, *n; |
1225 | for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { | 1220 | xfs_dqhash_t *h; |
1221 | |||
1222 | h = &qmtest_udqtab[i]; | ||
1223 | list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { | ||
1226 | xfs_dqtest_cmp(d); | 1224 | xfs_dqtest_cmp(d); |
1227 | e = (xfs_dqtest_t *) d->HL_NEXT; | ||
1228 | kmem_free(d); | 1225 | kmem_free(d); |
1229 | d = e; | ||
1230 | } | 1226 | } |
1231 | h1 = &qmtest_gdqtab[i]; | 1227 | h = &qmtest_gdqtab[i]; |
1232 | for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { | 1228 | list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { |
1233 | xfs_dqtest_cmp(d); | 1229 | xfs_dqtest_cmp(d); |
1234 | e = (xfs_dqtest_t *) d->HL_NEXT; | ||
1235 | kmem_free(d); | 1230 | kmem_free(d); |
1236 | d = e; | ||
1237 | } | 1231 | } |
1238 | } | 1232 | } |
1239 | 1233 | ||
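The qcheck teardown above switches from a hand-rolled HL_NEXT walk to list_for_each_entry_safe(), which is required whenever entries are freed mid-walk: the _safe variant caches the successor before the loop body runs, so freeing the current entry cannot poison the iteration. A minimal sketch of the pattern with a hypothetical item type:

#include <linux/list.h>
#include <linux/slab.h>

struct item {
        struct list_head link;
        int payload;
};

static void drain(struct list_head *head)
{
        struct item *it, *next;

        list_for_each_entry_safe(it, next, head, link) {
                /* safe: "next" was read before we free "it" */
                list_del(&it->link);
                kfree(it);
        }
}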
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index 8286b2842b6b..94a3d927d716 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h | |||
@@ -24,43 +24,6 @@ | |||
24 | */ | 24 | */ |
25 | #define XFS_DQITER_MAP_SIZE 10 | 25 | #define XFS_DQITER_MAP_SIZE 10 |
26 | 26 | ||
27 | /* Number of dquots that fit in to a dquot block */ | ||
28 | #define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk) | ||
29 | |||
30 | #define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t)) | ||
31 | |||
32 | #define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims) | ||
33 | #define XFS_QI_UQIP(mp) ((mp)->m_quotainfo->qi_uquotaip) | ||
34 | #define XFS_QI_GQIP(mp) ((mp)->m_quotainfo->qi_gquotaip) | ||
35 | #define XFS_QI_DQCHUNKLEN(mp) ((mp)->m_quotainfo->qi_dqchunklen) | ||
36 | #define XFS_QI_BTIMELIMIT(mp) ((mp)->m_quotainfo->qi_btimelimit) | ||
37 | #define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit) | ||
38 | #define XFS_QI_ITIMELIMIT(mp) ((mp)->m_quotainfo->qi_itimelimit) | ||
39 | #define XFS_QI_BWARNLIMIT(mp) ((mp)->m_quotainfo->qi_bwarnlimit) | ||
40 | #define XFS_QI_RTBWARNLIMIT(mp) ((mp)->m_quotainfo->qi_rtbwarnlimit) | ||
41 | #define XFS_QI_IWARNLIMIT(mp) ((mp)->m_quotainfo->qi_iwarnlimit) | ||
42 | #define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock) | ||
43 | |||
44 | #define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist) | ||
45 | #define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next) | ||
46 | #define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems) | ||
47 | |||
48 | #define xfs_qm_mplist_lock(mp) \ | ||
49 | mutex_lock(&(XFS_QI_MPL_LIST(mp).qh_lock)) | ||
50 | #define xfs_qm_mplist_nowait(mp) \ | ||
51 | mutex_trylock(&(XFS_QI_MPL_LIST(mp).qh_lock)) | ||
52 | #define xfs_qm_mplist_unlock(mp) \ | ||
53 | mutex_unlock(&(XFS_QI_MPL_LIST(mp).qh_lock)) | ||
54 | #define XFS_QM_IS_MPLIST_LOCKED(mp) \ | ||
55 | mutex_is_locked(&(XFS_QI_MPL_LIST(mp).qh_lock)) | ||
56 | |||
57 | #define xfs_qm_freelist_lock(qm) \ | ||
58 | mutex_lock(&((qm)->qm_dqfreelist.qh_lock)) | ||
59 | #define xfs_qm_freelist_lock_nowait(qm) \ | ||
60 | mutex_trylock(&((qm)->qm_dqfreelist.qh_lock)) | ||
61 | #define xfs_qm_freelist_unlock(qm) \ | ||
62 | mutex_unlock(&((qm)->qm_dqfreelist.qh_lock)) | ||
63 | |||
64 | /* | 27 | /* |
65 | * Hash into a bucket in the dquot hash table, based on <mp, id>. | 28 | * Hash into a bucket in the dquot hash table, based on <mp, id>. |
66 | */ | 29 | */ |
@@ -72,9 +35,6 @@ | |||
72 | XFS_DQ_HASHVAL(mp, id)) : \ | 35 | XFS_DQ_HASHVAL(mp, id)) : \ |
73 | (xfs_Gqm->qm_grp_dqhtable + \ | 36 | (xfs_Gqm->qm_grp_dqhtable + \ |
74 | XFS_DQ_HASHVAL(mp, id))) | 37 | XFS_DQ_HASHVAL(mp, id))) |
75 | #define XFS_IS_DQTYPE_ON(mp, type) (type == XFS_DQ_USER ? \ | ||
76 | XFS_IS_UQUOTA_ON(mp) : \ | ||
77 | XFS_IS_OQUOTA_ON(mp)) | ||
78 | #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ | 38 | #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ |
79 | !dqp->q_core.d_blk_hardlimit && \ | 39 | !dqp->q_core.d_blk_hardlimit && \ |
80 | !dqp->q_core.d_blk_softlimit && \ | 40 | !dqp->q_core.d_blk_softlimit && \ |
@@ -86,68 +46,6 @@ | |||
86 | !dqp->q_core.d_rtbcount && \ | 46 | !dqp->q_core.d_rtbcount && \ |
87 | !dqp->q_core.d_icount) | 47 | !dqp->q_core.d_icount) |
88 | 48 | ||
89 | #define HL_PREVP dq_hashlist.ql_prevp | ||
90 | #define HL_NEXT dq_hashlist.ql_next | ||
91 | #define MPL_PREVP dq_mplist.ql_prevp | ||
92 | #define MPL_NEXT dq_mplist.ql_next | ||
93 | |||
94 | |||
95 | #define _LIST_REMOVE(h, dqp, PVP, NXT) \ | ||
96 | { \ | ||
97 | xfs_dquot_t *d; \ | ||
98 | if (((d) = (dqp)->NXT)) \ | ||
99 | (d)->PVP = (dqp)->PVP; \ | ||
100 | *((dqp)->PVP) = d; \ | ||
101 | (dqp)->NXT = NULL; \ | ||
102 | (dqp)->PVP = NULL; \ | ||
103 | (h)->qh_version++; \ | ||
104 | (h)->qh_nelems--; \ | ||
105 | } | ||
106 | |||
107 | #define _LIST_INSERT(h, dqp, PVP, NXT) \ | ||
108 | { \ | ||
109 | xfs_dquot_t *d; \ | ||
110 | if (((d) = (h)->qh_next)) \ | ||
111 | (d)->PVP = &((dqp)->NXT); \ | ||
112 | (dqp)->NXT = d; \ | ||
113 | (dqp)->PVP = &((h)->qh_next); \ | ||
114 | (h)->qh_next = dqp; \ | ||
115 | (h)->qh_version++; \ | ||
116 | (h)->qh_nelems++; \ | ||
117 | } | ||
118 | |||
119 | #define FOREACH_DQUOT_IN_MP(dqp, mp) \ | ||
120 | for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT) | ||
121 | |||
122 | #define FOREACH_DQUOT_IN_FREELIST(dqp, qlist) \ | ||
123 | for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \ | ||
124 | (dqp) = (dqp)->dq_flnext) | ||
125 | |||
126 | #define XQM_HASHLIST_INSERT(h, dqp) \ | ||
127 | _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT) | ||
128 | |||
129 | #define XQM_FREELIST_INSERT(h, dqp) \ | ||
130 | xfs_qm_freelist_append(h, dqp) | ||
131 | |||
132 | #define XQM_MPLIST_INSERT(h, dqp) \ | ||
133 | _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT) | ||
134 | |||
135 | #define XQM_HASHLIST_REMOVE(h, dqp) \ | ||
136 | _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT) | ||
137 | #define XQM_FREELIST_REMOVE(dqp) \ | ||
138 | xfs_qm_freelist_unlink(dqp) | ||
139 | #define XQM_MPLIST_REMOVE(h, dqp) \ | ||
140 | { _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \ | ||
141 | XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; } | ||
142 | |||
143 | #define XFS_DQ_IS_LOGITEM_INITD(dqp) ((dqp)->q_logitem.qli_dquot == (dqp)) | ||
144 | |||
145 | #define XFS_QM_DQP_TO_DQACCT(tp, dqp) (XFS_QM_ISUDQ(dqp) ? \ | ||
146 | (tp)->t_dqinfo->dqa_usrdquots : \ | ||
147 | (tp)->t_dqinfo->dqa_grpdquots) | ||
148 | #define XFS_IS_SUSER_DQUOT(dqp) \ | ||
149 | (!((dqp)->q_core.d_id)) | ||
150 | |||
151 | #define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ | 49 | #define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ |
152 | (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ | 50 | (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ |
153 | (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) | 51 | (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) |
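The _LIST_INSERT/_LIST_REMOVE macros deleted above implemented a doubly linked list by hand, using a pointer-to-previous-link (the PVP back-pointer) rather than a previous-node pointer. struct list_head expresses the same O(1) operations directly, as xfs_qm_hashinsert() earlier in this patch shows. A sketch of the equivalent bucket insert/remove pair, keeping the version and element counters the macros maintained (the qh_* names follow the xfs_dqhash_t usage above):

#include <linux/list.h>

struct bucket {
        struct list_head qh_list;
        unsigned int qh_version;        /* bumped on every modification */
        unsigned int qh_nelems;
};

static void bucket_insert(struct bucket *h, struct list_head *entry)
{
        list_add(entry, &h->qh_list);   /* head insert, as _LIST_INSERT did */
        h->qh_version++;
        h->qh_nelems++;
}

static void bucket_remove(struct bucket *h, struct list_head *entry)
{
        list_del_init(entry);           /* unlink and reinitialise the entry */
        h->qh_version++;
        h->qh_nelems--;
}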
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index c3ab75cb1d9a..061d827da33c 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c | |||
@@ -59,12 +59,11 @@ xfs_trans_dqjoin( | |||
59 | xfs_trans_t *tp, | 59 | xfs_trans_t *tp, |
60 | xfs_dquot_t *dqp) | 60 | xfs_dquot_t *dqp) |
61 | { | 61 | { |
62 | xfs_dq_logitem_t *lp; | 62 | xfs_dq_logitem_t *lp = &dqp->q_logitem; |
63 | 63 | ||
64 | ASSERT(! XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); | 64 | ASSERT(dqp->q_transp != tp); |
65 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 65 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
66 | ASSERT(XFS_DQ_IS_LOGITEM_INITD(dqp)); | 66 | ASSERT(lp->qli_dquot == dqp); |
67 | lp = &dqp->q_logitem; | ||
68 | 67 | ||
69 | /* | 68 | /* |
70 | * Get a log_item_desc to point at the new item. | 69 | * Get a log_item_desc to point at the new item. |
@@ -96,7 +95,7 @@ xfs_trans_log_dquot( | |||
96 | { | 95 | { |
97 | xfs_log_item_desc_t *lidp; | 96 | xfs_log_item_desc_t *lidp; |
98 | 97 | ||
99 | ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); | 98 | ASSERT(dqp->q_transp == tp); |
100 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 99 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
101 | 100 | ||
102 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem)); | 101 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem)); |
@@ -198,16 +197,16 @@ xfs_trans_get_dqtrx( | |||
198 | int i; | 197 | int i; |
199 | xfs_dqtrx_t *qa; | 198 | xfs_dqtrx_t *qa; |
200 | 199 | ||
201 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { | 200 | qa = XFS_QM_ISUDQ(dqp) ? |
202 | qa = XFS_QM_DQP_TO_DQACCT(tp, dqp); | 201 | tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; |
203 | 202 | ||
203 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { | ||
204 | if (qa[i].qt_dquot == NULL || | 204 | if (qa[i].qt_dquot == NULL || |
205 | qa[i].qt_dquot == dqp) { | 205 | qa[i].qt_dquot == dqp) |
206 | return (&qa[i]); | 206 | return &qa[i]; |
207 | } | ||
208 | } | 207 | } |
209 | 208 | ||
210 | return (NULL); | 209 | return NULL; |
211 | } | 210 | } |
212 | 211 | ||
213 | /* | 212 | /* |
@@ -381,7 +380,7 @@ xfs_trans_apply_dquot_deltas( | |||
381 | break; | 380 | break; |
382 | 381 | ||
383 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 382 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
384 | ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); | 383 | ASSERT(dqp->q_transp == tp); |
385 | 384 | ||
386 | /* | 385 | /* |
387 | * adjust the actual number of blocks used | 386 | * adjust the actual number of blocks used |
@@ -639,7 +638,7 @@ xfs_trans_dqresv( | |||
639 | softlimit = q->qi_bsoftlimit; | 638 | softlimit = q->qi_bsoftlimit; |
640 | timer = be32_to_cpu(dqp->q_core.d_btimer); | 639 | timer = be32_to_cpu(dqp->q_core.d_btimer); |
641 | warns = be16_to_cpu(dqp->q_core.d_bwarns); | 640 | warns = be16_to_cpu(dqp->q_core.d_bwarns); |
642 | warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount); | 641 | warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; |
643 | resbcountp = &dqp->q_res_bcount; | 642 | resbcountp = &dqp->q_res_bcount; |
644 | } else { | 643 | } else { |
645 | ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); | 644 | ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); |
@@ -651,7 +650,7 @@ xfs_trans_dqresv( | |||
651 | softlimit = q->qi_rtbsoftlimit; | 650 | softlimit = q->qi_rtbsoftlimit; |
652 | timer = be32_to_cpu(dqp->q_core.d_rtbtimer); | 651 | timer = be32_to_cpu(dqp->q_core.d_rtbtimer); |
653 | warns = be16_to_cpu(dqp->q_core.d_rtbwarns); | 652 | warns = be16_to_cpu(dqp->q_core.d_rtbwarns); |
654 | warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); | 653 | warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; |
655 | resbcountp = &dqp->q_res_rtbcount; | 654 | resbcountp = &dqp->q_res_rtbcount; |
656 | } | 655 | } |
657 | 656 | ||
@@ -691,7 +690,7 @@ xfs_trans_dqresv( | |||
691 | count = be64_to_cpu(dqp->q_core.d_icount); | 690 | count = be64_to_cpu(dqp->q_core.d_icount); |
692 | timer = be32_to_cpu(dqp->q_core.d_itimer); | 691 | timer = be32_to_cpu(dqp->q_core.d_itimer); |
693 | warns = be16_to_cpu(dqp->q_core.d_iwarns); | 692 | warns = be16_to_cpu(dqp->q_core.d_iwarns); |
694 | warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount); | 693 | warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; |
695 | hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); | 694 | hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); |
696 | if (!hardlimit) | 695 | if (!hardlimit) |
697 | hardlimit = q->qi_ihardlimit; | 696 | hardlimit = q->qi_ihardlimit; |
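The xfs_trans_get_dqtrx() rewrite above is a loop-invariant hoist: the choice between the user and group accounting arrays depends only on the dquot, not on the loop index, so the old code re-evaluated the XFS_QM_DQP_TO_DQACCT() selector on every pass for no benefit. The shape of the transformation, as a self-contained sketch with stand-in types:

#include <stddef.h>

#define MAXDQS 2

struct dqtrx { void *qt_dquot; };
struct dqinfo { struct dqtrx usr[MAXDQS]; struct dqtrx grp[MAXDQS]; };

static struct dqtrx *
get_dqtrx(struct dqinfo *info, void *dqp, int is_user)
{
        /* the array selection is loop-invariant: compute it once */
        struct dqtrx *qa = is_user ? info->usr : info->grp;
        int i;

        for (i = 0; i < MAXDQS; i++) {
                /* return the slot already holding dqp, or the first free one */
                if (qa[i].qt_dquot == NULL || qa[i].qt_dquot == dqp)
                        return &qa[i];
        }
        return NULL;
}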
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index d13eeba2c8f8..0135e2a669d7 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h | |||
@@ -49,8 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode); | |||
49 | extern int posix_acl_access_exists(struct inode *inode); | 49 | extern int posix_acl_access_exists(struct inode *inode); |
50 | extern int posix_acl_default_exists(struct inode *inode); | 50 | extern int posix_acl_default_exists(struct inode *inode); |
51 | 51 | ||
52 | extern struct xattr_handler xfs_xattr_acl_access_handler; | 52 | extern const struct xattr_handler xfs_xattr_acl_access_handler; |
53 | extern struct xattr_handler xfs_xattr_acl_default_handler; | 53 | extern const struct xattr_handler xfs_xattr_acl_default_handler; |
54 | #else | 54 | #else |
55 | # define xfs_check_acl NULL | 55 | # define xfs_check_acl NULL |
56 | # define xfs_get_acl(inode, type) NULL | 56 | # define xfs_get_acl(inode, type) NULL |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index abb8222b88c9..401f364ad36c 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -175,14 +175,20 @@ typedef struct xfs_agfl { | |||
175 | } xfs_agfl_t; | 175 | } xfs_agfl_t; |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * Busy block/extent entry. Used in perag to mark blocks that have been freed | 178 | * Busy block/extent entry. Indexed by an rbtree in perag to mark blocks that |
179 | * but whose transactions aren't committed to disk yet. | 179 | * have been freed but whose transactions aren't committed to disk yet. |
180 | * | ||
181 | * Note that we use the transaction ID to record the transaction, not the | ||
182 | * transaction structure itself. See xfs_alloc_busy_insert() for details. | ||
180 | */ | 183 | */ |
181 | typedef struct xfs_perag_busy { | 184 | struct xfs_busy_extent { |
182 | xfs_agblock_t busy_start; | 185 | struct rb_node rb_node; /* ag by-bno indexed search tree */ |
183 | xfs_extlen_t busy_length; | 186 | struct list_head list; /* transaction busy extent list */ |
184 | struct xfs_trans *busy_tp; /* transaction that did the free */ | 187 | xfs_agnumber_t agno; |
185 | } xfs_perag_busy_t; | 188 | xfs_agblock_t bno; |
189 | xfs_extlen_t length; | ||
190 | xlog_tid_t tid; /* transaction that created this */ | ||
191 | }; | ||
186 | 192 | ||
187 | /* | 193 | /* |
188 | * Per-ag incore structure, copies of information in agf and agi, | 194 | * Per-ag incore structure, copies of information in agf and agi, |
@@ -216,7 +222,8 @@ typedef struct xfs_perag { | |||
216 | xfs_agino_t pagl_leftrec; | 222 | xfs_agino_t pagl_leftrec; |
217 | xfs_agino_t pagl_rightrec; | 223 | xfs_agino_t pagl_rightrec; |
218 | #ifdef __KERNEL__ | 224 | #ifdef __KERNEL__ |
219 | spinlock_t pagb_lock; /* lock for pagb_list */ | 225 | spinlock_t pagb_lock; /* lock for pagb_tree */ |
226 | struct rb_root pagb_tree; /* ordered tree of busy extents */ | ||
220 | 227 | ||
221 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
222 | 229 | ||
@@ -226,7 +233,6 @@ typedef struct xfs_perag { | |||
226 | int pag_ici_reclaimable; /* reclaimable inodes */ | 233 | int pag_ici_reclaimable; /* reclaimable inodes */ |
227 | #endif | 234 | #endif |
228 | int pagb_count; /* pagb slots in use */ | 235 | int pagb_count; /* pagb slots in use */ |
229 | xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ | ||
230 | } xfs_perag_t; | 236 | } xfs_perag_t; |
231 | 237 | ||
232 | /* | 238 | /* |
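The new xfs_busy_extent embeds its rb_node directly in the structure, so the tree holds no separate key/value records: tree code recovers the containing object with rb_entry(), the rbtree-flavoured container_of(). A minimal sketch of the embedding idiom, assuming only the layout shown above:

#include <linux/rbtree.h>

struct busy_extent {
        struct rb_node rb_node;         /* embedded, never allocated separately */
        unsigned int bno;               /* tree index: extent start block */
        unsigned int length;
};

static struct busy_extent *node_to_busy(struct rb_node *node)
{
        /* rb_entry(ptr, type, member) expands to container_of() */
        return rb_entry(node, struct busy_extent, rb_node);
}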
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 94cddbfb2560..a7fbe8a99b12 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -46,11 +46,9 @@ | |||
46 | #define XFSA_FIXUP_BNO_OK 1 | 46 | #define XFSA_FIXUP_BNO_OK 1 |
47 | #define XFSA_FIXUP_CNT_OK 2 | 47 | #define XFSA_FIXUP_CNT_OK 2 |
48 | 48 | ||
49 | STATIC void | 49 | static int |
50 | xfs_alloc_search_busy(xfs_trans_t *tp, | 50 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, |
51 | xfs_agnumber_t agno, | 51 | xfs_agblock_t bno, xfs_extlen_t len); |
52 | xfs_agblock_t bno, | ||
53 | xfs_extlen_t len); | ||
54 | 52 | ||
55 | /* | 53 | /* |
56 | * Prototypes for per-ag allocation routines | 54 | * Prototypes for per-ag allocation routines |
@@ -540,9 +538,16 @@ xfs_alloc_ag_vextent( | |||
540 | be32_to_cpu(agf->agf_length)); | 538 | be32_to_cpu(agf->agf_length)); |
541 | xfs_alloc_log_agf(args->tp, args->agbp, | 539 | xfs_alloc_log_agf(args->tp, args->agbp, |
542 | XFS_AGF_FREEBLKS); | 540 | XFS_AGF_FREEBLKS); |
543 | /* search the busylist for these blocks */ | 541 | /* |
544 | xfs_alloc_search_busy(args->tp, args->agno, | 542 | * Search the busylist for these blocks and mark the |
545 | args->agbno, args->len); | 543 | * transaction as synchronous if blocks are found. This |
544 | * avoids the need to block due to a synchronous log | ||
545 | * force to ensure correct ordering as the synchronous | ||
546 | * transaction will guarantee that for us. | ||
547 | */ | ||
548 | if (xfs_alloc_busy_search(args->mp, args->agno, | ||
549 | args->agbno, args->len)) | ||
550 | xfs_trans_set_sync(args->tp); | ||
546 | } | 551 | } |
547 | if (!args->isfl) | 552 | if (!args->isfl) |
548 | xfs_trans_mod_sb(args->tp, | 553 | xfs_trans_mod_sb(args->tp, |
@@ -1693,7 +1698,7 @@ xfs_free_ag_extent( | |||
1693 | * when the iclog commits to disk. If a busy block is allocated, | 1698 | * when the iclog commits to disk. If a busy block is allocated, |
1694 | * the iclog is pushed up to the LSN that freed the block. | 1699 | * the iclog is pushed up to the LSN that freed the block. |
1695 | */ | 1700 | */ |
1696 | xfs_alloc_mark_busy(tp, agno, bno, len); | 1701 | xfs_alloc_busy_insert(tp, agno, bno, len); |
1697 | return 0; | 1702 | return 0; |
1698 | 1703 | ||
1699 | error0: | 1704 | error0: |
@@ -1989,14 +1994,20 @@ xfs_alloc_get_freelist( | |||
1989 | *bnop = bno; | 1994 | *bnop = bno; |
1990 | 1995 | ||
1991 | /* | 1996 | /* |
1992 | * As blocks are freed, they are added to the per-ag busy list | 1997 | * As blocks are freed, they are added to the per-ag busy list and |
1993 | * and remain there until the freeing transaction is committed to | 1998 | * remain there until the freeing transaction is committed to disk. |
1994 | * disk. Now that we have allocated blocks, this list must be | 1999 | * Now that we have allocated blocks, this list must be searched to see |
1995 | * searched to see if a block is being reused. If one is, then | 2000 | * if a block is being reused. If one is, then the freeing transaction |
1996 | * the freeing transaction must be pushed to disk NOW by forcing | 2001 | * must be pushed to disk before this transaction. |
1997 | * to disk all iclogs up that transaction's LSN. | 2002 | * |
2003 | * We do this by setting the current transaction to a sync transaction | ||
2004 | * which guarantees that the freeing transaction is on disk before this | ||
2005 | * transaction. This is done instead of a synchronous log force here so | ||
2006 | * that we don't sit and wait with the AGF locked in the transaction | ||
2007 | * during the log force. | ||
1998 | */ | 2008 | */ |
1999 | xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 2009 | if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) |
2010 | xfs_trans_set_sync(tp); | ||
2000 | return 0; | 2011 | return 0; |
2001 | } | 2012 | } |
2002 | 2013 | ||
@@ -2201,7 +2212,7 @@ xfs_alloc_read_agf( | |||
2201 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); | 2212 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); |
2202 | spin_lock_init(&pag->pagb_lock); | 2213 | spin_lock_init(&pag->pagb_lock); |
2203 | pag->pagb_count = 0; | 2214 | pag->pagb_count = 0; |
2204 | memset(pag->pagb_list, 0, sizeof(pag->pagb_list)); | 2215 | pag->pagb_tree = RB_ROOT; |
2205 | pag->pagf_init = 1; | 2216 | pag->pagf_init = 1; |
2206 | } | 2217 | } |
2207 | #ifdef DEBUG | 2218 | #ifdef DEBUG |
@@ -2479,127 +2490,263 @@ error0: | |||
2479 | * list is reused, the transaction that freed it must be forced to disk | 2490 | * list is reused, the transaction that freed it must be forced to disk |
2480 | * before continuing to use the block. | 2491 | * before continuing to use the block. |
2481 | * | 2492 | * |
2482 | * xfs_alloc_mark_busy - add to the per-ag busy list | 2493 | * xfs_alloc_busy_insert - add to the per-ag busy list |
2483 | * xfs_alloc_clear_busy - remove an item from the per-ag busy list | 2494 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list |
2495 | * xfs_alloc_busy_search - search for a busy extent | ||
2496 | */ | ||
2497 | |||
2498 | /* | ||
2499 | * Insert a new extent into the busy tree. | ||
2500 | * | ||
2501 | * The busy extent tree is indexed by the start block of the busy extent. | ||
2502 | * There can be multiple overlapping ranges in the busy extent tree but only | ||
2503 | * ever one entry at a given start block. The reason for this is that | ||
2504 | * multi-block extents can be freed, then smaller chunks of that extent | ||
2505 | * allocated and freed again before the first transaction commit is on disk. | ||
2506 | * If the exact same start block is freed a second time, we have to wait for | ||
2507 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
2508 | * There are two main cases we have to handle here. | ||
2509 | * | ||
2510 | * The first case is a transaction that triggers a "free - allocate - free" | ||
2511 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
2512 | * to the freelist, then allocated from the free list, then freed again. In | ||
2513 | * this case, the second extent free is what triggers the duplicate and as | ||
2514 | * such the transaction IDs should match. Because the extent was allocated in | ||
2515 | * this transaction, the transaction must be marked as synchronous. This is | ||
2516 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
2517 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
2518 | * This serves to catch violations of the second case quite effectively. | ||
2519 | * | ||
2520 | * The second case is where the free/alloc/free occur in different | ||
2521 | * transactions. In this case, the thread freeing the extent the second time | ||
2522 | * can't mark the extent busy immediately because it is already tracked in a | ||
2523 | * transaction that may be committing. When the log commit for the existing | ||
2524 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
2525 | * allow the second busy insert to continue using that busy extent structure, | ||
2526 | * it can be freed before this transaction is safely in the log. Hence our | ||
2527 | * only option in this case is to force the log to remove the existing busy | ||
2528 | * extent from the list before we insert the new one with the current | ||
2529 | * transaction ID. | ||
2530 | * | ||
2531 | * The problem we are trying to avoid with the free-alloc-free in separate | ||
2532 | * transactions is most easily described with a timeline: | ||
2533 | * | ||
2534 | * Thread 1 Thread 2 Thread 3 xfslogd | ||
2535 | * xact alloc | ||
2536 | * free X | ||
2537 | * mark busy | ||
2538 | * commit xact | ||
2539 | * free xact | ||
2540 | * xact alloc | ||
2541 | * alloc X | ||
2542 | * busy search | ||
2543 | * mark xact sync | ||
2544 | * commit xact | ||
2545 | * free xact | ||
2546 | * force log | ||
2547 | * checkpoint starts | ||
2548 | * .... | ||
2549 | * xact alloc | ||
2550 | * free X | ||
2551 | * mark busy | ||
2552 | * finds match | ||
2553 | * *** KABOOM! *** | ||
2554 | * .... | ||
2555 | * log IO completes | ||
2556 | * unbusy X | ||
2557 | * checkpoint completes | ||
2558 | * | ||
2559 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
2560 | * the checkpoint completes, and the busy extent it matched will have been | ||
2561 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
2562 | * | ||
2563 | * However, to ensure this matching process is robust, we need to use the | ||
2564 | * transaction ID for identifying the transaction, as delayed logging results | ||
2565 | * in the busy extent and transaction lifecycles being different, i.e. the | ||
2566 | * busy extent is active for a lot longer than the transaction. Hence the | ||
2567 | * transaction structure can be freed and reallocated and then used to mark | ||
2568 | * the same extent busy again in a new transaction. In this case the new | ||
2569 | * transaction will have a different tid but can have the same address, and | ||
2570 | * hence we need to check against the tid. | ||
2571 | * | ||
2572 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
2573 | * first freed in the current checkpoint sequence. This, however, requires the | ||
2574 | * ability to pin the current checkpoint in memory until this transaction | ||
2575 | * commits to ensure that both the original free and the current one combine | ||
2576 | * logically into the one checkpoint. If the checkpoint sequences are | ||
2577 | * different, however, we still need to wait on a log force. | ||
2484 | */ | 2578 | */ |
2485 | void | 2579 | void |
2486 | xfs_alloc_mark_busy(xfs_trans_t *tp, | 2580 | xfs_alloc_busy_insert( |
2487 | xfs_agnumber_t agno, | 2581 | struct xfs_trans *tp, |
2488 | xfs_agblock_t bno, | 2582 | xfs_agnumber_t agno, |
2489 | xfs_extlen_t len) | 2583 | xfs_agblock_t bno, |
2584 | xfs_extlen_t len) | ||
2490 | { | 2585 | { |
2491 | xfs_perag_busy_t *bsy; | 2586 | struct xfs_busy_extent *new; |
2587 | struct xfs_busy_extent *busyp; | ||
2492 | struct xfs_perag *pag; | 2588 | struct xfs_perag *pag; |
2493 | int n; | 2589 | struct rb_node **rbp; |
2590 | struct rb_node *parent; | ||
2591 | int match; | ||
2494 | 2592 | ||
2495 | pag = xfs_perag_get(tp->t_mountp, agno); | ||
2496 | spin_lock(&pag->pagb_lock); | ||
2497 | 2593 | ||
2498 | /* search pagb_list for an open slot */ | 2594 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
2499 | for (bsy = pag->pagb_list, n = 0; | 2595 | if (!new) { |
2500 | n < XFS_PAGB_NUM_SLOTS; | 2596 | /* |
2501 | bsy++, n++) { | 2597 | * No Memory! Since it is now not possible to track the free |
2502 | if (bsy->busy_tp == NULL) { | 2598 | * block, make this a synchronous transaction to ensure that |
2503 | break; | 2599 | * the block is not reused before this transaction commits. |
2504 | } | 2600 | */ |
2601 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | ||
2602 | xfs_trans_set_sync(tp); | ||
2603 | return; | ||
2505 | } | 2604 | } |
2506 | 2605 | ||
2507 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n); | 2606 | new->agno = agno; |
2607 | new->bno = bno; | ||
2608 | new->length = len; | ||
2609 | new->tid = xfs_log_get_trans_ident(tp); | ||
2508 | 2610 | ||
2509 | if (n < XFS_PAGB_NUM_SLOTS) { | 2611 | INIT_LIST_HEAD(&new->list); |
2510 | bsy = &pag->pagb_list[n]; | 2612 | |
2511 | pag->pagb_count++; | 2613 | /* trace before insert to be able to see failed inserts */ |
2512 | bsy->busy_start = bno; | 2614 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); |
2513 | bsy->busy_length = len; | 2615 | |
2514 | bsy->busy_tp = tp; | 2616 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
2515 | xfs_trans_add_busy(tp, agno, n); | 2617 | restart: |
2516 | } else { | 2618 | spin_lock(&pag->pagb_lock); |
2619 | rbp = &pag->pagb_tree.rb_node; | ||
2620 | parent = NULL; | ||
2621 | busyp = NULL; | ||
2622 | match = 0; | ||
2623 | while (*rbp && match >= 0) { | ||
2624 | parent = *rbp; | ||
2625 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | ||
2626 | |||
2627 | if (new->bno < busyp->bno) { | ||
2628 | /* may overlap, but exact start block is lower */ | ||
2629 | rbp = &(*rbp)->rb_left; | ||
2630 | if (new->bno + new->length > busyp->bno) | ||
2631 | match = busyp->tid == new->tid ? 1 : -1; | ||
2632 | } else if (new->bno > busyp->bno) { | ||
2633 | /* may overlap, but exact start block is higher */ | ||
2634 | rbp = &(*rbp)->rb_right; | ||
2635 | if (bno < busyp->bno + busyp->length) | ||
2636 | match = busyp->tid == new->tid ? 1 : -1; | ||
2637 | } else { | ||
2638 | match = busyp->tid == new->tid ? 1 : -1; | ||
2639 | break; | ||
2640 | } | ||
2641 | } | ||
2642 | if (match < 0) { | ||
2643 | /* overlap marked busy in different transaction */ | ||
2644 | spin_unlock(&pag->pagb_lock); | ||
2645 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
2646 | goto restart; | ||
2647 | } | ||
2648 | if (match > 0) { | ||
2517 | /* | 2649 | /* |
2518 | * The busy list is full! Since it is now not possible to | 2650 | * overlap marked busy in same transaction. Update if exact |
2519 | * track the free block, make this a synchronous transaction | 2651 | * start block match, otherwise combine the busy extents into |
2520 | * to insure that the block is not reused before this | 2652 | * a single range. |
2521 | * transaction commits. | ||
2522 | */ | 2653 | */ |
2523 | xfs_trans_set_sync(tp); | 2654 | if (busyp->bno == new->bno) { |
2524 | } | 2655 | busyp->length = max(busyp->length, new->length); |
2656 | spin_unlock(&pag->pagb_lock); | ||
2657 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
2658 | xfs_perag_put(pag); | ||
2659 | kmem_free(new); | ||
2660 | return; | ||
2661 | } | ||
2662 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2663 | new->length = max(busyp->bno + busyp->length, | ||
2664 | new->bno + new->length) - | ||
2665 | min(busyp->bno, new->bno); | ||
2666 | new->bno = min(busyp->bno, new->bno); | ||
2667 | } else | ||
2668 | busyp = NULL; | ||
2525 | 2669 | ||
2670 | rb_link_node(&new->rb_node, parent, rbp); | ||
2671 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | ||
2672 | |||
2673 | list_add(&new->list, &tp->t_busy); | ||
2526 | spin_unlock(&pag->pagb_lock); | 2674 | spin_unlock(&pag->pagb_lock); |
2527 | xfs_perag_put(pag); | 2675 | xfs_perag_put(pag); |
2676 | kmem_free(busyp); | ||
2528 | } | 2677 | } |
2529 | 2678 | ||
2530 | void | 2679 | /* |
2531 | xfs_alloc_clear_busy(xfs_trans_t *tp, | 2680 | * Search for a busy extent within the range of the extent we are about to |
2532 | xfs_agnumber_t agno, | 2681 | * allocate. The busy extent tree lock is taken internally when calling |
2533 | int idx) | 2682 | * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy |
2683 | * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact | ||
2684 | * match. This is done so that a non-zero return indicates an overlap that | ||
2685 | * will require a synchronous transaction, but it can still be | ||
2686 | * used to distinguish between a partial and an exact match. | ||
2687 | */ | ||
2688 | static int | ||
2689 | xfs_alloc_busy_search( | ||
2690 | struct xfs_mount *mp, | ||
2691 | xfs_agnumber_t agno, | ||
2692 | xfs_agblock_t bno, | ||
2693 | xfs_extlen_t len) | ||
2534 | { | 2694 | { |
2535 | struct xfs_perag *pag; | 2695 | struct xfs_perag *pag; |
2536 | xfs_perag_busy_t *list; | 2696 | struct rb_node *rbp; |
2697 | struct xfs_busy_extent *busyp; | ||
2698 | int match = 0; | ||
2537 | 2699 | ||
2538 | ASSERT(idx < XFS_PAGB_NUM_SLOTS); | 2700 | pag = xfs_perag_get(mp, agno); |
2539 | pag = xfs_perag_get(tp->t_mountp, agno); | ||
2540 | spin_lock(&pag->pagb_lock); | 2701 | spin_lock(&pag->pagb_lock); |
2541 | list = pag->pagb_list; | ||
2542 | 2702 | ||
2543 | trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp); | 2703 | rbp = pag->pagb_tree.rb_node; |
2544 | 2704 | ||
2545 | if (list[idx].busy_tp == tp) { | 2705 | /* find closest start bno overlap */ |
2546 | list[idx].busy_tp = NULL; | 2706 | while (rbp) { |
2547 | pag->pagb_count--; | 2707 | busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node); |
2708 | if (bno < busyp->bno) { | ||
2709 | /* may overlap, but exact start block is lower */ | ||
2710 | if (bno + len > busyp->bno) | ||
2711 | match = -1; | ||
2712 | rbp = rbp->rb_left; | ||
2713 | } else if (bno > busyp->bno) { | ||
2714 | /* may overlap, but exact start block is higher */ | ||
2715 | if (bno < busyp->bno + busyp->length) | ||
2716 | match = -1; | ||
2717 | rbp = rbp->rb_right; | ||
2718 | } else { | ||
2719 | /* bno matches busyp, length determines exact match */ | ||
2720 | match = (busyp->length == len) ? 1 : -1; | ||
2721 | break; | ||
2722 | } | ||
2548 | } | 2723 | } |
2549 | |||
2550 | spin_unlock(&pag->pagb_lock); | 2724 | spin_unlock(&pag->pagb_lock); |
2725 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
2551 | xfs_perag_put(pag); | 2726 | xfs_perag_put(pag); |
2727 | return match; | ||
2552 | } | 2728 | } |
2553 | 2729 | ||
2554 | 2730 | void | |
2555 | /* | 2731 | xfs_alloc_busy_clear( |
2556 | * If we find the extent in the busy list, force the log out to get the | 2732 | struct xfs_mount *mp, |
2557 | * extent out of the busy list so the caller can use it straight away. | 2733 | struct xfs_busy_extent *busyp) |
2558 | */ | ||
2559 | STATIC void | ||
2560 | xfs_alloc_search_busy(xfs_trans_t *tp, | ||
2561 | xfs_agnumber_t agno, | ||
2562 | xfs_agblock_t bno, | ||
2563 | xfs_extlen_t len) | ||
2564 | { | 2734 | { |
2565 | struct xfs_perag *pag; | 2735 | struct xfs_perag *pag; |
2566 | xfs_perag_busy_t *bsy; | ||
2567 | xfs_agblock_t uend, bend; | ||
2568 | xfs_lsn_t lsn = 0; | ||
2569 | int cnt; | ||
2570 | 2736 | ||
2571 | pag = xfs_perag_get(tp->t_mountp, agno); | 2737 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, |
2572 | spin_lock(&pag->pagb_lock); | 2738 | busyp->length); |
2573 | cnt = pag->pagb_count; | ||
2574 | 2739 | ||
2575 | /* | 2740 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, |
2576 | * search pagb_list for this slot, skipping open slots. We have to | 2741 | busyp->length) == 1); |
2577 | * search the entire array as there may be multiple overlaps and | ||
2578 | * we have to get the most recent LSN for the log force to push out | ||
2579 | * all the transactions that span the range. | ||
2580 | */ | ||
2581 | uend = bno + len - 1; | ||
2582 | for (cnt = 0; cnt < pag->pagb_count; cnt++) { | ||
2583 | bsy = &pag->pagb_list[cnt]; | ||
2584 | if (!bsy->busy_tp) | ||
2585 | continue; | ||
2586 | 2742 | ||
2587 | bend = bsy->busy_start + bsy->busy_length - 1; | 2743 | list_del_init(&busyp->list); |
2588 | if (bno > bend || uend < bsy->busy_start) | ||
2589 | continue; | ||
2590 | 2744 | ||
2591 | /* (start1,length1) within (start2, length2) */ | 2745 | pag = xfs_perag_get(mp, busyp->agno); |
2592 | if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) | 2746 | spin_lock(&pag->pagb_lock); |
2593 | lsn = bsy->busy_tp->t_commit_lsn; | 2747 | rb_erase(&busyp->rb_node, &pag->pagb_tree); |
2594 | } | ||
2595 | spin_unlock(&pag->pagb_lock); | 2748 | spin_unlock(&pag->pagb_lock); |
2596 | xfs_perag_put(pag); | 2749 | xfs_perag_put(pag); |
2597 | trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); | ||
2598 | 2750 | ||
2599 | /* | 2751 | kmem_free(busyp); |
2600 | * If a block was found, force the log through the LSN of the | ||
2601 | * transaction that freed the block | ||
2602 | */ | ||
2603 | if (lsn) | ||
2604 | xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC); | ||
2605 | } | 2752 | } |
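xfs_alloc_busy_insert() above follows the canonical kernel rbtree insertion pattern: descend from the root remembering both the parent and the child link to fill, then splice with rb_link_node() and rebalance with rb_insert_color(). Stripped of the overlap and transaction-ID handling, the skeleton looks like this (hypothetical key type; duplicates are not handled here, where the real code merges extents or forces the log):

#include <linux/rbtree.h>

struct node {
        struct rb_node rb;
        unsigned long key;
};

static void tree_insert(struct rb_root *root, struct node *new)
{
        struct rb_node **link = &root->rb_node; /* link we will fill in */
        struct rb_node *parent = NULL;

        while (*link) {
                struct node *cur;

                parent = *link;
                cur = rb_entry(parent, struct node, rb);
                if (new->key < cur->key)
                        link = &(*link)->rb_left;
                else
                        link = &(*link)->rb_right;
        }
        rb_link_node(&new->rb, parent, link);   /* attach as a red leaf */
        rb_insert_color(&new->rb, root);        /* restore rbtree invariants */
}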
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 599bffa39784..6d05199b667c 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -22,6 +22,7 @@ struct xfs_buf; | |||
22 | struct xfs_mount; | 22 | struct xfs_mount; |
23 | struct xfs_perag; | 23 | struct xfs_perag; |
24 | struct xfs_trans; | 24 | struct xfs_trans; |
25 | struct xfs_busy_extent; | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. | 28 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. |
@@ -119,15 +120,13 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, | |||
119 | #ifdef __KERNEL__ | 120 | #ifdef __KERNEL__ |
120 | 121 | ||
121 | void | 122 | void |
122 | xfs_alloc_mark_busy(xfs_trans_t *tp, | 123 | xfs_alloc_busy_insert(xfs_trans_t *tp, |
123 | xfs_agnumber_t agno, | 124 | xfs_agnumber_t agno, |
124 | xfs_agblock_t bno, | 125 | xfs_agblock_t bno, |
125 | xfs_extlen_t len); | 126 | xfs_extlen_t len); |
126 | 127 | ||
127 | void | 128 | void |
128 | xfs_alloc_clear_busy(xfs_trans_t *tp, | 129 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); |
129 | xfs_agnumber_t ag, | ||
130 | int idx); | ||
131 | 130 | ||
132 | #endif /* __KERNEL__ */ | 131 | #endif /* __KERNEL__ */ |
133 | 132 | ||
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index b726e10d2c1c..83f494218759 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -134,7 +134,7 @@ xfs_allocbt_free_block( | |||
134 | * disk. If a busy block is allocated, the iclog is pushed up to the | 134 | * disk. If a busy block is allocated, the iclog is pushed up to the |
135 | * LSN that freed the block. | 135 | * LSN that freed the block. |
136 | */ | 136 | */ |
137 | xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 137 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); |
138 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 138 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
139 | return 0; | 139 | return 0; |
140 | } | 140 | } |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 5c11e4d17010..99587ded043f 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -3829,7 +3829,7 @@ xfs_bmap_add_attrfork( | |||
3829 | } | 3829 | } |
3830 | if ((error = xfs_bmap_finish(&tp, &flist, &committed))) | 3830 | if ((error = xfs_bmap_finish(&tp, &flist, &committed))) |
3831 | goto error2; | 3831 | goto error2; |
3832 | error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES); | 3832 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
3833 | ASSERT(ip->i_df.if_ext_max == | 3833 | ASSERT(ip->i_df.if_ext_max == |
3834 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); | 3834 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); |
3835 | return error; | 3835 | return error; |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index f3c49e69eab9..02a80984aa05 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -64,7 +64,7 @@ xfs_buf_item_log_debug( | |||
64 | nbytes = last - first + 1; | 64 | nbytes = last - first + 1; |
65 | bfset(bip->bli_logged, first, nbytes); | 65 | bfset(bip->bli_logged, first, nbytes); |
66 | for (x = 0; x < nbytes; x++) { | 66 | for (x = 0; x < nbytes; x++) { |
67 | chunk_num = byte >> XFS_BLI_SHIFT; | 67 | chunk_num = byte >> XFS_BLF_SHIFT; |
68 | word_num = chunk_num >> BIT_TO_WORD_SHIFT; | 68 | word_num = chunk_num >> BIT_TO_WORD_SHIFT; |
69 | bit_num = chunk_num & (NBWORD - 1); | 69 | bit_num = chunk_num & (NBWORD - 1); |
70 | wordp = &(bip->bli_format.blf_data_map[word_num]); | 70 | wordp = &(bip->bli_format.blf_data_map[word_num]); |
@@ -166,7 +166,7 @@ xfs_buf_item_size( | |||
166 | * cancel flag in it. | 166 | * cancel flag in it. |
167 | */ | 167 | */ |
168 | trace_xfs_buf_item_size_stale(bip); | 168 | trace_xfs_buf_item_size_stale(bip); |
169 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 169 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
170 | return 1; | 170 | return 1; |
171 | } | 171 | } |
172 | 172 | ||
@@ -197,9 +197,9 @@ xfs_buf_item_size( | |||
197 | } else if (next_bit != last_bit + 1) { | 197 | } else if (next_bit != last_bit + 1) { |
198 | last_bit = next_bit; | 198 | last_bit = next_bit; |
199 | nvecs++; | 199 | nvecs++; |
200 | } else if (xfs_buf_offset(bp, next_bit * XFS_BLI_CHUNK) != | 200 | } else if (xfs_buf_offset(bp, next_bit * XFS_BLF_CHUNK) != |
201 | (xfs_buf_offset(bp, last_bit * XFS_BLI_CHUNK) + | 201 | (xfs_buf_offset(bp, last_bit * XFS_BLF_CHUNK) + |
202 | XFS_BLI_CHUNK)) { | 202 | XFS_BLF_CHUNK)) { |
203 | last_bit = next_bit; | 203 | last_bit = next_bit; |
204 | nvecs++; | 204 | nvecs++; |
205 | } else { | 205 | } else { |
@@ -254,6 +254,20 @@ xfs_buf_item_format( | |||
254 | vecp++; | 254 | vecp++; |
255 | nvecs = 1; | 255 | nvecs = 1; |
256 | 256 | ||
257 | /* | ||
258 | * If it is an inode buffer, transfer the in-memory state to the | ||
259 | * format flags and clear the in-memory state. We do not transfer | ||
260 | * this state if the inode buffer allocation has not yet been committed | ||
261 | * to the log as setting the XFS_BLF_INODE_BUF flag will prevent | ||
262 | * correct replay of the inode allocation. | ||
263 | */ | ||
264 | if (bip->bli_flags & XFS_BLI_INODE_BUF) { | ||
265 | if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && | ||
266 | xfs_log_item_in_current_chkpt(&bip->bli_item))) | ||
267 | bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; | ||
268 | bip->bli_flags &= ~XFS_BLI_INODE_BUF; | ||
269 | } | ||
270 | |||
257 | if (bip->bli_flags & XFS_BLI_STALE) { | 271 | if (bip->bli_flags & XFS_BLI_STALE) { |
258 | /* | 272 | /* |
259 | * The buffer is stale, so all we need to log | 273 | * The buffer is stale, so all we need to log |
@@ -261,7 +275,7 @@ xfs_buf_item_format( | |||
261 | * cancel flag in it. | 275 | * cancel flag in it. |
262 | */ | 276 | */ |
263 | trace_xfs_buf_item_format_stale(bip); | 277 | trace_xfs_buf_item_format_stale(bip); |
264 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 278 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
265 | bip->bli_format.blf_size = nvecs; | 279 | bip->bli_format.blf_size = nvecs; |
266 | return; | 280 | return; |
267 | } | 281 | } |
@@ -294,28 +308,28 @@ xfs_buf_item_format( | |||
294 | * keep counting and scanning. | 308 | * keep counting and scanning. |
295 | */ | 309 | */ |
296 | if (next_bit == -1) { | 310 | if (next_bit == -1) { |
297 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 311 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
298 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 312 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
299 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 313 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
300 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 314 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
301 | nvecs++; | 315 | nvecs++; |
302 | break; | 316 | break; |
303 | } else if (next_bit != last_bit + 1) { | 317 | } else if (next_bit != last_bit + 1) { |
304 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 318 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
305 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 319 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
306 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 320 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
307 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 321 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
308 | nvecs++; | 322 | nvecs++; |
309 | vecp++; | 323 | vecp++; |
310 | first_bit = next_bit; | 324 | first_bit = next_bit; |
311 | last_bit = next_bit; | 325 | last_bit = next_bit; |
312 | nbits = 1; | 326 | nbits = 1; |
313 | } else if (xfs_buf_offset(bp, next_bit << XFS_BLI_SHIFT) != | 327 | } else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) != |
314 | (xfs_buf_offset(bp, last_bit << XFS_BLI_SHIFT) + | 328 | (xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) + |
315 | XFS_BLI_CHUNK)) { | 329 | XFS_BLF_CHUNK)) { |
316 | buffer_offset = first_bit * XFS_BLI_CHUNK; | 330 | buffer_offset = first_bit * XFS_BLF_CHUNK; |
317 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 331 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); |
318 | vecp->i_len = nbits * XFS_BLI_CHUNK; | 332 | vecp->i_len = nbits * XFS_BLF_CHUNK; |
319 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 333 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; |
320 | /* You would think we need to bump the nvecs here too, but we do not: | 334 | /* You would think we need to bump the nvecs here too, but we do not: |
321 | * this number is used by recovery, and it gets confused by the boundary | 335 | * this number is used by recovery, and it gets confused by the boundary |
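The format loop above coalesces consecutive dirty chunks into a single iovec, starting a new vector only when the run of set bits breaks or when the underlying buffer memory is discontiguous (the xfs_buf_offset() comparison). At its core this is the standard "runs of set bits" scan; a sketch with the generic bitmap helpers, counting how many vectors a dirty map would need if the memory were always contiguous:

#include <linux/bitops.h>

static unsigned long count_runs(const unsigned long *map, unsigned long nbits)
{
        unsigned long start = 0, nvecs = 0;

        while ((start = find_next_bit(map, nbits, start)) < nbits) {
                /* the run ends at the next clear bit (or the end of the map) */
                start = find_next_zero_bit(map, nbits, start + 1);
                nvecs++;
        }
        return nvecs;
}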
@@ -341,10 +355,15 @@ xfs_buf_item_format( | |||
341 | } | 355 | } |
342 | 356 | ||
343 | /* | 357 | /* |
344 | * This is called to pin the buffer associated with the buf log | 358 | * This is called to pin the buffer associated with the buf log item in memory |
345 | * item in memory so it cannot be written out. Simply call bpin() | 359 | * so it cannot be written out. Simply call bpin() on the buffer to do this. |
346 | * on the buffer to do this. | 360 | * |
361 | * We also always take a reference to the buffer log item here so that the bli | ||
362 | * is held while the item is pinned in memory. This means that we can | ||
363 | * unconditionally drop the reference that a transaction holds when the | ||
364 | * transaction is completed. | ||
347 | */ | 365 | */ |
366 | |||
348 | STATIC void | 367 | STATIC void |
349 | xfs_buf_item_pin( | 368 | xfs_buf_item_pin( |
350 | xfs_buf_log_item_t *bip) | 369 | xfs_buf_log_item_t *bip) |
@@ -356,6 +375,7 @@ xfs_buf_item_pin( | |||
356 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 375 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
357 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 376 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
358 | (bip->bli_flags & XFS_BLI_STALE)); | 377 | (bip->bli_flags & XFS_BLI_STALE)); |
378 | atomic_inc(&bip->bli_refcount); | ||
359 | trace_xfs_buf_item_pin(bip); | 379 | trace_xfs_buf_item_pin(bip); |
360 | xfs_bpin(bp); | 380 | xfs_bpin(bp); |
361 | } | 381 | } |
@@ -372,12 +392,12 @@ xfs_buf_item_pin( | |||
372 | */ | 392 | */ |
373 | STATIC void | 393 | STATIC void |
374 | xfs_buf_item_unpin( | 394 | xfs_buf_item_unpin( |
375 | xfs_buf_log_item_t *bip, | 395 | xfs_buf_log_item_t *bip) |
376 | int stale) | ||
377 | { | 396 | { |
378 | struct xfs_ail *ailp; | 397 | struct xfs_ail *ailp; |
379 | xfs_buf_t *bp; | 398 | xfs_buf_t *bp; |
380 | int freed; | 399 | int freed; |
400 | int stale = bip->bli_flags & XFS_BLI_STALE; | ||
381 | 401 | ||
382 | bp = bip->bli_buf; | 402 | bp = bip->bli_buf; |
383 | ASSERT(bp != NULL); | 403 | ASSERT(bp != NULL); |
@@ -393,7 +413,7 @@ xfs_buf_item_unpin( | |||
393 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 413 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
394 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | 414 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); |
395 | ASSERT(XFS_BUF_ISSTALE(bp)); | 415 | ASSERT(XFS_BUF_ISSTALE(bp)); |
396 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 416 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
397 | trace_xfs_buf_item_unpin_stale(bip); | 417 | trace_xfs_buf_item_unpin_stale(bip); |
398 | 418 | ||
399 | /* | 419 | /* |
@@ -428,40 +448,34 @@ xfs_buf_item_unpin_remove( | |||
428 | xfs_buf_log_item_t *bip, | 448 | xfs_buf_log_item_t *bip, |
429 | xfs_trans_t *tp) | 449 | xfs_trans_t *tp) |
430 | { | 450 | { |
431 | xfs_buf_t *bp; | 451 | /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */ |
432 | xfs_log_item_desc_t *lidp; | ||
433 | int stale = 0; | ||
434 | |||
435 | bp = bip->bli_buf; | ||
436 | /* | ||
437 | * will xfs_buf_item_unpin() call xfs_buf_item_relse()? | ||
438 | */ | ||
439 | if ((atomic_read(&bip->bli_refcount) == 1) && | 452 | if ((atomic_read(&bip->bli_refcount) == 1) && |
440 | (bip->bli_flags & XFS_BLI_STALE)) { | 453 | (bip->bli_flags & XFS_BLI_STALE)) { |
454 | /* | ||
455 | * yes -- we can safely do some work here and then call | ||
456 | * buf_item_unpin to do the rest because we are | ||
457 | * holding the buffer locked so no one else will be | ||
458 | * able to bump up the refcount. We have to remove the | ||
459 | * log item from the transaction as we are about to release | ||
460 | * our reference to the buffer. If we don't, the unlock that | ||
461 | * occurs later in the xfs_trans_uncommit() will try to | ||
462 | * reference the buffer which we no longer have a hold on. | ||
463 | */ | ||
464 | struct xfs_log_item_desc *lidp; | ||
465 | |||
441 | ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); | 466 | ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); |
442 | trace_xfs_buf_item_unpin_stale(bip); | 467 | trace_xfs_buf_item_unpin_stale(bip); |
443 | 468 | ||
444 | /* | 469 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip); |
445 | * yes -- clear the xaction descriptor in-use flag | ||
446 | * and free the chunk if required. We can safely | ||
447 | * do some work here and then call buf_item_unpin | ||
448 | * to do the rest because if the if is true, then | ||
449 | * we are holding the buffer locked so no one else | ||
450 | * will be able to bump up the refcount. | ||
451 | */ | ||
452 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) bip); | ||
453 | stale = lidp->lid_flags & XFS_LID_BUF_STALE; | ||
454 | xfs_trans_free_item(tp, lidp); | 470 | xfs_trans_free_item(tp, lidp); |
471 | |||
455 | /* | 472 | /* |
456 | * Since the transaction no longer refers to the buffer, | 473 | * Since the transaction no longer refers to the buffer, the |
457 | * the buffer should no longer refer to the transaction. | 474 | * buffer should no longer refer to the transaction. |
458 | */ | 475 | */ |
459 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); | 476 | XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL); |
460 | } | 477 | } |
461 | 478 | xfs_buf_item_unpin(bip); | |
462 | xfs_buf_item_unpin(bip, stale); | ||
463 | |||
464 | return; | ||
465 | } | 479 | } |
466 | 480 | ||
467 | /* | 481 | /* |
@@ -495,20 +509,23 @@ xfs_buf_item_trylock( | |||
495 | } | 509 | } |
496 | 510 | ||
497 | /* | 511 | /* |
498 | * Release the buffer associated with the buf log item. | 512 | * Release the buffer associated with the buf log item. If there is no dirty |
499 | * If there is no dirty logged data associated with the | 513 | * logged data associated with the buffer recorded in the buf log item, then |
500 | * buffer recorded in the buf log item, then free the | 514 | * free the buf log item and remove the reference to it in the buffer. |
501 | * buf log item and remove the reference to it in the | 515 | * |
502 | * buffer. | 516 | * This call ignores the recursion count. It is only called when the buffer |
517 | * should REALLY be unlocked, regardless of the recursion count. | ||
503 | * | 518 | * |
504 | * This call ignores the recursion count. It is only called | 519 | * We unconditionally drop the transaction's reference to the log item. If the |
505 | * when the buffer should REALLY be unlocked, regardless | 520 | * item was logged, then another reference was taken when it was pinned, so we |
506 | * of the recursion count. | 521 | * can safely drop the transaction reference now. This also allows us to avoid |
522 | * potential races with the unpin code freeing the bli by not referencing the | ||
523 | * bli after we've dropped the reference count. | ||
507 | * | 524 | * |
508 | * If the XFS_BLI_HOLD flag is set in the buf log item, then | 525 | * If the XFS_BLI_HOLD flag is set in the buf log item, then free the log item |
509 | * free the log item if necessary but do not unlock the buffer. | 526 | * if necessary but do not unlock the buffer. This is for support of |
510 | * This is for support of xfs_trans_bhold(). Make sure the | 527 | * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't |
511 | * XFS_BLI_HOLD field is cleared if we don't free the item. | 528 | * free the item. |
512 | */ | 529 | */ |
513 | STATIC void | 530 | STATIC void |
514 | xfs_buf_item_unlock( | 531 | xfs_buf_item_unlock( |
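The reworked pin/unlock comments above boil down to one ownership rule: pinning takes its own reference on the bli, so unlock can drop the transaction's reference unconditionally without racing against unpin freeing the item. A toy model of the protocol, assuming nothing beyond atomic reference counting:

#include <linux/atomic.h>

struct item {
        atomic_t refcount;      /* one ref per transaction, plus one per pin */
};

static void item_pin(struct item *ip)
{
        atomic_inc(&ip->refcount);      /* the pin holds its own reference */
}

static void item_unlock_from_trans(struct item *ip)
{
        /*
         * Safe even if the item was logged: the pin's reference keeps the
         * item alive until log I/O completion drops it.
         */
        atomic_dec(&ip->refcount);
}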
@@ -520,73 +537,54 @@ xfs_buf_item_unlock( | |||
520 | 537 | ||
521 | bp = bip->bli_buf; | 538 | bp = bip->bli_buf; |
522 | 539 | ||
523 | /* | 540 | /* Clear the buffer's association with this transaction. */ |
524 | * Clear the buffer's association with this transaction. | ||
525 | */ | ||
526 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); | 541 | XFS_BUF_SET_FSPRIVATE2(bp, NULL); |
527 | 542 | ||
528 | /* | 543 | /* |
529 | * If this is a transaction abort, don't return early. | 544 | * If this is a transaction abort, don't return early. Instead, allow |
530 | * Instead, allow the brelse to happen. | 545 | * the brelse to happen. Normally it would be done for stale |
531 | * Normally it would be done for stale (cancelled) buffers | 546 | * (cancelled) buffers at unpin time, but we'll never go through the |
532 | * at unpin time, but we'll never go through the pin/unpin | 547 | * pin/unpin cycle if we abort inside commit. |
533 | * cycle if we abort inside commit. | ||
534 | */ | 548 | */ |
535 | aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; | 549 | aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0; |
536 | 550 | ||
537 | /* | 551 | /* |
538 | * If the buf item is marked stale, then don't do anything. | 552 | * Before possibly freeing the buf item, determine if we should |
539 | * We'll unlock the buffer and free the buf item when the | 553 | * release the buffer at the end of this routine. |
540 | * buffer is unpinned for the last time. | ||
541 | */ | 554 | */ |
542 | if (bip->bli_flags & XFS_BLI_STALE) { | 555 | hold = bip->bli_flags & XFS_BLI_HOLD; |
543 | bip->bli_flags &= ~XFS_BLI_LOGGED; | 556 | |
544 | trace_xfs_buf_item_unlock_stale(bip); | 557 | /* Clear the per transaction state. */ |
545 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 558 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD); |
546 | if (!aborted) | ||
547 | return; | ||
548 | } | ||
549 | 559 | ||
550 | /* | 560 | /* |
551 | * Drop the transaction's reference to the log item if | 561 | * If the buf item is marked stale, then don't do anything. We'll |
552 | * it was not logged as part of the transaction. Otherwise | 562 | * unlock the buffer and free the buf item when the buffer is unpinned |
553 | * we'll drop the reference in xfs_buf_item_unpin() when | 563 | * for the last time. |
554 | * the transaction is really through with the buffer. | ||
555 | */ | 564 | */ |
556 | if (!(bip->bli_flags & XFS_BLI_LOGGED)) { | 565 | if (bip->bli_flags & XFS_BLI_STALE) { |
557 | atomic_dec(&bip->bli_refcount); | 566 | trace_xfs_buf_item_unlock_stale(bip); |
558 | } else { | 567 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
559 | /* | 568 | if (!aborted) { |
560 | * Clear the logged flag since this is per | 569 | atomic_dec(&bip->bli_refcount); |
561 | * transaction state. | 570 | return; |
562 | */ | 571 | } |
563 | bip->bli_flags &= ~XFS_BLI_LOGGED; | ||
564 | } | 572 | } |
565 | 573 | ||
566 | /* | ||
567 | * Before possibly freeing the buf item, determine if we should | ||
568 | * release the buffer at the end of this routine. | ||
569 | */ | ||
570 | hold = bip->bli_flags & XFS_BLI_HOLD; | ||
571 | trace_xfs_buf_item_unlock(bip); | 574 | trace_xfs_buf_item_unlock(bip); |
572 | 575 | ||
573 | /* | 576 | /* |
574 | * If the buf item isn't tracking any data, free it. | 577 | * If the buf item isn't tracking any data, free it, otherwise drop the |
575 | * Otherwise, if XFS_BLI_HOLD is set clear it. | 578 | * reference we hold to it. |
576 | */ | 579 | */ |
577 | if (xfs_bitmap_empty(bip->bli_format.blf_data_map, | 580 | if (xfs_bitmap_empty(bip->bli_format.blf_data_map, |
578 | bip->bli_format.blf_map_size)) { | 581 | bip->bli_format.blf_map_size)) |
579 | xfs_buf_item_relse(bp); | 582 | xfs_buf_item_relse(bp); |
580 | } else if (hold) { | 583 | else |
581 | bip->bli_flags &= ~XFS_BLI_HOLD; | 584 | atomic_dec(&bip->bli_refcount); |
582 | } | ||
583 | 585 | ||
584 | /* | 586 | if (!hold) |
585 | * Release the buffer if XFS_BLI_HOLD was not set. | ||
586 | */ | ||
587 | if (!hold) { | ||
588 | xfs_buf_relse(bp); | 587 | xfs_buf_relse(bp); |
589 | } | ||
590 | } | 588 | } |
591 | 589 | ||
592 | /* | 590 | /* |
@@ -675,7 +673,7 @@ static struct xfs_item_ops xfs_buf_item_ops = { | |||
675 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) | 673 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) |
676 | xfs_buf_item_format, | 674 | xfs_buf_item_format, |
677 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, | 675 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, |
678 | .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_buf_item_unpin, | 676 | .iop_unpin = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin, |
679 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) | 677 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) |
680 | xfs_buf_item_unpin_remove, | 678 | xfs_buf_item_unpin_remove, |
681 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, | 679 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, |
@@ -723,20 +721,17 @@ xfs_buf_item_init( | |||
723 | } | 721 | } |
724 | 722 | ||
725 | /* | 723 | /* |
726 | * chunks is the number of XFS_BLI_CHUNK size pieces | 724 | * chunks is the number of XFS_BLF_CHUNK size pieces |
727 | * the buffer can be divided into. Make sure not to | 725 | * the buffer can be divided into. Make sure not to |
728 | * truncate any pieces. map_size is the size of the | 726 | * truncate any pieces. map_size is the size of the |
729 | * bitmap needed to describe the chunks of the buffer. | 727 | * bitmap needed to describe the chunks of the buffer. |
730 | */ | 728 | */ |
731 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLI_CHUNK - 1)) >> XFS_BLI_SHIFT); | 729 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT); |
732 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); | 730 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); |
733 | 731 | ||
734 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, | 732 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, |
735 | KM_SLEEP); | 733 | KM_SLEEP); |
736 | bip->bli_item.li_type = XFS_LI_BUF; | 734 | xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); |
737 | bip->bli_item.li_ops = &xfs_buf_item_ops; | ||
738 | bip->bli_item.li_mountp = mp; | ||
739 | bip->bli_item.li_ailp = mp->m_ail; | ||
740 | bip->bli_buf = bp; | 735 | bip->bli_buf = bp; |
741 | xfs_buf_hold(bp); | 736 | xfs_buf_hold(bp); |
742 | bip->bli_format.blf_type = XFS_LI_BUF; | 737 | bip->bli_format.blf_type = XFS_LI_BUF; |
@@ -799,8 +794,8 @@ xfs_buf_item_log( | |||
799 | /* | 794 | /* |
800 | * Convert byte offsets to bit numbers. | 795 | * Convert byte offsets to bit numbers. |
801 | */ | 796 | */ |
802 | first_bit = first >> XFS_BLI_SHIFT; | 797 | first_bit = first >> XFS_BLF_SHIFT; |
803 | last_bit = last >> XFS_BLI_SHIFT; | 798 | last_bit = last >> XFS_BLF_SHIFT; |
804 | 799 | ||
805 | /* | 800 | /* |
806 | * Calculate the total number of bits to be set. | 801 | * Calculate the total number of bits to be set. |
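The chunk arithmetic above is easy to sanity-check outside the kernel. The following standalone C sketch (userspace only; the buffer length and dirty byte range are made up, the constants are copied from xfs_buf_item.h) mirrors the bitmap sizing in xfs_buf_item_init() and the byte-offset-to-bit conversion in xfs_buf_item_log():

#include <stdio.h>

#define NBBY			8	/* bits per byte */
#define XFS_BLF_CHUNK		128	/* bytes covered by one bitmap bit */
#define XFS_BLF_SHIFT		7	/* log2(XFS_BLF_CHUNK) */
#define BIT_TO_WORD_SHIFT	5	/* log2(bits per 32-bit word) */
#define NBWORD			(NBBY * (int)sizeof(unsigned int))

int main(void)
{
	int buf_len = 4096;		/* made-up buffer size */
	int first = 512, last = 1023;	/* made-up dirty byte range */

	/* xfs_buf_item_init(): round up to whole chunks, size the bitmap */
	int chunks = (buf_len + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT;
	int map_size = (chunks + NBWORD) >> BIT_TO_WORD_SHIFT;

	/* xfs_buf_item_log(): byte offsets to chunk (bit) numbers */
	int first_bit = first >> XFS_BLF_SHIFT;
	int last_bit = last >> XFS_BLF_SHIFT;

	printf("chunks=%d map_size=%d bits=%d..%d\n",
	       chunks, map_size, first_bit, last_bit);
	return 0;
}

For a 4096-byte buffer this prints chunks=32 map_size=2, i.e. two bitmap words track the 32 dirty-range chunks, and the 512..1023 byte range maps to bits 4..7.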
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 217f34af00cb..f20bb472d582 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
@@ -26,7 +26,7 @@ extern kmem_zone_t *xfs_buf_item_zone; | |||
26 | * have been logged. | 26 | * have been logged. |
27 | * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. | 27 | * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. |
28 | */ | 28 | */ |
29 | typedef struct xfs_buf_log_format_t { | 29 | typedef struct xfs_buf_log_format { |
30 | unsigned short blf_type; /* buf log item type indicator */ | 30 | unsigned short blf_type; /* buf log item type indicator */ |
31 | unsigned short blf_size; /* size of this item */ | 31 | unsigned short blf_size; /* size of this item */ |
32 | ushort blf_flags; /* misc state */ | 32 | ushort blf_flags; /* misc state */ |
@@ -41,22 +41,22 @@ typedef struct xfs_buf_log_format_t { | |||
41 | * This flag indicates that the buffer contains on disk inodes | 41 | * This flag indicates that the buffer contains on disk inodes |
42 | * and requires special recovery handling. | 42 | * and requires special recovery handling. |
43 | */ | 43 | */ |
44 | #define XFS_BLI_INODE_BUF 0x1 | 44 | #define XFS_BLF_INODE_BUF 0x1 |
45 | /* | 45 | /* |
46 | * This flag indicates that the buffer should not be replayed | 46 | * This flag indicates that the buffer should not be replayed |
47 | * during recovery because its blocks are being freed. | 47 | * during recovery because its blocks are being freed. |
48 | */ | 48 | */ |
49 | #define XFS_BLI_CANCEL 0x2 | 49 | #define XFS_BLF_CANCEL 0x2 |
50 | /* | 50 | /* |
51 | * This flag indicates that the buffer contains on disk | 51 | * This flag indicates that the buffer contains on disk |
52 | * user or group dquots and may require special recovery handling. | 52 | * user or group dquots and may require special recovery handling. |
53 | */ | 53 | */ |
54 | #define XFS_BLI_UDQUOT_BUF 0x4 | 54 | #define XFS_BLF_UDQUOT_BUF 0x4 |
55 | #define XFS_BLI_PDQUOT_BUF 0x8 | 55 | #define XFS_BLF_PDQUOT_BUF 0x8 |
56 | #define XFS_BLI_GDQUOT_BUF 0x10 | 56 | #define XFS_BLF_GDQUOT_BUF 0x10 |
57 | 57 | ||
58 | #define XFS_BLI_CHUNK 128 | 58 | #define XFS_BLF_CHUNK 128 |
59 | #define XFS_BLI_SHIFT 7 | 59 | #define XFS_BLF_SHIFT 7 |
60 | #define BIT_TO_WORD_SHIFT 5 | 60 | #define BIT_TO_WORD_SHIFT 5 |
61 | #define NBWORD (NBBY * sizeof(unsigned int)) | 61 | #define NBWORD (NBBY * sizeof(unsigned int)) |
62 | 62 | ||
@@ -69,6 +69,7 @@ typedef struct xfs_buf_log_format_t { | |||
69 | #define XFS_BLI_LOGGED 0x08 | 69 | #define XFS_BLI_LOGGED 0x08 |
70 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 | 70 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 |
71 | #define XFS_BLI_STALE_INODE 0x20 | 71 | #define XFS_BLI_STALE_INODE 0x20 |
72 | #define XFS_BLI_INODE_BUF 0x40 | ||
72 | 73 | ||
73 | #define XFS_BLI_FLAGS \ | 74 | #define XFS_BLI_FLAGS \ |
74 | { XFS_BLI_HOLD, "HOLD" }, \ | 75 | { XFS_BLI_HOLD, "HOLD" }, \ |
@@ -76,7 +77,8 @@ typedef struct xfs_buf_log_format_t { | |||
76 | { XFS_BLI_STALE, "STALE" }, \ | 77 | { XFS_BLI_STALE, "STALE" }, \ |
77 | { XFS_BLI_LOGGED, "LOGGED" }, \ | 78 | { XFS_BLI_LOGGED, "LOGGED" }, \ |
78 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ | 79 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ |
79 | { XFS_BLI_STALE_INODE, "STALE_INODE" } | 80 | { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ |
81 | { XFS_BLI_INODE_BUF, "INODE_BUF" } | ||
80 | 82 | ||
81 | 83 | ||
82 | #ifdef __KERNEL__ | 84 | #ifdef __KERNEL__ |
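The rename above separates two namespaces that previously shared the XFS_BLI_ prefix: XFS_BLF_* values live in the on-disk blf_flags field of the log format, while XFS_BLI_* values live only in the in-core bli_flags of the buf log item. Because the two sets are never stored in the same field, the new in-core XFS_BLI_INODE_BUF (0x40) cannot be confused with the on-disk XFS_BLF_INODE_BUF (0x1). A minimal compilable sketch of the distinction (the struct names are stand-ins, not the kernel types):

#include <stdio.h>

/* On-disk log format flags: stored in blf_flags and written to the log. */
#define XFS_BLF_INODE_BUF	0x1
#define XFS_BLF_CANCEL		0x2

/* In-core item state flags: stored in bli_flags, never hit the disk. */
#define XFS_BLI_HOLD		0x01
#define XFS_BLI_INODE_BUF	0x40

struct blf { unsigned short blf_flags; };	/* stand-in for the format */
struct bli { unsigned int   bli_flags; };	/* stand-in for the item */

int main(void)
{
	struct blf format = { .blf_flags = XFS_BLF_INODE_BUF };
	struct bli item   = { .bli_flags = XFS_BLI_INODE_BUF | XFS_BLI_HOLD };

	/* Each namespace is only ever tested against its own field. */
	printf("on-disk inode buf=%d in-core inode buf=%d\n",
	       !!(format.blf_flags & XFS_BLF_INODE_BUF),
	       !!(item.bli_flags & XFS_BLI_INODE_BUF));
	return 0;
}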
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 92d5cd5bf4f2..047b8a8e5c29 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -170,7 +170,7 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...) | |||
170 | va_list ap; | 170 | va_list ap; |
171 | 171 | ||
172 | #ifdef DEBUG | 172 | #ifdef DEBUG |
173 | xfs_panic_mask |= XFS_PTAG_SHUTDOWN_CORRUPT; | 173 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); |
174 | #endif | 174 | #endif |
175 | 175 | ||
176 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) | 176 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) |
@@ -186,18 +186,18 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...) | |||
186 | 186 | ||
187 | void | 187 | void |
188 | xfs_error_report( | 188 | xfs_error_report( |
189 | char *tag, | 189 | const char *tag, |
190 | int level, | 190 | int level, |
191 | xfs_mount_t *mp, | 191 | struct xfs_mount *mp, |
192 | char *fname, | 192 | const char *filename, |
193 | int linenum, | 193 | int linenum, |
194 | inst_t *ra) | 194 | inst_t *ra) |
195 | { | 195 | { |
196 | if (level <= xfs_error_level) { | 196 | if (level <= xfs_error_level) { |
197 | xfs_cmn_err(XFS_PTAG_ERROR_REPORT, | 197 | xfs_cmn_err(XFS_PTAG_ERROR_REPORT, |
198 | CE_ALERT, mp, | 198 | CE_ALERT, mp, |
199 | "XFS internal error %s at line %d of file %s. Caller 0x%p\n", | 199 | "XFS internal error %s at line %d of file %s. Caller 0x%p\n", |
200 | tag, linenum, fname, ra); | 200 | tag, linenum, filename, ra); |
201 | 201 | ||
202 | xfs_stack_trace(); | 202 | xfs_stack_trace(); |
203 | } | 203 | } |
@@ -205,15 +205,15 @@ xfs_error_report( | |||
205 | 205 | ||
206 | void | 206 | void |
207 | xfs_corruption_error( | 207 | xfs_corruption_error( |
208 | char *tag, | 208 | const char *tag, |
209 | int level, | 209 | int level, |
210 | xfs_mount_t *mp, | 210 | struct xfs_mount *mp, |
211 | void *p, | 211 | void *p, |
212 | char *fname, | 212 | const char *filename, |
213 | int linenum, | 213 | int linenum, |
214 | inst_t *ra) | 214 | inst_t *ra) |
215 | { | 215 | { |
216 | if (level <= xfs_error_level) | 216 | if (level <= xfs_error_level) |
217 | xfs_hex_dump(p, 16); | 217 | xfs_hex_dump(p, 16); |
218 | xfs_error_report(tag, level, mp, fname, linenum, ra); | 218 | xfs_error_report(tag, level, mp, filename, linenum, ra); |
219 | } | 219 | } |
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 0c93051c4651..c2c1a072bb82 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -29,10 +29,11 @@ extern int xfs_error_trap(int); | |||
29 | 29 | ||
30 | struct xfs_mount; | 30 | struct xfs_mount; |
31 | 31 | ||
32 | extern void xfs_error_report(char *tag, int level, struct xfs_mount *mp, | 32 | extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, |
33 | char *fname, int linenum, inst_t *ra); | 33 | const char *filename, int linenum, inst_t *ra); |
34 | extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp, | 34 | extern void xfs_corruption_error(const char *tag, int level, |
35 | void *p, char *fname, int linenum, inst_t *ra); | 35 | struct xfs_mount *mp, void *p, const char *filename, |
36 | int linenum, inst_t *ra); | ||
36 | 37 | ||
37 | #define XFS_ERROR_REPORT(e, lvl, mp) \ | 38 | #define XFS_ERROR_REPORT(e, lvl, mp) \ |
38 | xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) | 39 | xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) |
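Constifying tag and filename lets XFS_ERROR_REPORT pass __FILE__ and literal tag strings without discarding const. A small userspace model of the pattern (this is not the kernel interface; the level and return-address arguments are dropped for brevity):

#include <stdio.h>

/* Userspace model only: reporting reduced to a single fprintf. */
static void error_report(const char *tag, const char *filename, int linenum)
{
	fprintf(stderr, "XFS internal error %s at line %d of file %s.\n",
		tag, linenum, filename);
}

#define ERROR_REPORT(tag)	error_report((tag), __FILE__, __LINE__)

int main(void)
{
	/* A string literal is now a valid argument: the parameter is const. */
	ERROR_REPORT("example-tag");
	return 0;
}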
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 6f35ed1b39b9..409fe81585fd 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -106,7 +106,7 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip) | |||
106 | */ | 106 | */ |
107 | /*ARGSUSED*/ | 107 | /*ARGSUSED*/ |
108 | STATIC void | 108 | STATIC void |
109 | xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) | 109 | xfs_efi_item_unpin(xfs_efi_log_item_t *efip) |
110 | { | 110 | { |
111 | struct xfs_ail *ailp = efip->efi_item.li_ailp; | 111 | struct xfs_ail *ailp = efip->efi_item.li_ailp; |
112 | 112 | ||
@@ -224,7 +224,7 @@ static struct xfs_item_ops xfs_efi_item_ops = { | |||
224 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) | 224 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) |
225 | xfs_efi_item_format, | 225 | xfs_efi_item_format, |
226 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin, | 226 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin, |
227 | .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_efi_item_unpin, | 227 | .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin, |
228 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) | 228 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) |
229 | xfs_efi_item_unpin_remove, | 229 | xfs_efi_item_unpin_remove, |
230 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock, | 230 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock, |
@@ -259,10 +259,7 @@ xfs_efi_init(xfs_mount_t *mp, | |||
259 | KM_SLEEP); | 259 | KM_SLEEP); |
260 | } | 260 | } |
261 | 261 | ||
262 | efip->efi_item.li_type = XFS_LI_EFI; | 262 | xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); |
263 | efip->efi_item.li_ops = &xfs_efi_item_ops; | ||
264 | efip->efi_item.li_mountp = mp; | ||
265 | efip->efi_item.li_ailp = mp->m_ail; | ||
266 | efip->efi_format.efi_nextents = nextents; | 263 | efip->efi_format.efi_nextents = nextents; |
267 | efip->efi_format.efi_id = (__psint_t)(void*)efip; | 264 | efip->efi_format.efi_id = (__psint_t)(void*)efip; |
268 | 265 | ||
@@ -428,7 +425,7 @@ xfs_efd_item_pin(xfs_efd_log_item_t *efdp) | |||
428 | */ | 425 | */ |
429 | /*ARGSUSED*/ | 426 | /*ARGSUSED*/ |
430 | STATIC void | 427 | STATIC void |
431 | xfs_efd_item_unpin(xfs_efd_log_item_t *efdp, int stale) | 428 | xfs_efd_item_unpin(xfs_efd_log_item_t *efdp) |
432 | { | 429 | { |
433 | return; | 430 | return; |
434 | } | 431 | } |
@@ -518,7 +515,7 @@ static struct xfs_item_ops xfs_efd_item_ops = { | |||
518 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) | 515 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) |
519 | xfs_efd_item_format, | 516 | xfs_efd_item_format, |
520 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin, | 517 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin, |
521 | .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_efd_item_unpin, | 518 | .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin, |
522 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) | 519 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) |
523 | xfs_efd_item_unpin_remove, | 520 | xfs_efd_item_unpin_remove, |
524 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock, | 521 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock, |
@@ -554,10 +551,7 @@ xfs_efd_init(xfs_mount_t *mp, | |||
554 | KM_SLEEP); | 551 | KM_SLEEP); |
555 | } | 552 | } |
556 | 553 | ||
557 | efdp->efd_item.li_type = XFS_LI_EFD; | 554 | xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); |
558 | efdp->efd_item.li_ops = &xfs_efd_item_ops; | ||
559 | efdp->efd_item.li_mountp = mp; | ||
560 | efdp->efd_item.li_ailp = mp->m_ail; | ||
561 | efdp->efd_efip = efip; | 555 | efdp->efd_efip = efip; |
562 | efdp->efd_format.efd_nextents = nextents; | 556 | efdp->efd_format.efd_nextents = nextents; |
563 | efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; | 557 | efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0ffd56447045..8cd6e8d8fe9c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -2449,6 +2449,8 @@ xfs_iunpin_nowait( | |||
2449 | { | 2449 | { |
2450 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2450 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
2451 | 2451 | ||
2452 | trace_xfs_inode_unpin_nowait(ip, _RET_IP_); | ||
2453 | |||
2452 | /* Give the log a push to start the unpinning I/O */ | 2454 | /* Give the log a push to start the unpinning I/O */ |
2453 | xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); | 2455 | xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); |
2454 | 2456 | ||
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7bfea8540159..cf8249a60004 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -543,6 +543,7 @@ xfs_inode_item_pin( | |||
543 | { | 543 | { |
544 | ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); | 544 | ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); |
545 | 545 | ||
546 | trace_xfs_inode_pin(iip->ili_inode, _RET_IP_); | ||
546 | atomic_inc(&iip->ili_inode->i_pincount); | 547 | atomic_inc(&iip->ili_inode->i_pincount); |
547 | } | 548 | } |
548 | 549 | ||
@@ -556,11 +557,11 @@ xfs_inode_item_pin( | |||
556 | /* ARGSUSED */ | 557 | /* ARGSUSED */ |
557 | STATIC void | 558 | STATIC void |
558 | xfs_inode_item_unpin( | 559 | xfs_inode_item_unpin( |
559 | xfs_inode_log_item_t *iip, | 560 | xfs_inode_log_item_t *iip) |
560 | int stale) | ||
561 | { | 561 | { |
562 | struct xfs_inode *ip = iip->ili_inode; | 562 | struct xfs_inode *ip = iip->ili_inode; |
563 | 563 | ||
564 | trace_xfs_inode_unpin(ip, _RET_IP_); | ||
564 | ASSERT(atomic_read(&ip->i_pincount) > 0); | 565 | ASSERT(atomic_read(&ip->i_pincount) > 0); |
565 | if (atomic_dec_and_test(&ip->i_pincount)) | 566 | if (atomic_dec_and_test(&ip->i_pincount)) |
566 | wake_up(&ip->i_ipin_wait); | 567 | wake_up(&ip->i_ipin_wait); |
@@ -572,7 +573,7 @@ xfs_inode_item_unpin_remove( | |||
572 | xfs_inode_log_item_t *iip, | 573 | xfs_inode_log_item_t *iip, |
573 | xfs_trans_t *tp) | 574 | xfs_trans_t *tp) |
574 | { | 575 | { |
575 | xfs_inode_item_unpin(iip, 0); | 576 | xfs_inode_item_unpin(iip); |
576 | } | 577 | } |
577 | 578 | ||
578 | /* | 579 | /* |
@@ -838,7 +839,7 @@ static struct xfs_item_ops xfs_inode_item_ops = { | |||
838 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) | 839 | .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) |
839 | xfs_inode_item_format, | 840 | xfs_inode_item_format, |
840 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, | 841 | .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, |
841 | .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin, | 842 | .iop_unpin = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin, |
842 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) | 843 | .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) |
843 | xfs_inode_item_unpin_remove, | 844 | xfs_inode_item_unpin_remove, |
844 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, | 845 | .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, |
@@ -865,17 +866,9 @@ xfs_inode_item_init( | |||
865 | ASSERT(ip->i_itemp == NULL); | 866 | ASSERT(ip->i_itemp == NULL); |
866 | iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); | 867 | iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); |
867 | 868 | ||
868 | iip->ili_item.li_type = XFS_LI_INODE; | ||
869 | iip->ili_item.li_ops = &xfs_inode_item_ops; | ||
870 | iip->ili_item.li_mountp = mp; | ||
871 | iip->ili_item.li_ailp = mp->m_ail; | ||
872 | iip->ili_inode = ip; | 869 | iip->ili_inode = ip; |
873 | 870 | xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, | |
874 | /* | 871 | &xfs_inode_item_ops); |
875 | We have zeroed memory. No need ... | ||
876 | * iip->ili_extents_buf = NULL; | ||
876 | iip->ili_extents_buf = NULL; | ||
877 | */ | ||
878 | |||
879 | iip->ili_format.ilf_type = XFS_LI_INODE; | 872 | iip->ili_format.ilf_type = XFS_LI_INODE; |
880 | iip->ili_format.ilf_ino = ip->i_ino; | 873 | iip->ili_format.ilf_ino = ip->i_ino; |
881 | iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; | 874 | iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0b65039951a0..ef14943829da 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -55,71 +55,33 @@ | |||
55 | #define XFS_STRAT_WRITE_IMAPS 2 | 55 | #define XFS_STRAT_WRITE_IMAPS 2 |
56 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP | 56 | #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP |
57 | 57 | ||
58 | STATIC int | 58 | STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, |
59 | xfs_imap_to_bmap( | 59 | int, struct xfs_bmbt_irec *, int *); |
60 | xfs_inode_t *ip, | 60 | STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int, |
61 | xfs_off_t offset, | 61 | struct xfs_bmbt_irec *, int *); |
62 | xfs_bmbt_irec_t *imap, | 62 | STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, |
63 | xfs_iomap_t *iomapp, | 63 | struct xfs_bmbt_irec *, int *); |
64 | int imaps, /* Number of imap entries */ | ||
65 | int iomaps, /* Number of iomap entries */ | ||
66 | int flags) | ||
67 | { | ||
68 | xfs_mount_t *mp = ip->i_mount; | ||
69 | int pbm; | ||
70 | xfs_fsblock_t start_block; | ||
71 | |||
72 | |||
73 | for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) { | ||
74 | iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); | ||
75 | iomapp->iomap_delta = offset - iomapp->iomap_offset; | ||
76 | iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); | ||
77 | iomapp->iomap_flags = flags; | ||
78 | |||
79 | if (XFS_IS_REALTIME_INODE(ip)) { | ||
80 | iomapp->iomap_flags |= IOMAP_REALTIME; | ||
81 | iomapp->iomap_target = mp->m_rtdev_targp; | ||
82 | } else { | ||
83 | iomapp->iomap_target = mp->m_ddev_targp; | ||
84 | } | ||
85 | start_block = imap->br_startblock; | ||
86 | if (start_block == HOLESTARTBLOCK) { | ||
87 | iomapp->iomap_bn = IOMAP_DADDR_NULL; | ||
88 | iomapp->iomap_flags |= IOMAP_HOLE; | ||
89 | } else if (start_block == DELAYSTARTBLOCK) { | ||
90 | iomapp->iomap_bn = IOMAP_DADDR_NULL; | ||
91 | iomapp->iomap_flags |= IOMAP_DELAY; | ||
92 | } else { | ||
93 | iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block); | ||
94 | if (ISUNWRITTEN(imap)) | ||
95 | iomapp->iomap_flags |= IOMAP_UNWRITTEN; | ||
96 | } | ||
97 | |||
98 | offset += iomapp->iomap_bsize - iomapp->iomap_delta; | ||
99 | } | ||
100 | return pbm; /* Return the number filled */ | ||
101 | } | ||
102 | 64 | ||
103 | int | 65 | int |
104 | xfs_iomap( | 66 | xfs_iomap( |
105 | xfs_inode_t *ip, | 67 | struct xfs_inode *ip, |
106 | xfs_off_t offset, | 68 | xfs_off_t offset, |
107 | ssize_t count, | 69 | ssize_t count, |
108 | int flags, | 70 | int flags, |
109 | xfs_iomap_t *iomapp, | 71 | struct xfs_bmbt_irec *imap, |
110 | int *niomaps) | 72 | int *nimaps, |
73 | int *new) | ||
111 | { | 74 | { |
112 | xfs_mount_t *mp = ip->i_mount; | 75 | struct xfs_mount *mp = ip->i_mount; |
113 | xfs_fileoff_t offset_fsb, end_fsb; | 76 | xfs_fileoff_t offset_fsb, end_fsb; |
114 | int error = 0; | 77 | int error = 0; |
115 | int lockmode = 0; | 78 | int lockmode = 0; |
116 | xfs_bmbt_irec_t imap; | 79 | int bmapi_flags = 0; |
117 | int nimaps = 1; | ||
118 | int bmapi_flags = 0; | ||
119 | int iomap_flags = 0; | ||
120 | 80 | ||
121 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); | 81 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); |
122 | 82 | ||
83 | *new = 0; | ||
84 | |||
123 | if (XFS_FORCED_SHUTDOWN(mp)) | 85 | if (XFS_FORCED_SHUTDOWN(mp)) |
124 | return XFS_ERROR(EIO); | 86 | return XFS_ERROR(EIO); |
125 | 87 | ||
@@ -160,8 +122,8 @@ xfs_iomap( | |||
160 | 122 | ||
161 | error = xfs_bmapi(NULL, ip, offset_fsb, | 123 | error = xfs_bmapi(NULL, ip, offset_fsb, |
162 | (xfs_filblks_t)(end_fsb - offset_fsb), | 124 | (xfs_filblks_t)(end_fsb - offset_fsb), |
163 | bmapi_flags, NULL, 0, &imap, | 125 | bmapi_flags, NULL, 0, imap, |
164 | &nimaps, NULL, NULL); | 126 | nimaps, NULL, NULL); |
165 | 127 | ||
166 | if (error) | 128 | if (error) |
167 | goto out; | 129 | goto out; |
@@ -169,46 +131,41 @@ xfs_iomap( | |||
169 | switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { | 131 | switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { |
170 | case BMAPI_WRITE: | 132 | case BMAPI_WRITE: |
171 | /* If we found an extent, return it */ | 133 | /* If we found an extent, return it */ |
172 | if (nimaps && | 134 | if (*nimaps && |
173 | (imap.br_startblock != HOLESTARTBLOCK) && | 135 | (imap->br_startblock != HOLESTARTBLOCK) && |
174 | (imap.br_startblock != DELAYSTARTBLOCK)) { | 136 | (imap->br_startblock != DELAYSTARTBLOCK)) { |
175 | trace_xfs_iomap_found(ip, offset, count, flags, &imap); | 137 | trace_xfs_iomap_found(ip, offset, count, flags, imap); |
176 | break; | 138 | break; |
177 | } | 139 | } |
178 | 140 | ||
179 | if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { | 141 | if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { |
180 | error = xfs_iomap_write_direct(ip, offset, count, flags, | 142 | error = xfs_iomap_write_direct(ip, offset, count, flags, |
181 | &imap, &nimaps, nimaps); | 143 | imap, nimaps); |
182 | } else { | 144 | } else { |
183 | error = xfs_iomap_write_delay(ip, offset, count, flags, | 145 | error = xfs_iomap_write_delay(ip, offset, count, flags, |
184 | &imap, &nimaps); | 146 | imap, nimaps); |
185 | } | 147 | } |
186 | if (!error) { | 148 | if (!error) { |
187 | trace_xfs_iomap_alloc(ip, offset, count, flags, &imap); | 149 | trace_xfs_iomap_alloc(ip, offset, count, flags, imap); |
188 | } | 150 | } |
189 | iomap_flags = IOMAP_NEW; | 151 | *new = 1; |
190 | break; | 152 | break; |
191 | case BMAPI_ALLOCATE: | 153 | case BMAPI_ALLOCATE: |
192 | /* If we found an extent, return it */ | 154 | /* If we found an extent, return it */ |
193 | xfs_iunlock(ip, lockmode); | 155 | xfs_iunlock(ip, lockmode); |
194 | lockmode = 0; | 156 | lockmode = 0; |
195 | 157 | ||
196 | if (nimaps && !isnullstartblock(imap.br_startblock)) { | 158 | if (*nimaps && !isnullstartblock(imap->br_startblock)) { |
197 | trace_xfs_iomap_found(ip, offset, count, flags, &imap); | 159 | trace_xfs_iomap_found(ip, offset, count, flags, imap); |
198 | break; | 160 | break; |
199 | } | 161 | } |
200 | 162 | ||
201 | error = xfs_iomap_write_allocate(ip, offset, count, | 163 | error = xfs_iomap_write_allocate(ip, offset, count, |
202 | &imap, &nimaps); | 164 | imap, nimaps); |
203 | break; | 165 | break; |
204 | } | 166 | } |
205 | 167 | ||
206 | if (nimaps) { | 168 | ASSERT(*nimaps <= 1); |
207 | *niomaps = xfs_imap_to_bmap(ip, offset, &imap, | ||
208 | iomapp, nimaps, *niomaps, iomap_flags); | ||
209 | } else if (niomaps) { | ||
210 | *niomaps = 0; | ||
211 | } | ||
212 | 169 | ||
213 | out: | 170 | out: |
214 | if (lockmode) | 171 | if (lockmode) |
@@ -216,7 +173,6 @@ out: | |||
216 | return XFS_ERROR(error); | 173 | return XFS_ERROR(error); |
217 | } | 174 | } |
218 | 175 | ||
219 | |||
220 | STATIC int | 176 | STATIC int |
221 | xfs_iomap_eof_align_last_fsb( | 177 | xfs_iomap_eof_align_last_fsb( |
222 | xfs_mount_t *mp, | 178 | xfs_mount_t *mp, |
@@ -285,15 +241,14 @@ xfs_cmn_err_fsblock_zero( | |||
285 | return EFSCORRUPTED; | 241 | return EFSCORRUPTED; |
286 | } | 242 | } |
287 | 243 | ||
288 | int | 244 | STATIC int |
289 | xfs_iomap_write_direct( | 245 | xfs_iomap_write_direct( |
290 | xfs_inode_t *ip, | 246 | xfs_inode_t *ip, |
291 | xfs_off_t offset, | 247 | xfs_off_t offset, |
292 | size_t count, | 248 | size_t count, |
293 | int flags, | 249 | int flags, |
294 | xfs_bmbt_irec_t *ret_imap, | 250 | xfs_bmbt_irec_t *ret_imap, |
295 | int *nmaps, | 251 | int *nmaps) |
296 | int found) | ||
297 | { | 252 | { |
298 | xfs_mount_t *mp = ip->i_mount; | 253 | xfs_mount_t *mp = ip->i_mount; |
299 | xfs_fileoff_t offset_fsb; | 254 | xfs_fileoff_t offset_fsb; |
@@ -330,7 +285,7 @@ xfs_iomap_write_direct( | |||
330 | if (error) | 285 | if (error) |
331 | goto error_out; | 286 | goto error_out; |
332 | } else { | 287 | } else { |
333 | if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) | 288 | if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK)) |
334 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) | 289 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) |
335 | ret_imap->br_blockcount + | 290 | ret_imap->br_blockcount + |
336 | ret_imap->br_startoff); | 291 | ret_imap->br_startoff); |
@@ -485,7 +440,7 @@ xfs_iomap_eof_want_preallocate( | |||
485 | return 0; | 440 | return 0; |
486 | } | 441 | } |
487 | 442 | ||
488 | int | 443 | STATIC int |
489 | xfs_iomap_write_delay( | 444 | xfs_iomap_write_delay( |
490 | xfs_inode_t *ip, | 445 | xfs_inode_t *ip, |
491 | xfs_off_t offset, | 446 | xfs_off_t offset, |
@@ -588,7 +543,7 @@ retry: | |||
588 | * We no longer bother to look at the incoming map - all we have to | 543 | * We no longer bother to look at the incoming map - all we have to |
589 | * guarantee is that whatever we allocate fills the required range. | 544 | * guarantee is that whatever we allocate fills the required range. |
590 | */ | 545 | */ |
591 | int | 546 | STATIC int |
592 | xfs_iomap_write_allocate( | 547 | xfs_iomap_write_allocate( |
593 | xfs_inode_t *ip, | 548 | xfs_inode_t *ip, |
594 | xfs_off_t offset, | 549 | xfs_off_t offset, |
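The deleted xfs_imap_to_bmap() did nothing but unit conversion: the file-system-block fields of the xfs_bmbt_irec were scaled up to the byte-based xfs_iomap_t, and the caller's byte offset became a delta into the mapping. That arithmetic now lives with the callers. A standalone sketch of the conversion (the 4096-byte block size and the mapping values are assumptions for the example):

#include <stdio.h>

typedef long long xfs_off_t;

#define BLOCKLOG	12	/* assumed 4096-byte file system blocks */
#define FSB_TO_B(fsb)	((xfs_off_t)(fsb) << BLOCKLOG)

int main(void)
{
	/* made-up mapping and file offset */
	xfs_off_t br_startoff = 10, br_blockcount = 4;
	xfs_off_t offset = 42000;

	xfs_off_t map_offset = FSB_TO_B(br_startoff);	/* 40960 */
	xfs_off_t map_bsize  = FSB_TO_B(br_blockcount);	/* 16384 */
	xfs_off_t delta      = offset - map_offset;	/*  1040 */

	printf("offset=%lld bsize=%lld delta=%lld\n",
	       map_offset, map_bsize, delta);
	return 0;
}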
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 174f29990991..81ac4afd45b3 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -18,19 +18,6 @@ | |||
18 | #ifndef __XFS_IOMAP_H__ | 18 | #ifndef __XFS_IOMAP_H__ |
19 | #define __XFS_IOMAP_H__ | 19 | #define __XFS_IOMAP_H__ |
20 | 20 | ||
21 | #define IOMAP_DADDR_NULL ((xfs_daddr_t) (-1LL)) | ||
22 | |||
23 | |||
24 | typedef enum { /* iomap_flags values */ | ||
25 | IOMAP_READ = 0, /* mapping for a read */ | ||
26 | IOMAP_HOLE = 0x02, /* mapping covers a hole */ | ||
27 | IOMAP_DELAY = 0x04, /* mapping covers delalloc region */ | ||
28 | IOMAP_REALTIME = 0x10, /* mapping on the realtime device */ | ||
29 | IOMAP_UNWRITTEN = 0x20, /* mapping covers allocated */ | ||
30 | /* but uninitialized file data */ | ||
31 | IOMAP_NEW = 0x40 /* just allocate */ | ||
32 | } iomap_flags_t; | ||
33 | |||
34 | typedef enum { | 21 | typedef enum { |
35 | /* base extent manipulation calls */ | 22 | /* base extent manipulation calls */ |
36 | BMAPI_READ = (1 << 0), /* read extents */ | 23 | BMAPI_READ = (1 << 0), /* read extents */ |
@@ -52,43 +39,11 @@ typedef enum { | |||
52 | { BMAPI_MMAP, "MMAP" }, \ | 39 | { BMAPI_MMAP, "MMAP" }, \ |
53 | { BMAPI_TRYLOCK, "TRYLOCK" } | 40 | { BMAPI_TRYLOCK, "TRYLOCK" } |
54 | 41 | ||
55 | /* | ||
56 | * xfs_iomap_t: File system I/O map | ||
57 | * | ||
58 | * The iomap_bn field is expressed in 512-byte blocks, and is where the | ||
59 | * mapping starts on disk. | ||
60 | * | ||
61 | * The iomap_offset, iomap_bsize and iomap_delta fields are in bytes. | ||
62 | * iomap_offset is the offset of the mapping in the file itself. | ||
63 | * iomap_bsize is the size of the mapping, iomap_delta is the | ||
64 | * desired data's offset into the mapping, given the offset supplied | ||
65 | * to the file I/O map routine. | ||
66 | * | ||
67 | * When a request is made to read beyond the logical end of the object, | ||
68 | * iomap_size may be set to 0, but iomap_offset and iomap_length should be set | ||
69 | * to the actual amount of underlying storage that has been allocated, if any. | ||
70 | */ | ||
71 | |||
72 | typedef struct xfs_iomap { | ||
73 | xfs_daddr_t iomap_bn; /* first 512B blk of mapping */ | ||
74 | xfs_buftarg_t *iomap_target; | ||
75 | xfs_off_t iomap_offset; /* offset of mapping, bytes */ | ||
76 | xfs_off_t iomap_bsize; /* size of mapping, bytes */ | ||
77 | xfs_off_t iomap_delta; /* offset into mapping, bytes */ | ||
78 | iomap_flags_t iomap_flags; | ||
79 | } xfs_iomap_t; | ||
80 | |||
81 | struct xfs_inode; | 42 | struct xfs_inode; |
82 | struct xfs_bmbt_irec; | 43 | struct xfs_bmbt_irec; |
83 | 44 | ||
84 | extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, | 45 | extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, |
85 | struct xfs_iomap *, int *); | 46 | struct xfs_bmbt_irec *, int *, int *); |
86 | extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, | ||
87 | int, struct xfs_bmbt_irec *, int *, int); | ||
88 | extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int, | ||
89 | struct xfs_bmbt_irec *, int *); | ||
90 | extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, | ||
91 | struct xfs_bmbt_irec *, int *); | ||
92 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); | 47 | extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); |
93 | 48 | ||
94 | #endif /* __XFS_IOMAP_H__*/ | 49 | #endif /* __XFS_IOMAP_H__*/ |
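With the header trimmed down, a caller of the new-style xfs_iomap() looks roughly like the hypothetical fragment below (kernel context assumed; only the prototype above is taken from the patch). The *new output replaces the old IOMAP_NEW flag, and the mapping comes back as a raw xfs_bmbt_irec:

/* Hypothetical caller sketch -- not part of the patch. */
STATIC int
xfs_example_map_write(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	ssize_t			count)
{
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1;
	int			new = 0;
	int			error;

	error = xfs_iomap(ip, offset, count, BMAPI_WRITE,
			  &imap, &nimaps, &new);
	if (error)
		return error;
	if (nimaps) {
		/* imap.br_startblock/br_blockcount describe the mapping
		 * in file system blocks; new != 0 means it was just
		 * allocated (what IOMAP_NEW used to signal). */
	}
	return 0;
}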
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 2be019136287..5215abc8023a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -44,13 +44,8 @@ | |||
44 | 44 | ||
45 | kmem_zone_t *xfs_log_ticket_zone; | 45 | kmem_zone_t *xfs_log_ticket_zone; |
46 | 46 | ||
47 | #define xlog_write_adv_cnt(ptr, len, off, bytes) \ | ||
48 | { (ptr) += (bytes); \ | ||
49 | (len) -= (bytes); \ | ||
50 | (off) += (bytes);} | ||
51 | |||
52 | /* Local miscellaneous function prototypes */ | 47 | /* Local miscellaneous function prototypes */ |
53 | STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, | 48 | STATIC int xlog_commit_record(struct log *log, struct xlog_ticket *ticket, |
54 | xlog_in_core_t **, xfs_lsn_t *); | 49 | xlog_in_core_t **, xfs_lsn_t *); |
55 | STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | 50 | STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, |
56 | xfs_buftarg_t *log_target, | 51 | xfs_buftarg_t *log_target, |
@@ -59,11 +54,6 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, | |||
59 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); | 54 | STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); |
60 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); | 55 | STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); |
61 | STATIC void xlog_dealloc_log(xlog_t *log); | 56 | STATIC void xlog_dealloc_log(xlog_t *log); |
62 | STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], | ||
63 | int nentries, struct xlog_ticket *tic, | ||
64 | xfs_lsn_t *start_lsn, | ||
65 | xlog_in_core_t **commit_iclog, | ||
66 | uint flags); | ||
67 | 57 | ||
68 | /* local state machine functions */ | 58 | /* local state machine functions */ |
69 | STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); | 59 | STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); |
@@ -93,16 +83,8 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log, | |||
93 | STATIC void xlog_ungrant_log_space(xlog_t *log, | 83 | STATIC void xlog_ungrant_log_space(xlog_t *log, |
94 | xlog_ticket_t *ticket); | 84 | xlog_ticket_t *ticket); |
95 | 85 | ||
96 | |||
97 | /* local ticket functions */ | ||
98 | STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log, | ||
99 | int unit_bytes, | ||
100 | int count, | ||
101 | char clientid, | ||
102 | uint flags); | ||
103 | |||
104 | #if defined(DEBUG) | 86 | #if defined(DEBUG) |
105 | STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); | 87 | STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); |
106 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); | 88 | STATIC void xlog_verify_grant_head(xlog_t *log, int equals); |
107 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, | 89 | STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, |
108 | int count, boolean_t syncing); | 90 | int count, boolean_t syncing); |
@@ -258,7 +240,7 @@ xfs_log_done( | |||
258 | * If we get an error, just continue and give back the log ticket. | 240 | * If we get an error, just continue and give back the log ticket. |
259 | */ | 241 | */ |
260 | (((ticket->t_flags & XLOG_TIC_INITED) == 0) && | 242 | (((ticket->t_flags & XLOG_TIC_INITED) == 0) && |
261 | (xlog_commit_record(mp, ticket, iclog, &lsn)))) { | 243 | (xlog_commit_record(log, ticket, iclog, &lsn)))) { |
262 | lsn = (xfs_lsn_t) -1; | 244 | lsn = (xfs_lsn_t) -1; |
263 | if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { | 245 | if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { |
264 | flags |= XFS_LOG_REL_PERM_RESERV; | 246 | flags |= XFS_LOG_REL_PERM_RESERV; |
@@ -367,6 +349,15 @@ xfs_log_reserve( | |||
367 | ASSERT(flags & XFS_LOG_PERM_RESERV); | 349 | ASSERT(flags & XFS_LOG_PERM_RESERV); |
368 | internal_ticket = *ticket; | 350 | internal_ticket = *ticket; |
369 | 351 | ||
352 | /* | ||
353 | * This is a new transaction on the ticket, so we need to | ||

354 | * change the transaction ID so that the next transaction has a | ||
355 | * different TID in the log. Just add one to the existing tid | ||
356 | * so that we can see chains of rolling transactions in the log | ||
357 | * easily. | ||
358 | */ | ||
359 | internal_ticket->t_tid++; | ||
360 | |||
370 | trace_xfs_log_reserve(log, internal_ticket); | 361 | trace_xfs_log_reserve(log, internal_ticket); |
371 | 362 | ||
372 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); | 363 | xlog_grant_push_ail(mp, internal_ticket->t_unit_res); |
@@ -374,7 +365,8 @@ xfs_log_reserve( | |||
374 | } else { | 365 | } else { |
375 | /* may sleep if need to allocate more tickets */ | 366 | /* may sleep if need to allocate more tickets */ |
376 | internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, | 367 | internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, |
377 | client, flags); | 368 | client, flags, |
369 | KM_SLEEP|KM_MAYFAIL); | ||
378 | if (!internal_ticket) | 370 | if (!internal_ticket) |
379 | return XFS_ERROR(ENOMEM); | 371 | return XFS_ERROR(ENOMEM); |
380 | internal_ticket->t_trans_type = t_type; | 372 | internal_ticket->t_trans_type = t_type; |
@@ -459,6 +451,13 @@ xfs_log_mount( | |||
459 | /* Normal transactions can now occur */ | 451 | /* Normal transactions can now occur */ |
460 | mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; | 452 | mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; |
461 | 453 | ||
454 | /* | ||
455 | * Now the log has been fully initialised and we know where our | ||
456 | * space grant counters are, we can initialise the permanent ticket | ||
457 | * needed for delayed logging to work. | ||
458 | */ | ||
459 | xlog_cil_init_post_recovery(mp->m_log); | ||
460 | |||
462 | return 0; | 461 | return 0; |
463 | 462 | ||
464 | out_destroy_ail: | 463 | out_destroy_ail: |
@@ -516,18 +515,10 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
516 | #ifdef DEBUG | 515 | #ifdef DEBUG |
517 | xlog_in_core_t *first_iclog; | 516 | xlog_in_core_t *first_iclog; |
518 | #endif | 517 | #endif |
519 | xfs_log_iovec_t reg[1]; | ||
520 | xlog_ticket_t *tic = NULL; | 518 | xlog_ticket_t *tic = NULL; |
521 | xfs_lsn_t lsn; | 519 | xfs_lsn_t lsn; |
522 | int error; | 520 | int error; |
523 | 521 | ||
524 | /* the data section must be 32 bit size aligned */ | ||
525 | struct { | ||
526 | __uint16_t magic; | ||
527 | __uint16_t pad1; | ||
528 | __uint32_t pad2; /* may as well make it 64 bits */ | ||
529 | } magic = { XLOG_UNMOUNT_TYPE, 0, 0 }; | ||
530 | |||
531 | /* | 522 | /* |
532 | * Don't write out unmount record on read-only mounts. | 523 | * Don't write out unmount record on read-only mounts. |
533 | * Or, if we are doing a forced umount (typically because of IO errors). | 524 | * Or, if we are doing a forced umount (typically because of IO errors). |
@@ -549,16 +540,30 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
549 | } while (iclog != first_iclog); | 540 | } while (iclog != first_iclog); |
550 | #endif | 541 | #endif |
551 | if (! (XLOG_FORCED_SHUTDOWN(log))) { | 542 | if (! (XLOG_FORCED_SHUTDOWN(log))) { |
552 | reg[0].i_addr = (void*)&magic; | ||
553 | reg[0].i_len = sizeof(magic); | ||
554 | reg[0].i_type = XLOG_REG_TYPE_UNMOUNT; | ||
555 | |||
556 | error = xfs_log_reserve(mp, 600, 1, &tic, | 543 | error = xfs_log_reserve(mp, 600, 1, &tic, |
557 | XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); | 544 | XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); |
558 | if (!error) { | 545 | if (!error) { |
546 | /* the data section must be 32 bit size aligned */ | ||
547 | struct { | ||
548 | __uint16_t magic; | ||
549 | __uint16_t pad1; | ||
550 | __uint32_t pad2; /* may as well make it 64 bits */ | ||
551 | } magic = { | ||
552 | .magic = XLOG_UNMOUNT_TYPE, | ||
553 | }; | ||
554 | struct xfs_log_iovec reg = { | ||
555 | .i_addr = (void *)&magic, | ||
556 | .i_len = sizeof(magic), | ||
557 | .i_type = XLOG_REG_TYPE_UNMOUNT, | ||
558 | }; | ||
559 | struct xfs_log_vec vec = { | ||
560 | .lv_niovecs = 1, | ||
561 | .lv_iovecp = ®, | ||
562 | }; | ||
563 | |||
559 | /* remove inited flag */ | 564 | /* remove inited flag */ |
560 | ((xlog_ticket_t *)tic)->t_flags = 0; | 565 | tic->t_flags = 0; |
561 | error = xlog_write(mp, reg, 1, tic, &lsn, | 566 | error = xlog_write(log, &vec, tic, &lsn, |
562 | NULL, XLOG_UNMOUNT_TRANS); | 567 | NULL, XLOG_UNMOUNT_TRANS); |
563 | /* | 568 | /* |
564 | * At this point, we're umounting anyway, | 569 | * At this point, we're umounting anyway, |
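Moving the unmount record into the error-free branch keeps it off the stack in the common path; the layout requirement is unchanged, since the record payload must stay 32-bit size aligned. The struct is 8 bytes by construction, which a trivial userspace check confirms (0x556e is XLOG_UNMOUNT_TYPE, ASCII "Un"):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct {
		uint16_t magic;
		uint16_t pad1;
		uint32_t pad2;	/* may as well make it 64 bits */
	} magic = { .magic = 0x556e /* XLOG_UNMOUNT_TYPE, "Un" */ };

	(void)magic;
	/* 8 bytes: 32-bit (in fact 64-bit) size aligned by construction */
	printf("sizeof=%zu aligned=%d\n",
	       sizeof(magic), (int)(sizeof(magic) % 4 == 0));
	return 0;
}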
@@ -648,10 +653,30 @@ xfs_log_unmount(xfs_mount_t *mp) | |||
648 | xlog_dealloc_log(mp->m_log); | 653 | xlog_dealloc_log(mp->m_log); |
649 | } | 654 | } |
650 | 655 | ||
656 | void | ||
657 | xfs_log_item_init( | ||
658 | struct xfs_mount *mp, | ||
659 | struct xfs_log_item *item, | ||
660 | int type, | ||
661 | struct xfs_item_ops *ops) | ||
662 | { | ||
663 | item->li_mountp = mp; | ||
664 | item->li_ailp = mp->m_ail; | ||
665 | item->li_type = type; | ||
666 | item->li_ops = ops; | ||
667 | item->li_lv = NULL; | ||
668 | |||
669 | INIT_LIST_HEAD(&item->li_ail); | ||
670 | INIT_LIST_HEAD(&item->li_cil); | ||
671 | } | ||
672 | |||
651 | /* | 673 | /* |
652 | * Write region vectors to log. The write happens using the space reservation | 674 | * Write region vectors to log. The write happens using the space reservation |
653 | * of the ticket (tic). It is not a requirement that all writes for a given | 675 | * of the ticket (tic). It is not a requirement that all writes for a given |
654 | * transaction occur with one call to xfs_log_write(). | 676 | * transaction occur with one call to xfs_log_write(). However, it is important |
677 | * to note that the transaction reservation code makes an assumption about the | ||
678 | * number of log headers a transaction requires that may be violated if you | ||
679 | * don't pass all the transaction vectors in one call. | ||
655 | */ | 680 | */ |
656 | int | 681 | int |
657 | xfs_log_write( | 682 | xfs_log_write( |
@@ -663,11 +688,15 @@ xfs_log_write( | |||
663 | { | 688 | { |
664 | struct log *log = mp->m_log; | 689 | struct log *log = mp->m_log; |
665 | int error; | 690 | int error; |
691 | struct xfs_log_vec vec = { | ||
692 | .lv_niovecs = nentries, | ||
693 | .lv_iovecp = reg, | ||
694 | }; | ||
666 | 695 | ||
667 | if (XLOG_FORCED_SHUTDOWN(log)) | 696 | if (XLOG_FORCED_SHUTDOWN(log)) |
668 | return XFS_ERROR(EIO); | 697 | return XFS_ERROR(EIO); |
669 | 698 | ||
670 | error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0); | 699 | error = xlog_write(log, &vec, tic, start_lsn, NULL, 0); |
671 | if (error) | 700 | if (error) |
672 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 701 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
673 | return error; | 702 | return error; |
@@ -1020,6 +1049,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1020 | int i; | 1049 | int i; |
1021 | int iclogsize; | 1050 | int iclogsize; |
1022 | int error = ENOMEM; | 1051 | int error = ENOMEM; |
1052 | uint log2_size = 0; | ||
1023 | 1053 | ||
1024 | log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); | 1054 | log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); |
1025 | if (!log) { | 1055 | if (!log) { |
@@ -1045,29 +1075,30 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1045 | 1075 | ||
1046 | error = EFSCORRUPTED; | 1076 | error = EFSCORRUPTED; |
1047 | if (xfs_sb_version_hassector(&mp->m_sb)) { | 1077 | if (xfs_sb_version_hassector(&mp->m_sb)) { |
1048 | log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; | 1078 | log2_size = mp->m_sb.sb_logsectlog; |
1049 | if (log->l_sectbb_log < 0 || | 1079 | if (log2_size < BBSHIFT) { |
1050 | log->l_sectbb_log > mp->m_sectbb_log) { | 1080 | xlog_warn("XFS: Log sector size too small " |
1051 | xlog_warn("XFS: Log sector size (0x%x) out of range.", | 1081 | "(0x%x < 0x%x)", log2_size, BBSHIFT); |
1052 | log->l_sectbb_log); | ||
1053 | goto out_free_log; | 1082 | goto out_free_log; |
1054 | } | 1083 | } |
1055 | 1084 | ||
1056 | /* for larger sector sizes, must have v2 or external log */ | 1085 | log2_size -= BBSHIFT; |
1057 | if (log->l_sectbb_log != 0 && | 1086 | if (log2_size > mp->m_sectbb_log) { |
1058 | (log->l_logBBstart != 0 && | 1087 | xlog_warn("XFS: Log sector size too large " |
1059 | !xfs_sb_version_haslogv2(&mp->m_sb))) { | 1088 | "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log); |
1060 | xlog_warn("XFS: log sector size (0x%x) invalid " | ||
1061 | "for configuration.", log->l_sectbb_log); | ||
1062 | goto out_free_log; | 1089 | goto out_free_log; |
1063 | } | 1090 | } |
1064 | if (mp->m_sb.sb_logsectlog < BBSHIFT) { | 1091 | |
1065 | xlog_warn("XFS: Log sector log (0x%x) too small.", | 1092 | /* for larger sector sizes, must have v2 or external log */ |
1066 | mp->m_sb.sb_logsectlog); | 1093 | if (log2_size && log->l_logBBstart > 0 && |
1094 | !xfs_sb_version_haslogv2(&mp->m_sb)) { | ||
1095 | |||
1096 | xlog_warn("XFS: log sector size (0x%x) invalid " | ||
1097 | "for configuration.", log2_size); | ||
1067 | goto out_free_log; | 1098 | goto out_free_log; |
1068 | } | 1099 | } |
1069 | } | 1100 | } |
1070 | log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; | 1101 | log->l_sectBBsize = 1 << log2_size; |
1071 | 1102 | ||
1072 | xlog_get_iclog_buffer_size(mp, log); | 1103 | xlog_get_iclog_buffer_size(mp, log); |
1073 | 1104 | ||
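The rewritten checks bound sb_logsectlog to the range [BBSHIFT, BBSHIFT + m_sectbb_log] and then store the sector size in basic (512-byte) blocks directly as l_sectBBsize, instead of the old log2/mask pair. A userspace sketch of the derivation, with made-up superblock values:

#include <stdio.h>

#define BBSHIFT	9	/* 512-byte basic blocks */

/* Returns the sector size in basic blocks, or -1 if out of range. */
static int sect_bb_size(unsigned int sb_logsectlog, unsigned int m_sectbb_log)
{
	if (sb_logsectlog < BBSHIFT)
		return -1;			/* sector smaller than 512B */
	if (sb_logsectlog - BBSHIFT > m_sectbb_log)
		return -1;			/* larger than device allows */
	return 1 << (sb_logsectlog - BBSHIFT);
}

int main(void)
{
	/* e.g. 4096-byte log sectors on a device allowing up to 4k */
	printf("l_sectBBsize=%d\n", sect_bb_size(12, 3));	/* -> 8 */
	return 0;
}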
@@ -1147,6 +1178,9 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1147 | *iclogp = log->l_iclog; /* complete ring */ | 1178 | *iclogp = log->l_iclog; /* complete ring */ |
1148 | log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ | 1179 | log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ |
1149 | 1180 | ||
1181 | error = xlog_cil_init(log); | ||
1182 | if (error) | ||
1183 | goto out_free_iclog; | ||
1150 | return log; | 1184 | return log; |
1151 | 1185 | ||
1152 | out_free_iclog: | 1186 | out_free_iclog: |
@@ -1174,26 +1208,31 @@ out: | |||
1174 | * ticket. Return the lsn of the commit record. | 1208 | * ticket. Return the lsn of the commit record. |
1175 | */ | 1209 | */ |
1176 | STATIC int | 1210 | STATIC int |
1177 | xlog_commit_record(xfs_mount_t *mp, | 1211 | xlog_commit_record( |
1178 | xlog_ticket_t *ticket, | 1212 | struct log *log, |
1179 | xlog_in_core_t **iclog, | 1213 | struct xlog_ticket *ticket, |
1180 | xfs_lsn_t *commitlsnp) | 1214 | struct xlog_in_core **iclog, |
1215 | xfs_lsn_t *commitlsnp) | ||
1181 | { | 1216 | { |
1182 | int error; | 1217 | struct xfs_mount *mp = log->l_mp; |
1183 | xfs_log_iovec_t reg[1]; | 1218 | int error; |
1184 | 1219 | struct xfs_log_iovec reg = { | |
1185 | reg[0].i_addr = NULL; | 1220 | .i_addr = NULL, |
1186 | reg[0].i_len = 0; | 1221 | .i_len = 0, |
1187 | reg[0].i_type = XLOG_REG_TYPE_COMMIT; | 1222 | .i_type = XLOG_REG_TYPE_COMMIT, |
1223 | }; | ||
1224 | struct xfs_log_vec vec = { | ||
1225 | .lv_niovecs = 1, | ||
1226 | .lv_iovecp = ®, | ||
1227 | }; | ||
1188 | 1228 | ||
1189 | ASSERT_ALWAYS(iclog); | 1229 | ASSERT_ALWAYS(iclog); |
1190 | if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, | 1230 | error = xlog_write(log, &vec, ticket, commitlsnp, iclog, |
1191 | iclog, XLOG_COMMIT_TRANS))) { | 1231 | XLOG_COMMIT_TRANS); |
1232 | if (error) | ||
1192 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 1233 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
1193 | } | ||
1194 | return error; | 1234 | return error; |
1195 | } /* xlog_commit_record */ | 1235 | } |
1196 | |||
1197 | 1236 | ||
1198 | /* | 1237 | /* |
1199 | * Push on the buffer cache code if we ever use more than 75% of the on-disk | 1238 | * Push on the buffer cache code if we ever use more than 75% of the on-disk |
@@ -1468,6 +1507,8 @@ xlog_dealloc_log(xlog_t *log) | |||
1468 | xlog_in_core_t *iclog, *next_iclog; | 1507 | xlog_in_core_t *iclog, *next_iclog; |
1469 | int i; | 1508 | int i; |
1470 | 1509 | ||
1510 | xlog_cil_destroy(log); | ||
1511 | |||
1471 | iclog = log->l_iclog; | 1512 | iclog = log->l_iclog; |
1472 | for (i=0; i<log->l_iclog_bufs; i++) { | 1513 | for (i=0; i<log->l_iclog_bufs; i++) { |
1473 | sv_destroy(&iclog->ic_force_wait); | 1514 | sv_destroy(&iclog->ic_force_wait); |
@@ -1510,8 +1551,10 @@ xlog_state_finish_copy(xlog_t *log, | |||
1510 | * print out info relating to regions written which consume | 1551 | * print out info relating to regions written which consume |
1511 | * the reservation | 1552 | * the reservation |
1512 | */ | 1553 | */ |
1513 | STATIC void | 1554 | void |
1514 | xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | 1555 | xlog_print_tic_res( |
1556 | struct xfs_mount *mp, | ||
1557 | struct xlog_ticket *ticket) | ||
1515 | { | 1558 | { |
1516 | uint i; | 1559 | uint i; |
1517 | uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); | 1560 | uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); |
@@ -1611,6 +1654,196 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | |||
1611 | "bad-rtype" : res_type_str[r_type-1]), | 1654 | "bad-rtype" : res_type_str[r_type-1]), |
1612 | ticket->t_res_arr[i].r_len); | 1655 | ticket->t_res_arr[i].r_len); |
1613 | } | 1656 | } |
1657 | |||
1658 | xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, | ||
1659 | "xfs_log_write: reservation ran out. Need to up reservation"); | ||
1660 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
1661 | } | ||
1662 | |||
1663 | /* | ||
1664 | * Calculate the potential space needed by the log vector. Each region gets | ||
1665 | * its own xlog_op_header_t and may need to be double word aligned. | ||
1666 | */ | ||
1667 | static int | ||
1668 | xlog_write_calc_vec_length( | ||
1669 | struct xlog_ticket *ticket, | ||
1670 | struct xfs_log_vec *log_vector) | ||
1671 | { | ||
1672 | struct xfs_log_vec *lv; | ||
1673 | int headers = 0; | ||
1674 | int len = 0; | ||
1675 | int i; | ||
1676 | |||
1677 | /* acct for start rec of xact */ | ||
1678 | if (ticket->t_flags & XLOG_TIC_INITED) | ||
1679 | headers++; | ||
1680 | |||
1681 | for (lv = log_vector; lv; lv = lv->lv_next) { | ||
1682 | headers += lv->lv_niovecs; | ||
1683 | |||
1684 | for (i = 0; i < lv->lv_niovecs; i++) { | ||
1685 | struct xfs_log_iovec *vecp = &lv->lv_iovecp[i]; | ||
1686 | |||
1687 | len += vecp->i_len; | ||
1688 | xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type); | ||
1689 | } | ||
1690 | } | ||
1691 | |||
1692 | ticket->t_res_num_ophdrs += headers; | ||
1693 | len += headers * sizeof(struct xlog_op_header); | ||
1694 | |||
1695 | return len; | ||
1696 | } | ||
1697 | |||
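xlog_write_calc_vec_length() is straightforward to check by hand: every region costs one op header plus its payload, and an INITED ticket costs one more header for the start record. A standalone model with the ticket bookkeeping removed (the 12-byte header size and the vector shape are assumptions for the example):

#include <stddef.h>
#include <stdio.h>

#define OPHDR_SIZE	12	/* stand-in for sizeof(struct xlog_op_header) */

struct log_iovec { int i_len; };
struct log_vec {
	struct log_vec		*lv_next;
	int			lv_niovecs;
	struct log_iovec	*lv_iovecp;
};

/* Mirrors xlog_write_calc_vec_length() without the ticket accounting. */
static int calc_vec_length(struct log_vec *log_vector, int start_rec)
{
	struct log_vec *lv;
	int headers = start_rec ? 1 : 0;	/* start rec of transaction */
	int len = 0;
	int i;

	for (lv = log_vector; lv; lv = lv->lv_next) {
		headers += lv->lv_niovecs;
		for (i = 0; i < lv->lv_niovecs; i++)
			len += lv->lv_iovecp[i].i_len;
	}
	return len + headers * OPHDR_SIZE;
}

int main(void)
{
	struct log_iovec iov[2] = { { 100 }, { 60 } };
	struct log_vec lv = { NULL, 2, iov };

	/* 100 + 60 payload, 2 region headers + 1 start record = 196 */
	printf("len=%d\n", calc_vec_length(&lv, 1));
	return 0;
}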
1698 | /* | ||
1699 | * If first write for transaction, insert start record. We can't be trying to | ||
1700 | * commit if we are inited. We can't have any "partial_copy" if we are inited. | ||
1701 | */ | ||
1702 | static int | ||
1703 | xlog_write_start_rec( | ||
1704 | struct xlog_op_header *ophdr, | ||
1705 | struct xlog_ticket *ticket) | ||
1706 | { | ||
1707 | if (!(ticket->t_flags & XLOG_TIC_INITED)) | ||
1708 | return 0; | ||
1709 | |||
1710 | ophdr->oh_tid = cpu_to_be32(ticket->t_tid); | ||
1711 | ophdr->oh_clientid = ticket->t_clientid; | ||
1712 | ophdr->oh_len = 0; | ||
1713 | ophdr->oh_flags = XLOG_START_TRANS; | ||
1714 | ophdr->oh_res2 = 0; | ||
1715 | |||
1716 | ticket->t_flags &= ~XLOG_TIC_INITED; | ||
1717 | |||
1718 | return sizeof(struct xlog_op_header); | ||
1719 | } | ||
1720 | |||
1721 | static xlog_op_header_t * | ||
1722 | xlog_write_setup_ophdr( | ||
1723 | struct log *log, | ||
1724 | struct xlog_op_header *ophdr, | ||
1725 | struct xlog_ticket *ticket, | ||
1726 | uint flags) | ||
1727 | { | ||
1728 | ophdr->oh_tid = cpu_to_be32(ticket->t_tid); | ||
1729 | ophdr->oh_clientid = ticket->t_clientid; | ||
1730 | ophdr->oh_res2 = 0; | ||
1731 | |||
1732 | /* are we copying a commit or unmount record? */ | ||
1733 | ophdr->oh_flags = flags; | ||
1734 | |||
1735 | /* | ||
1736 | * We've seen logs corrupted with bad transaction client ids. This | ||
1737 | * makes sure that XFS doesn't generate them. If we find one, turn it | ||
1738 | * into an EIO and shut down the filesystem. | ||
1739 | */ | ||
1740 | switch (ophdr->oh_clientid) { | ||
1741 | case XFS_TRANSACTION: | ||
1742 | case XFS_VOLUME: | ||
1743 | case XFS_LOG: | ||
1744 | break; | ||
1745 | default: | ||
1746 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | ||
1747 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", | ||
1748 | ophdr->oh_clientid, ticket); | ||
1749 | return NULL; | ||
1750 | } | ||
1751 | |||
1752 | return ophdr; | ||
1753 | } | ||
1754 | |||
1755 | /* | ||
1756 | * Set up the parameters of the region copy into the log. This has | ||
1757 | * to handle region write split across multiple log buffers - this | ||
1758 | * state is kept external to this function so that this code can | ||
1759 | * be written in an obvious, self-documenting manner. | ||
1760 | */ | ||
1761 | static int | ||
1762 | xlog_write_setup_copy( | ||
1763 | struct xlog_ticket *ticket, | ||
1764 | struct xlog_op_header *ophdr, | ||
1765 | int space_available, | ||
1766 | int space_required, | ||
1767 | int *copy_off, | ||
1768 | int *copy_len, | ||
1769 | int *last_was_partial_copy, | ||
1770 | int *bytes_consumed) | ||
1771 | { | ||
1772 | int still_to_copy; | ||
1773 | |||
1774 | still_to_copy = space_required - *bytes_consumed; | ||
1775 | *copy_off = *bytes_consumed; | ||
1776 | |||
1777 | if (still_to_copy <= space_available) { | ||
1778 | /* write of region completes here */ | ||
1779 | *copy_len = still_to_copy; | ||
1780 | ophdr->oh_len = cpu_to_be32(*copy_len); | ||
1781 | if (*last_was_partial_copy) | ||
1782 | ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS); | ||
1783 | *last_was_partial_copy = 0; | ||
1784 | *bytes_consumed = 0; | ||
1785 | return 0; | ||
1786 | } | ||
1787 | |||
1788 | /* partial write of region, needs extra log op header reservation */ | ||
1789 | *copy_len = space_available; | ||
1790 | ophdr->oh_len = cpu_to_be32(*copy_len); | ||
1791 | ophdr->oh_flags |= XLOG_CONTINUE_TRANS; | ||
1792 | if (*last_was_partial_copy) | ||
1793 | ophdr->oh_flags |= XLOG_WAS_CONT_TRANS; | ||
1794 | *bytes_consumed += *copy_len; | ||
1795 | (*last_was_partial_copy)++; | ||
1796 | |||
1797 | /* account for new log op header */ | ||
1798 | ticket->t_curr_res -= sizeof(struct xlog_op_header); | ||
1799 | ticket->t_res_num_ophdrs++; | ||
1800 | |||
1801 | return sizeof(struct xlog_op_header); | ||
1802 | } | ||
1803 | |||
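The copy setup therefore has exactly two outcomes per iteration: the remainder of the region fits in the iclog (copy it all and clear the partial state), or it does not (fill the available space, flag a continuation, and charge one extra op header to the reservation). The standalone model below walks a 300-byte region through two 200-byte iclog windows, dropping the op-header bookkeeping:

#include <stdio.h>

/* Models only the length/state logic of xlog_write_setup_copy(). */
static int setup_copy(int avail, int required, int *copy_off, int *copy_len,
		      int *partial, int *consumed)
{
	int still_to_copy = required - *consumed;

	*copy_off = *consumed;
	if (still_to_copy <= avail) {		/* region completes here */
		*copy_len = still_to_copy;
		*partial = 0;
		*consumed = 0;
		return 0;
	}
	*copy_len = avail;			/* partial: continue later */
	*consumed += avail;
	(*partial)++;
	return 1;				/* extra op header needed */
}

int main(void)
{
	int off, len, partial = 0, consumed = 0;

	/* 300-byte region, 200 bytes of iclog space on the first pass: */
	setup_copy(200, 300, &off, &len, &partial, &consumed);
	printf("pass1: off=%d len=%d partial=%d\n", off, len, partial);

	/* a second pass with fresh space finishes the region: */
	setup_copy(200, 300, &off, &len, &partial, &consumed);
	printf("pass2: off=%d len=%d partial=%d\n", off, len, partial);
	return 0;
}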
1804 | static int | ||
1805 | xlog_write_copy_finish( | ||
1806 | struct log *log, | ||
1807 | struct xlog_in_core *iclog, | ||
1808 | uint flags, | ||
1809 | int *record_cnt, | ||
1810 | int *data_cnt, | ||
1811 | int *partial_copy, | ||
1812 | int *partial_copy_len, | ||
1813 | int log_offset, | ||
1814 | struct xlog_in_core **commit_iclog) | ||
1815 | { | ||
1816 | if (*partial_copy) { | ||
1817 | /* | ||
1818 | * This iclog has already been marked WANT_SYNC by | ||
1819 | * xlog_state_get_iclog_space. | ||
1820 | */ | ||
1821 | xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); | ||
1822 | *record_cnt = 0; | ||
1823 | *data_cnt = 0; | ||
1824 | return xlog_state_release_iclog(log, iclog); | ||
1825 | } | ||
1826 | |||
1827 | *partial_copy = 0; | ||
1828 | *partial_copy_len = 0; | ||
1829 | |||
1830 | if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { | ||
1831 | /* no more space in this iclog - push it. */ | ||
1832 | xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); | ||
1833 | *record_cnt = 0; | ||
1834 | *data_cnt = 0; | ||
1835 | |||
1836 | spin_lock(&log->l_icloglock); | ||
1837 | xlog_state_want_sync(log, iclog); | ||
1838 | spin_unlock(&log->l_icloglock); | ||
1839 | |||
1840 | if (!commit_iclog) | ||
1841 | return xlog_state_release_iclog(log, iclog); | ||
1842 | ASSERT(flags & XLOG_COMMIT_TRANS); | ||
1843 | *commit_iclog = iclog; | ||
1844 | } | ||
1845 | |||
1846 | return 0; | ||
1614 | } | 1847 | } |
1615 | 1848 | ||
1616 | /* | 1849 | /* |
@@ -1653,211 +1886,163 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) | |||
1653 | * we don't update ic_offset until the end when we know exactly how many | 1886 | * we don't update ic_offset until the end when we know exactly how many |
1654 | * bytes have been written out. | 1887 | * bytes have been written out. |
1655 | */ | 1888 | */ |
1656 | STATIC int | 1889 | int |
1657 | xlog_write( | 1890 | xlog_write( |
1658 | struct xfs_mount *mp, | 1891 | struct log *log, |
1659 | struct xfs_log_iovec reg[], | 1892 | struct xfs_log_vec *log_vector, |
1660 | int nentries, | ||
1661 | struct xlog_ticket *ticket, | 1893 | struct xlog_ticket *ticket, |
1662 | xfs_lsn_t *start_lsn, | 1894 | xfs_lsn_t *start_lsn, |
1663 | struct xlog_in_core **commit_iclog, | 1895 | struct xlog_in_core **commit_iclog, |
1664 | uint flags) | 1896 | uint flags) |
1665 | { | 1897 | { |
1666 | xlog_t *log = mp->m_log; | 1898 | struct xlog_in_core *iclog = NULL; |
1667 | xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ | 1899 | struct xfs_log_iovec *vecp; |
1668 | xlog_op_header_t *logop_head; /* ptr to log operation header */ | 1900 | struct xfs_log_vec *lv; |
1669 | __psint_t ptr; /* copy address into data region */ | 1901 | int len; |
1670 | int len; /* # xlog_write() bytes 2 still copy */ | 1902 | int index; |
1671 | int index; /* region index currently copying */ | 1903 | int partial_copy = 0; |
1672 | int log_offset; /* offset (from 0) into data region */ | 1904 | int partial_copy_len = 0; |
1673 | int start_rec_copy; /* # bytes to copy for start record */ | 1905 | int contwr = 0; |
1674 | int partial_copy; /* did we split a region? */ | 1906 | int record_cnt = 0; |
1675 | int partial_copy_len;/* # bytes copied if split region */ | 1907 | int data_cnt = 0; |
1676 | int need_copy; /* # bytes need to memcpy this region */ | 1908 | int error; |
1677 | int copy_len; /* # bytes actually memcpy'ing */ | ||
1678 | int copy_off; /* # bytes from entry start */ | ||
1679 | int contwr; /* continued write of in-core log? */ | ||
1680 | int error; | ||
1681 | int record_cnt = 0, data_cnt = 0; | ||
1682 | |||
1683 | partial_copy_len = partial_copy = 0; | ||
1684 | |||
1685 | /* Calculate potential maximum space. Each region gets its own | ||
1686 | * xlog_op_header_t and may need to be double word aligned. | ||
1687 | */ | ||
1688 | len = 0; | ||
1689 | if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */ | ||
1690 | len += sizeof(xlog_op_header_t); | ||
1691 | ticket->t_res_num_ophdrs++; | ||
1692 | } | ||
1693 | |||
1694 | for (index = 0; index < nentries; index++) { | ||
1695 | len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ | ||
1696 | ticket->t_res_num_ophdrs++; | ||
1697 | len += reg[index].i_len; | ||
1698 | xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type); | ||
1699 | } | ||
1700 | contwr = *start_lsn = 0; | ||
1701 | 1909 | ||
1702 | if (ticket->t_curr_res < len) { | 1910 | *start_lsn = 0; |
1703 | xlog_print_tic_res(mp, ticket); | ||
1704 | #ifdef DEBUG | ||
1705 | xlog_panic( | ||
1706 | "xfs_log_write: reservation ran out. Need to up reservation"); | ||
1707 | #else | ||
1708 | /* Customer configurable panic */ | ||
1709 | xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, | ||
1710 | "xfs_log_write: reservation ran out. Need to up reservation"); | ||
1711 | /* If we did not panic, shutdown the filesystem */ | ||
1712 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
1713 | #endif | ||
1714 | } else | ||
1715 | ticket->t_curr_res -= len; | ||
1716 | 1911 | ||
1717 | for (index = 0; index < nentries; ) { | 1912 | len = xlog_write_calc_vec_length(ticket, log_vector); |
1718 | if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket, | 1913 | if (log->l_cilp) { |
1719 | &contwr, &log_offset))) | 1914 | /* |
1720 | return error; | 1915 | * Region headers and bytes are already accounted for. |
1916 | * We only need to take into account start records and | ||
1917 | * split regions in this function. | ||
1918 | */ | ||
1919 | if (ticket->t_flags & XLOG_TIC_INITED) | ||
1920 | ticket->t_curr_res -= sizeof(xlog_op_header_t); | ||
1721 | 1921 | ||
1722 | ASSERT(log_offset <= iclog->ic_size - 1); | 1922 | /* |
1723 | ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset); | 1923 | * Commit record headers need to be accounted for. These |
1924 | * come in as separate writes so are easy to detect. | ||
1925 | */ | ||
1926 | if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) | ||
1927 | ticket->t_curr_res -= sizeof(xlog_op_header_t); | ||
1928 | } else | ||
1929 | ticket->t_curr_res -= len; | ||
1930 | |||
1931 | if (ticket->t_curr_res < 0) | ||
1932 | xlog_print_tic_res(log->l_mp, ticket); | ||
1933 | |||
1934 | index = 0; | ||
1935 | lv = log_vector; | ||
1936 | vecp = lv->lv_iovecp; | ||
1937 | while (lv && index < lv->lv_niovecs) { | ||
1938 | void *ptr; | ||
1939 | int log_offset; | ||
1940 | |||
1941 | error = xlog_state_get_iclog_space(log, len, &iclog, ticket, | ||
1942 | &contwr, &log_offset); | ||
1943 | if (error) | ||
1944 | return error; | ||
1724 | 1945 | ||
1725 | /* start_lsn is the first lsn written to. That's all we need. */ | 1946 | ASSERT(log_offset <= iclog->ic_size - 1); |
1726 | if (! *start_lsn) | 1947 | ptr = iclog->ic_datap + log_offset; |
1727 | *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); | ||
1728 | 1948 | ||
1729 | /* This loop writes out as many regions as can fit in the amount | 1949 | /* start_lsn is the first lsn written to. That's all we need. */ |
1730 | * of space which was allocated by xlog_state_get_iclog_space(). | 1950 | if (!*start_lsn) |
1731 | */ | 1951 | *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); |
1732 | while (index < nentries) { | ||
1733 | ASSERT(reg[index].i_len % sizeof(__int32_t) == 0); | ||
1734 | ASSERT((__psint_t)ptr % sizeof(__int32_t) == 0); | ||
1735 | start_rec_copy = 0; | ||
1736 | |||
1737 | /* If first write for transaction, insert start record. | ||
1738 | * We can't be trying to commit if we are inited. We can't | ||
1739 | * have any "partial_copy" if we are inited. | ||
1740 | */ | ||
1741 | if (ticket->t_flags & XLOG_TIC_INITED) { | ||
1742 | logop_head = (xlog_op_header_t *)ptr; | ||
1743 | logop_head->oh_tid = cpu_to_be32(ticket->t_tid); | ||
1744 | logop_head->oh_clientid = ticket->t_clientid; | ||
1745 | logop_head->oh_len = 0; | ||
1746 | logop_head->oh_flags = XLOG_START_TRANS; | ||
1747 | logop_head->oh_res2 = 0; | ||
1748 | ticket->t_flags &= ~XLOG_TIC_INITED; /* clear bit */ | ||
1749 | record_cnt++; | ||
1750 | |||
1751 | start_rec_copy = sizeof(xlog_op_header_t); | ||
1752 | xlog_write_adv_cnt(ptr, len, log_offset, start_rec_copy); | ||
1753 | } | ||
1754 | 1952 | ||
1755 | /* Copy log operation header directly into data section */ | 1953 | /* |
1756 | logop_head = (xlog_op_header_t *)ptr; | 1954 | * This loop writes out as many regions as can fit in the amount |
1757 | logop_head->oh_tid = cpu_to_be32(ticket->t_tid); | 1955 | * of space which was allocated by xlog_state_get_iclog_space(). |
1758 | logop_head->oh_clientid = ticket->t_clientid; | 1956 | */ |
1759 | logop_head->oh_res2 = 0; | 1957 | while (lv && index < lv->lv_niovecs) { |
1958 | struct xfs_log_iovec *reg = &vecp[index]; | ||
1959 | struct xlog_op_header *ophdr; | ||
1960 | int start_rec_copy; | ||
1961 | int copy_len; | ||
1962 | int copy_off; | ||
1963 | |||
1964 | ASSERT(reg->i_len % sizeof(__int32_t) == 0); | ||
1965 | ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); | ||
1966 | |||
1967 | start_rec_copy = xlog_write_start_rec(ptr, ticket); | ||
1968 | if (start_rec_copy) { | ||
1969 | record_cnt++; | ||
1970 | xlog_write_adv_cnt(&ptr, &len, &log_offset, | ||
1971 | start_rec_copy); | ||
1972 | } | ||
1760 | 1973 | ||
1761 | /* header copied directly */ | 1974 | ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags); |
1762 | xlog_write_adv_cnt(ptr, len, log_offset, sizeof(xlog_op_header_t)); | 1975 | if (!ophdr) |
1976 | return XFS_ERROR(EIO); | ||
1763 | 1977 | ||
1764 | /* are we copying a commit or unmount record? */ | 1978 | xlog_write_adv_cnt(&ptr, &len, &log_offset, |
1765 | logop_head->oh_flags = flags; | 1979 | sizeof(struct xlog_op_header)); |
1980 | |||
1981 | len += xlog_write_setup_copy(ticket, ophdr, | ||
1982 | iclog->ic_size-log_offset, | ||
1983 | reg->i_len, | ||
1984 | ©_off, ©_len, | ||
1985 | &partial_copy, | ||
1986 | &partial_copy_len); | ||
1987 | xlog_verify_dest_ptr(log, ptr); | ||
1988 | |||
1989 | /* copy region */ | ||
1990 | ASSERT(copy_len >= 0); | ||
1991 | memcpy(ptr, reg->i_addr + copy_off, copy_len); | ||
1992 | xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); | ||
1993 | |||
1994 | copy_len += start_rec_copy + sizeof(xlog_op_header_t); | ||
1995 | record_cnt++; | ||
1996 | data_cnt += contwr ? copy_len : 0; | ||
1997 | |||
1998 | error = xlog_write_copy_finish(log, iclog, flags, | ||
1999 | &record_cnt, &data_cnt, | ||
2000 | &partial_copy, | ||
2001 | &partial_copy_len, | ||
2002 | log_offset, | ||
2003 | commit_iclog); | ||
2004 | if (error) | ||
2005 | return error; | ||
1766 | 2006 | ||
1767 | /* | 2007 | /* |
1768 | * We've seen logs corrupted with bad transaction client | 2008 | * if we had a partial copy, we need to get more iclog |
1769 | * ids. This makes sure that XFS doesn't generate them on. | 2009 | * space but we don't want to increment the region |
1770 | * Turn this into an EIO and shut down the filesystem. | 2010 | * index because there is still more in this region to |
1771 | */ | 2011 | * write. |
1772 | switch (logop_head->oh_clientid) { | 2012 | * |
1773 | case XFS_TRANSACTION: | 2013 | * If we completed writing this region, and we flushed |
1774 | case XFS_VOLUME: | 2014 | * the iclog (indicated by resetting of the record |
1775 | case XFS_LOG: | 2015 | * count), then we also need to get more log space. If |
1776 | break; | 2016 | * this was the last record, though, we are done and |
1777 | default: | 2017 | * can just return. |
1778 | xfs_fs_cmn_err(CE_WARN, mp, | 2018 | */ |
1779 | "Bad XFS transaction clientid 0x%x in ticket 0x%p", | 2019 | if (partial_copy) |
1780 | logop_head->oh_clientid, ticket); | 2020 | break; |
1781 | return XFS_ERROR(EIO); | ||
1782 | } | ||
1783 | 2021 | ||
1784 | /* Partial write last time? => (partial_copy != 0) | 2022 | if (++index == lv->lv_niovecs) { |
1785 | * need_copy is the amount we'd like to copy if everything could | 2023 | lv = lv->lv_next; |
1786 | * fit in the current memcpy. | 2024 | index = 0; |
1787 | */ | 2025 | if (lv) |
1788 | need_copy = reg[index].i_len - partial_copy_len; | 2026 | vecp = lv->lv_iovecp; |
1789 | 2027 | } | |
1790 | copy_off = partial_copy_len; | 2028 | if (record_cnt == 0) { |
1791 | if (need_copy <= iclog->ic_size - log_offset) { /*complete write */ | 2029 | if (!lv) |
1792 | copy_len = need_copy; | 2030 | return 0; |
1793 | logop_head->oh_len = cpu_to_be32(copy_len); | 2031 | break; |
1794 | if (partial_copy) | 2032 | } |
1795 | logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS); | ||
1796 | partial_copy_len = partial_copy = 0; | ||
1797 | } else { /* partial write */ | ||
1798 | copy_len = iclog->ic_size - log_offset; | ||
1799 | logop_head->oh_len = cpu_to_be32(copy_len); | ||
1800 | logop_head->oh_flags |= XLOG_CONTINUE_TRANS; | ||
1801 | if (partial_copy) | ||
1802 | logop_head->oh_flags |= XLOG_WAS_CONT_TRANS; | ||
1803 | partial_copy_len += copy_len; | ||
1804 | partial_copy++; | ||
1805 | len += sizeof(xlog_op_header_t); /* from splitting of region */ | ||
1806 | /* account for new log op header */ | ||
1807 | ticket->t_curr_res -= sizeof(xlog_op_header_t); | ||
1808 | ticket->t_res_num_ophdrs++; | ||
1809 | } | ||
1810 | xlog_verify_dest_ptr(log, ptr); | ||
1811 | |||
1812 | /* copy region */ | ||
1813 | ASSERT(copy_len >= 0); | ||
1814 | memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len); | ||
1815 | xlog_write_adv_cnt(ptr, len, log_offset, copy_len); | ||
1816 | |||
1817 | /* make copy_len total bytes copied, including headers */ | ||
1818 | copy_len += start_rec_copy + sizeof(xlog_op_header_t); | ||
1819 | record_cnt++; | ||
1820 | data_cnt += contwr ? copy_len : 0; | ||
1821 | if (partial_copy) { /* copied partial region */ | ||
1822 | /* already marked WANT_SYNC by xlog_state_get_iclog_space */ | ||
1823 | xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); | ||
1824 | record_cnt = data_cnt = 0; | ||
1825 | if ((error = xlog_state_release_iclog(log, iclog))) | ||
1826 | return error; | ||
1827 | break; /* don't increment index */ | ||
1828 | } else { /* copied entire region */ | ||
1829 | index++; | ||
1830 | partial_copy_len = partial_copy = 0; | ||
1831 | |||
1832 | if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { | ||
1833 | xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); | ||
1834 | record_cnt = data_cnt = 0; | ||
1835 | spin_lock(&log->l_icloglock); | ||
1836 | xlog_state_want_sync(log, iclog); | ||
1837 | spin_unlock(&log->l_icloglock); | ||
1838 | if (commit_iclog) { | ||
1839 | ASSERT(flags & XLOG_COMMIT_TRANS); | ||
1840 | *commit_iclog = iclog; | ||
1841 | } else if ((error = xlog_state_release_iclog(log, iclog))) | ||
1842 | return error; | ||
1843 | if (index == nentries) | ||
1844 | return 0; /* we are done */ | ||
1845 | else | ||
1846 | break; | ||
1847 | } | 2033 | } |
1848 | } /* if (partial_copy) */ | 2034 | } |
1849 | } /* while (index < nentries) */ | 2035 | |
1850 | } /* for (index = 0; index < nentries; ) */ | 2036 | ASSERT(len == 0); |
1851 | ASSERT(len == 0); | 2037 | |
2038 | xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); | ||
2039 | if (!commit_iclog) | ||
2040 | return xlog_state_release_iclog(log, iclog); | ||
1852 | 2041 | ||
1853 | xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); | ||
1854 | if (commit_iclog) { | ||
1855 | ASSERT(flags & XLOG_COMMIT_TRANS); | 2042 | ASSERT(flags & XLOG_COMMIT_TRANS); |
1856 | *commit_iclog = iclog; | 2043 | *commit_iclog = iclog; |
1857 | return 0; | 2044 | return 0; |
1858 | } | 2045 | } |
1859 | return xlog_state_release_iclog(log, iclog); | ||
1860 | } /* xlog_write */ | ||
1861 | 2046 | ||
1862 | 2047 | ||
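The most visible change in xlog_write() above is that regions now arrive as a chain of log vectors rather than a flat array. The fragment below is a stand-alone model of just that iteration; the structures are trimmed-down stand-ins for xfs_log_vec and xfs_log_iovec, and every name in it is invented.

#include <stdio.h>

struct iovec_s { const char *name; };

struct logvec_s {
	struct logvec_s *next;
	int niovecs;
	struct iovec_s *iovecp;
};

static void
walk_chain(struct logvec_s *lv)
{
	struct iovec_s *vecp = lv->iovecp;
	int index = 0;

	while (lv && index < lv->niovecs) {
		printf("write region: %s\n", vecp[index].name);

		/* on vector exhaustion, move to the next log vector */
		if (++index == lv->niovecs) {
			lv = lv->next;
			index = 0;
			if (lv)
				vecp = lv->iovecp;
		}
	}
}

int main(void)
{
	struct iovec_s a[] = { { "inode log format" }, { "inode core" } };
	struct iovec_s b[] = { { "buffer data" } };
	struct logvec_s lv2 = { NULL, 1, b };
	struct logvec_s lv1 = { &lv2, 2, a };

	walk_chain(&lv1);
	return 0;
}

In the kernel function the same advance happens after a region is fully copied, with the added wrinkle that a partial copy breaks out to get more iclog space without advancing the index.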
1863 | /***************************************************************************** | 2048 | /***************************************************************************** |
@@ -2840,6 +3025,8 @@ _xfs_log_force( | |||
2840 | 3025 | ||
2841 | XFS_STATS_INC(xs_log_force); | 3026 | XFS_STATS_INC(xs_log_force); |
2842 | 3027 | ||
3028 | xlog_cil_push(log, 1); | ||
3029 | |||
2843 | spin_lock(&log->l_icloglock); | 3030 | spin_lock(&log->l_icloglock); |
2844 | 3031 | ||
2845 | iclog = log->l_iclog; | 3032 | iclog = log->l_iclog; |
@@ -2989,6 +3176,12 @@ _xfs_log_force_lsn( | |||
2989 | 3176 | ||
2990 | XFS_STATS_INC(xs_log_force); | 3177 | XFS_STATS_INC(xs_log_force); |
2991 | 3178 | ||
3179 | if (log->l_cilp) { | ||
3180 | lsn = xlog_cil_push_lsn(log, lsn); | ||
3181 | if (lsn == NULLCOMMITLSN) | ||
3182 | return 0; | ||
3183 | } | ||
3184 | |||
2992 | try_again: | 3185 | try_again: |
2993 | spin_lock(&log->l_icloglock); | 3186 | spin_lock(&log->l_icloglock); |
2994 | iclog = log->l_iclog; | 3187 | iclog = log->l_iclog; |
@@ -3153,20 +3346,30 @@ xfs_log_ticket_get( | |||
3153 | return ticket; | 3346 | return ticket; |
3154 | } | 3347 | } |
3155 | 3348 | ||
3349 | xlog_tid_t | ||
3350 | xfs_log_get_trans_ident( | ||
3351 | struct xfs_trans *tp) | ||
3352 | { | ||
3353 | return tp->t_ticket->t_tid; | ||
3354 | } | ||
3355 | |||
3156 | /* | 3356 | /* |
3157 | * Allocate and initialise a new log ticket. | 3357 | * Allocate and initialise a new log ticket. |
3158 | */ | 3358 | */ |
3159 | STATIC xlog_ticket_t * | 3359 | xlog_ticket_t * |
3160 | xlog_ticket_alloc(xlog_t *log, | 3360 | xlog_ticket_alloc( |
3161 | int unit_bytes, | 3361 | struct log *log, |
3162 | int cnt, | 3362 | int unit_bytes, |
3163 | char client, | 3363 | int cnt, |
3164 | uint xflags) | 3364 | char client, |
3365 | uint xflags, | ||
3366 | int alloc_flags) | ||
3165 | { | 3367 | { |
3166 | xlog_ticket_t *tic; | 3368 | struct xlog_ticket *tic; |
3167 | uint num_headers; | 3369 | uint num_headers; |
3370 | int iclog_space; | ||
3168 | 3371 | ||
3169 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); | 3372 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags); |
3170 | if (!tic) | 3373 | if (!tic) |
3171 | return NULL; | 3374 | return NULL; |
3172 | 3375 | ||
@@ -3208,16 +3411,40 @@ xlog_ticket_alloc(xlog_t *log, | |||
3208 | /* for start-rec */ | 3411 | /* for start-rec */ |
3209 | unit_bytes += sizeof(xlog_op_header_t); | 3412 | unit_bytes += sizeof(xlog_op_header_t); |
3210 | 3413 | ||
3211 | /* for LR headers */ | 3414 | /* |
3212 | num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); | 3415 | * for LR headers - the space for data in an iclog is the size minus |
3416 | * the space used for the headers. If we use the iclog size, then we | ||
3417 | * undercalculate the number of headers required. | ||
3418 | * | ||
3419 | * Furthermore - the addition of op headers for split-recs might | ||
3420 | * increase the space required enough to require more log and op | ||
3421 | * headers, so take that into account too. | ||
3422 | * | ||
3423 | * IMPORTANT: This reservation makes the assumption that if this | ||
3424 | * transaction is the first in an iclog and hence has the LR headers | ||
3425 | * accounted to it, then the remaining space in the iclog is | ||
3426 | * exclusively for this transaction. i.e. if the transaction is larger | ||
3427 | * than the iclog, it will be the only thing in that iclog. | ||
3428 | * Fundamentally, this means we must pass the entire log vector to | ||
3429 | * xlog_write to guarantee this. | ||
3430 | */ | ||
3431 | iclog_space = log->l_iclog_size - log->l_iclog_hsize; | ||
3432 | num_headers = howmany(unit_bytes, iclog_space); | ||
3433 | |||
3434 | /* for split-recs - ophdrs added when data split over LRs */ | ||
3435 | unit_bytes += sizeof(xlog_op_header_t) * num_headers; | ||
3436 | |||
3437 | /* add extra header reservations if we overrun */ | ||
3438 | while (!num_headers || | ||
3439 | howmany(unit_bytes, iclog_space) > num_headers) { | ||
3440 | unit_bytes += sizeof(xlog_op_header_t); | ||
3441 | num_headers++; | ||
3442 | } | ||
3213 | unit_bytes += log->l_iclog_hsize * num_headers; | 3443 | unit_bytes += log->l_iclog_hsize * num_headers; |
3214 | 3444 | ||
3215 | /* for commit-rec LR header - note: padding will subsume the ophdr */ | 3445 | /* for commit-rec LR header - note: padding will subsume the ophdr */ |
3216 | unit_bytes += log->l_iclog_hsize; | 3446 | unit_bytes += log->l_iclog_hsize; |
3217 | 3447 | ||
3218 | /* for split-recs - ophdrs added when data split over LRs */ | ||
3219 | unit_bytes += sizeof(xlog_op_header_t) * num_headers; | ||
3220 | |||
3221 | /* for roundoff padding for transaction data and one for commit record */ | 3448 | /* for roundoff padding for transaction data and one for commit record */ |
3222 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) && | 3449 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) && |
3223 | log->l_mp->m_sb.sb_logsunit > 1) { | 3450 | log->l_mp->m_sb.sb_logsunit > 1) { |
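A worked example of the new header estimate may help. The numbers are hypothetical: 32k iclog buffers with 512-byte LR headers and a 70000-byte transaction, with 12 bytes standing in for sizeof(xlog_op_header_t).

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int main(void)
{
	int iclog_size = 32768, iclog_hsize = 512, ophdr = 12;
	int unit_bytes = 70000;				/* hypothetical */
	int iclog_space = iclog_size - iclog_hsize;	/* 32256 */
	int num_headers = howmany(unit_bytes, iclog_space);	/* 3 */

	/* split-rec op headers for data crossing LR boundaries */
	unit_bytes += ophdr * num_headers;

	/* grow the estimate until it covers its own overhead */
	while (!num_headers ||
	       howmany(unit_bytes, iclog_space) > num_headers) {
		unit_bytes += ophdr;
		num_headers++;
	}
	unit_bytes += iclog_hsize * num_headers;

	printf("num_headers=%d unit_bytes=%d\n", num_headers, unit_bytes);
	return 0;
}

Dividing by iclog_space rather than the raw buffer size, then re-checking after the op headers are added, avoids the undercounting that the comment above warns about.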
@@ -3233,13 +3460,13 @@ xlog_ticket_alloc(xlog_t *log, | |||
3233 | tic->t_curr_res = unit_bytes; | 3460 | tic->t_curr_res = unit_bytes; |
3234 | tic->t_cnt = cnt; | 3461 | tic->t_cnt = cnt; |
3235 | tic->t_ocnt = cnt; | 3462 | tic->t_ocnt = cnt; |
3236 | tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); | 3463 | tic->t_tid = random32(); |
3237 | tic->t_clientid = client; | 3464 | tic->t_clientid = client; |
3238 | tic->t_flags = XLOG_TIC_INITED; | 3465 | tic->t_flags = XLOG_TIC_INITED; |
3239 | tic->t_trans_type = 0; | 3466 | tic->t_trans_type = 0; |
3240 | if (xflags & XFS_LOG_PERM_RESERV) | 3467 | if (xflags & XFS_LOG_PERM_RESERV) |
3241 | tic->t_flags |= XLOG_TIC_PERM_RESERV; | 3468 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
3242 | sv_init(&(tic->t_wait), SV_DEFAULT, "logtick"); | 3469 | sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); |
3243 | 3470 | ||
3244 | xlog_tic_reset_res(tic); | 3471 | xlog_tic_reset_res(tic); |
3245 | 3472 | ||
@@ -3260,20 +3487,22 @@ xlog_ticket_alloc(xlog_t *log, | |||
3260 | * part of the log in case we trash the log structure. | 3487 | * part of the log in case we trash the log structure. |
3261 | */ | 3488 | */ |
3262 | void | 3489 | void |
3263 | xlog_verify_dest_ptr(xlog_t *log, | 3490 | xlog_verify_dest_ptr( |
3264 | __psint_t ptr) | 3491 | struct log *log, |
3492 | char *ptr) | ||
3265 | { | 3493 | { |
3266 | int i; | 3494 | int i; |
3267 | int good_ptr = 0; | 3495 | int good_ptr = 0; |
3268 | 3496 | ||
3269 | for (i=0; i < log->l_iclog_bufs; i++) { | 3497 | for (i = 0; i < log->l_iclog_bufs; i++) { |
3270 | if (ptr >= (__psint_t)log->l_iclog_bak[i] && | 3498 | if (ptr >= log->l_iclog_bak[i] && |
3271 | ptr <= (__psint_t)log->l_iclog_bak[i]+log->l_iclog_size) | 3499 | ptr <= log->l_iclog_bak[i] + log->l_iclog_size) |
3272 | good_ptr++; | 3500 | good_ptr++; |
3273 | } | 3501 | } |
3274 | if (! good_ptr) | 3502 | |
3503 | if (!good_ptr) | ||
3275 | xlog_panic("xlog_verify_dest_ptr: invalid ptr"); | 3504 | xlog_panic("xlog_verify_dest_ptr: invalid ptr"); |
3276 | } /* xlog_verify_dest_ptr */ | 3505 | } |
3277 | 3506 | ||
3278 | STATIC void | 3507 | STATIC void |
3279 | xlog_verify_grant_head(xlog_t *log, int equals) | 3508 | xlog_verify_grant_head(xlog_t *log, int equals) |
@@ -3459,6 +3688,11 @@ xlog_state_ioerror( | |||
3459 | * c. nothing new gets queued up after (a) and (b) are done. | 3688 | * c. nothing new gets queued up after (a) and (b) are done. |
3460 | * d. if !logerror, flush the iclogs to disk, then seal them off | 3689 | * d. if !logerror, flush the iclogs to disk, then seal them off |
3461 | * for business. | 3690 | * for business. |
3691 | * | ||
3692 | * Note: for delayed logging the !logerror case needs to flush the regions | ||
3693 | * held in memory out to the iclogs before flushing them to disk. This needs | ||
3694 | * to be done before the log is marked as shutdown, otherwise the flush to the | ||
3695 | * iclogs will fail. | ||
3462 | */ | 3696 | */ |
3463 | int | 3697 | int |
3464 | xfs_log_force_umount( | 3698 | xfs_log_force_umount( |
@@ -3492,6 +3726,16 @@ xfs_log_force_umount( | |||
3492 | return 1; | 3726 | return 1; |
3493 | } | 3727 | } |
3494 | retval = 0; | 3728 | retval = 0; |
3729 | |||
3730 | /* | ||
3731 | * Flush the in memory commit item list before marking the log as | ||
3732 | * being shut down. We need to do it in this order to ensure all the | ||
3733 | * completed transactions are flushed to disk with the xfs_log_force() | ||
3734 | * call below. | ||
3735 | */ | ||
3736 | if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
3737 | xlog_cil_push(log, 1); | ||
3738 | |||
3495 | /* | 3739 | /* |
3496 | * We must hold both the GRANT lock and the LOG lock, | 3740 | * We must hold both the GRANT lock and the LOG lock, |
3497 | * before we mark the filesystem SHUTDOWN and wake | 3741 | * before we mark the filesystem SHUTDOWN and wake |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 97a24c7795a4..04c78e642cc8 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #define __XFS_LOG_H__ | 19 | #define __XFS_LOG_H__ |
20 | 20 | ||
21 | /* get lsn fields */ | 21 | /* get lsn fields */ |
22 | |||
23 | #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) | 22 | #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) |
24 | #define BLOCK_LSN(lsn) ((uint)(lsn)) | 23 | #define BLOCK_LSN(lsn) ((uint)(lsn)) |
25 | 24 | ||
@@ -110,6 +109,15 @@ typedef struct xfs_log_iovec { | |||
110 | uint i_type; /* type of region */ | 109 | uint i_type; /* type of region */ |
111 | } xfs_log_iovec_t; | 110 | } xfs_log_iovec_t; |
112 | 111 | ||
112 | struct xfs_log_vec { | ||
113 | struct xfs_log_vec *lv_next; /* next lv in build list */ | ||
114 | int lv_niovecs; /* number of iovecs in lv */ | ||
115 | struct xfs_log_iovec *lv_iovecp; /* iovec array */ | ||
116 | struct xfs_log_item *lv_item; /* owner */ | ||
117 | char *lv_buf; /* formatted buffer */ | ||
118 | int lv_buf_len; /* size of formatted buffer */ | ||
119 | }; | ||
120 | |||
113 | /* | 121 | /* |
114 | * Structure used to pass callback function and the function's argument | 122 | * Structure used to pass callback function and the function's argument |
115 | * to the log manager. | 123 | * to the log manager. |
@@ -126,6 +134,14 @@ typedef struct xfs_log_callback { | |||
126 | struct xfs_mount; | 134 | struct xfs_mount; |
127 | struct xlog_in_core; | 135 | struct xlog_in_core; |
128 | struct xlog_ticket; | 136 | struct xlog_ticket; |
137 | struct xfs_log_item; | ||
138 | struct xfs_item_ops; | ||
139 | struct xfs_trans; | ||
140 | |||
141 | void xfs_log_item_init(struct xfs_mount *mp, | ||
142 | struct xfs_log_item *item, | ||
143 | int type, | ||
144 | struct xfs_item_ops *ops); | ||
129 | 145 | ||
130 | xfs_lsn_t xfs_log_done(struct xfs_mount *mp, | 146 | xfs_lsn_t xfs_log_done(struct xfs_mount *mp, |
131 | struct xlog_ticket *ticket, | 147 | struct xlog_ticket *ticket, |
@@ -174,9 +190,16 @@ int xfs_log_need_covered(struct xfs_mount *mp); | |||
174 | 190 | ||
175 | void xlog_iodone(struct xfs_buf *); | 191 | void xlog_iodone(struct xfs_buf *); |
176 | 192 | ||
177 | struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket); | 193 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
178 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 194 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
179 | 195 | ||
196 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | ||
197 | |||
198 | int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | ||
199 | struct xfs_log_vec *log_vector, | ||
200 | xfs_lsn_t *commit_lsn, int flags); | ||
201 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | ||
202 | |||
180 | #endif | 203 | #endif |
181 | 204 | ||
182 | 205 | ||
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c new file mode 100644 index 000000000000..bb17cc044bf3 --- /dev/null +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -0,0 +1,725 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it would be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write the Free Software Foundation, | ||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
16 | */ | ||
17 | |||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | ||
25 | #include "xfs_trans_priv.h" | ||
26 | #include "xfs_log_priv.h" | ||
27 | #include "xfs_sb.h" | ||
28 | #include "xfs_ag.h" | ||
29 | #include "xfs_dir2.h" | ||
30 | #include "xfs_dmapi.h" | ||
31 | #include "xfs_mount.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_alloc.h" | ||
34 | |||
35 | /* | ||
36 | * Perform initial CIL structure initialisation. If the CIL is not | ||
37 | * enabled in this filesystem, ensure the log->l_cilp is null so | ||
38 | * we can check this conditional to determine if we are doing delayed | ||
39 | * logging or not. | ||
40 | */ | ||
41 | int | ||
42 | xlog_cil_init( | ||
43 | struct log *log) | ||
44 | { | ||
45 | struct xfs_cil *cil; | ||
46 | struct xfs_cil_ctx *ctx; | ||
47 | |||
48 | log->l_cilp = NULL; | ||
49 | if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
50 | return 0; | ||
51 | |||
52 | cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); | ||
53 | if (!cil) | ||
54 | return ENOMEM; | ||
55 | |||
56 | ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); | ||
57 | if (!ctx) { | ||
58 | kmem_free(cil); | ||
59 | return ENOMEM; | ||
60 | } | ||
61 | |||
62 | INIT_LIST_HEAD(&cil->xc_cil); | ||
63 | INIT_LIST_HEAD(&cil->xc_committing); | ||
64 | spin_lock_init(&cil->xc_cil_lock); | ||
65 | init_rwsem(&cil->xc_ctx_lock); | ||
66 | sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); | ||
67 | |||
68 | INIT_LIST_HEAD(&ctx->committing); | ||
69 | INIT_LIST_HEAD(&ctx->busy_extents); | ||
70 | ctx->sequence = 1; | ||
71 | ctx->cil = cil; | ||
72 | cil->xc_ctx = ctx; | ||
73 | |||
74 | cil->xc_log = log; | ||
75 | log->l_cilp = cil; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | void | ||
80 | xlog_cil_destroy( | ||
81 | struct log *log) | ||
82 | { | ||
83 | if (!log->l_cilp) | ||
84 | return; | ||
85 | |||
86 | if (log->l_cilp->xc_ctx) { | ||
87 | if (log->l_cilp->xc_ctx->ticket) | ||
88 | xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); | ||
89 | kmem_free(log->l_cilp->xc_ctx); | ||
90 | } | ||
91 | |||
92 | ASSERT(list_empty(&log->l_cilp->xc_cil)); | ||
93 | kmem_free(log->l_cilp); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to | ||
98 | * recover, so we don't allow failure here. Also, we allocate in a context that | ||
99 | * we don't want to be issuing transactions from, so we need to tell the | ||
100 | * allocation code this as well. | ||
101 | * | ||
102 | * We don't reserve any space for the ticket - we are going to steal whatever | ||
103 | * space we require from transactions as they commit. To ensure we reserve all | ||
104 | * the space required, we need to set the current reservation of the ticket to | ||
105 | * zero so that we know to steal the initial transaction overhead from the | ||
106 | * first transaction commit. | ||
107 | */ | ||
108 | static struct xlog_ticket * | ||
109 | xlog_cil_ticket_alloc( | ||
110 | struct log *log) | ||
111 | { | ||
112 | struct xlog_ticket *tic; | ||
113 | |||
114 | tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0, | ||
115 | KM_SLEEP|KM_NOFS); | ||
116 | tic->t_trans_type = XFS_TRANS_CHECKPOINT; | ||
117 | |||
118 | /* | ||
119 | * set the current reservation to zero so we know to steal the basic | ||
120 | * transaction overhead reservation from the first transaction commit. | ||
121 | */ | ||
122 | tic->t_curr_res = 0; | ||
123 | return tic; | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * After the first stage of log recovery is done, we know where the head and | ||
128 | * tail of the log are. We need this log initialisation done before we can | ||
129 | * initialise the first CIL checkpoint context. | ||
130 | * | ||
131 | * Here we allocate a log ticket to track space usage during a CIL push. This | ||
132 | * ticket is passed to xlog_write() directly so that we don't slowly leak log | ||
133 | * space by failing to account for space used by log headers and additional | ||
134 | * region headers for split regions. | ||
135 | */ | ||
136 | void | ||
137 | xlog_cil_init_post_recovery( | ||
138 | struct log *log) | ||
139 | { | ||
140 | if (!log->l_cilp) | ||
141 | return; | ||
142 | |||
143 | log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); | ||
144 | log->l_cilp->xc_ctx->sequence = 1; | ||
145 | log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle, | ||
146 | log->l_curr_block); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Insert the log item into the CIL and calculate the difference in space | ||
151 | * consumed by the item. Add the space to the checkpoint ticket and calculate | ||
152 | * if the change requires additional log metadata. If it does, take that space | ||
153 | * as well. Remove the amount of space we added to the checkpoint ticket from | ||
154 | * the current transaction ticket so that the accounting works out correctly. | ||
155 | * | ||
156 | * If this is the first time the item is being placed into the CIL in this | ||
157 | * context, pin it so it can't be written to disk until the CIL is flushed to | ||
158 | * the iclog and the iclog written to disk. | ||
159 | */ | ||
160 | static void | ||
161 | xlog_cil_insert( | ||
162 | struct log *log, | ||
163 | struct xlog_ticket *ticket, | ||
164 | struct xfs_log_item *item, | ||
165 | struct xfs_log_vec *lv) | ||
166 | { | ||
167 | struct xfs_cil *cil = log->l_cilp; | ||
168 | struct xfs_log_vec *old = lv->lv_item->li_lv; | ||
169 | struct xfs_cil_ctx *ctx = cil->xc_ctx; | ||
170 | int len; | ||
171 | int diff_iovecs; | ||
172 | int iclog_space; | ||
173 | |||
174 | if (old) { | ||
175 | /* existing lv on log item, space used is a delta */ | ||
176 | ASSERT(!list_empty(&item->li_cil)); | ||
177 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | ||
178 | |||
179 | len = lv->lv_buf_len - old->lv_buf_len; | ||
180 | diff_iovecs = lv->lv_niovecs - old->lv_niovecs; | ||
181 | kmem_free(old->lv_buf); | ||
182 | kmem_free(old); | ||
183 | } else { | ||
184 | /* new lv, must pin the log item */ | ||
185 | ASSERT(!lv->lv_item->li_lv); | ||
186 | ASSERT(list_empty(&item->li_cil)); | ||
187 | |||
188 | len = lv->lv_buf_len; | ||
189 | diff_iovecs = lv->lv_niovecs; | ||
190 | IOP_PIN(lv->lv_item); | ||
191 | |||
192 | } | ||
193 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
194 | |||
195 | /* attach new log vector to log item */ | ||
196 | lv->lv_item->li_lv = lv; | ||
197 | |||
198 | spin_lock(&cil->xc_cil_lock); | ||
199 | list_move_tail(&item->li_cil, &cil->xc_cil); | ||
200 | ctx->nvecs += diff_iovecs; | ||
201 | |||
202 | /* | ||
203 | * If this is the first time the item is being committed to the CIL, | ||
204 | * store the sequence number on the log item so we can tell | ||
205 | * in future commits whether this is the first checkpoint the item is | ||
206 | * being committed into. | ||
207 | */ | ||
208 | if (!item->li_seq) | ||
209 | item->li_seq = ctx->sequence; | ||
210 | |||
211 | /* | ||
212 | * Now transfer enough transaction reservation to the context ticket | ||
213 | * for the checkpoint. The context ticket is special - the unit | ||
214 | * reservation has to grow as well as the current reservation as we | ||
215 | * steal from tickets so we can correctly determine the space used | ||
216 | * during the transaction commit. | ||
217 | */ | ||
218 | if (ctx->ticket->t_curr_res == 0) { | ||
219 | /* first commit in checkpoint, steal the header reservation */ | ||
220 | ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); | ||
221 | ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; | ||
222 | ticket->t_curr_res -= ctx->ticket->t_unit_res; | ||
223 | } | ||
224 | |||
225 | /* do we need space for more log record headers? */ | ||
226 | iclog_space = log->l_iclog_size - log->l_iclog_hsize; | ||
227 | if (len > 0 && (ctx->space_used / iclog_space != | ||
228 | (ctx->space_used + len) / iclog_space)) { | ||
229 | int hdrs; | ||
230 | |||
231 | hdrs = (len + iclog_space - 1) / iclog_space; | ||
232 | /* need to take into account split region headers, too */ | ||
233 | hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); | ||
234 | ctx->ticket->t_unit_res += hdrs; | ||
235 | ctx->ticket->t_curr_res += hdrs; | ||
236 | ticket->t_curr_res -= hdrs; | ||
237 | ASSERT(ticket->t_curr_res >= len); | ||
238 | } | ||
239 | ticket->t_curr_res -= len; | ||
240 | ctx->space_used += len; | ||
241 | |||
242 | spin_unlock(&cil->xc_cil_lock); | ||
243 | } | ||
244 | |||
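A small worked example of the header accounting in xlog_cil_insert(), with invented sizes (32k iclogs, 512-byte LR headers, 12-byte op headers): extra reservation is only stolen when an item pushes the aggregate CIL space across an iclog-space boundary.

#include <stdio.h>

int main(void)
{
	int iclog_space = 32768 - 512;	/* l_iclog_size - l_iclog_hsize */
	int space_used = 30000;		/* ctx->space_used so far */
	int len = 5000;			/* formatted size of this item */
	int hdrs = 0;

	if (len > 0 && (space_used / iclog_space !=
			(space_used + len) / iclog_space)) {
		/* crossed a boundary: one more LR header plus an ophdr */
		hdrs = (len + iclog_space - 1) / iclog_space;
		hdrs *= 512 + 12;
	}
	printf("steal %d header bytes and %d data bytes from the ticket\n",
	       hdrs, len);
	return 0;
}

Here 30000/32256 == 0 but 35000/32256 == 1, so this insert steals 524 bytes of header reservation on top of the 5000 bytes for the item itself.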
245 | /* | ||
246 | * Format log item into a flat buffers | ||
247 | * | ||
248 | * For delayed logging, we need to hold a formatted buffer containing all the | ||
249 | * changes on the log item. This enables us to relog the item in memory and | ||
250 | * write it out asynchronously without needing to relock the object that was | ||
251 | * modified at the time it gets written into the iclog. | ||
252 | * | ||
253 | * This function builds a vector for the changes in each log item in the | ||
254 | * transaction. It then works out the length of the buffer needed for each log | ||
255 | * item, allocates them and formats the vector for the item into the buffer. | ||
256 | * The buffer is then attached to the log item, which is then inserted into the | ||
257 | * Committed Item List for tracking until the next checkpoint is written out. | ||
258 | * | ||
259 | * We don't set up region headers during this process; we simply copy the | ||
260 | * regions into the flat buffer. We can do this because we still have to do a | ||
261 | * formatting step to write the regions into the iclog buffer. Writing the | ||
262 | * ophdrs during the iclog write means that we can support splitting large | ||
263 | * regions across iclog boundaries without needing a change in the format of the | ||
264 | * item/region encapsulation. | ||
265 | * | ||
266 | * Hence what we need to do now is rewrite the vector array to point | ||
267 | * to the copied region inside the buffer we just allocated. This allows us to | ||
268 | * format the regions into the iclog as though they are being formatted | ||
269 | * directly out of the objects themselves. | ||
270 | */ | ||
271 | static void | ||
272 | xlog_cil_format_items( | ||
273 | struct log *log, | ||
274 | struct xfs_log_vec *log_vector, | ||
275 | struct xlog_ticket *ticket, | ||
276 | xfs_lsn_t *start_lsn) | ||
277 | { | ||
278 | struct xfs_log_vec *lv; | ||
279 | |||
280 | if (start_lsn) | ||
281 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
282 | |||
283 | ASSERT(log_vector); | ||
284 | for (lv = log_vector; lv; lv = lv->lv_next) { | ||
285 | void *ptr; | ||
286 | int index; | ||
287 | int len = 0; | ||
288 | |||
289 | /* build the vector array and calculate its length */ | ||
290 | IOP_FORMAT(lv->lv_item, lv->lv_iovecp); | ||
291 | for (index = 0; index < lv->lv_niovecs; index++) | ||
292 | len += lv->lv_iovecp[index].i_len; | ||
293 | |||
294 | lv->lv_buf_len = len; | ||
295 | lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); | ||
296 | ptr = lv->lv_buf; | ||
297 | |||
298 | for (index = 0; index < lv->lv_niovecs; index++) { | ||
299 | struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; | ||
300 | |||
301 | memcpy(ptr, vec->i_addr, vec->i_len); | ||
302 | vec->i_addr = ptr; | ||
303 | ptr += vec->i_len; | ||
304 | } | ||
305 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); | ||
306 | |||
307 | xlog_cil_insert(log, ticket, lv->lv_item, lv); | ||
308 | } | ||
309 | } | ||
310 | |||
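The pointer rewrite at the heart of xlog_cil_format_items() can be modelled in a few lines of user-space C. This is only a sketch of the idea, not kernel code: each region is copied into one flat buffer and the iovec is repointed at the copy, so the original object may be relocked and modified while the formatted image stays stable.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct iov { void *addr; int len; };

int main(void)
{
	char r0[] = "hdr", r1[] = "payload";
	struct iov v[2] = { { r0, sizeof(r0) }, { r1, sizeof(r1) } };
	char *buf, *ptr;
	int i, len = 0;

	for (i = 0; i < 2; i++)
		len += v[i].len;
	buf = ptr = calloc(1, len);

	for (i = 0; i < 2; i++) {
		memcpy(ptr, v[i].addr, v[i].len);
		v[i].addr = ptr;	/* iovec now points into buf */
		ptr += v[i].len;
	}

	strcpy(r1, "garbage");	/* the original can change freely... */
	printf("%s %s\n", (char *)v[0].addr, (char *)v[1].addr);
	free(buf);
	return 0;
}

This prints "hdr payload" even though r1 was overwritten afterwards, which is exactly the property delayed logging relies on to relog items without holding their locks.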
311 | static void | ||
312 | xlog_cil_free_logvec( | ||
313 | struct xfs_log_vec *log_vector) | ||
314 | { | ||
315 | struct xfs_log_vec *lv; | ||
316 | |||
317 | for (lv = log_vector; lv; ) { | ||
318 | struct xfs_log_vec *next = lv->lv_next; | ||
319 | kmem_free(lv->lv_buf); | ||
320 | kmem_free(lv); | ||
321 | lv = next; | ||
322 | } | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Commit a transaction with the given vector to the Committed Item List. | ||
327 | * | ||
328 | * To do this, we need to format the item, pin it in memory if required and | ||
329 | * account for the space used by the transaction. Once we have done that we | ||
330 | * need to release the unused reservation for the transaction, attach the | ||
331 | * transaction to the checkpoint context so we carry the busy extents through | ||
332 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
333 | * | ||
334 | * For more specific information about the order of operations in | ||
335 | * xfs_log_commit_cil() please refer to the comments in | ||
336 | * xfs_trans_commit_iclog(). | ||
337 | * | ||
338 | * Called with the context lock already held in read mode to lock out | ||
339 | * background commit, returns without it held once background commits are | ||
340 | * allowed again. | ||
341 | */ | ||
342 | int | ||
343 | xfs_log_commit_cil( | ||
344 | struct xfs_mount *mp, | ||
345 | struct xfs_trans *tp, | ||
346 | struct xfs_log_vec *log_vector, | ||
347 | xfs_lsn_t *commit_lsn, | ||
348 | int flags) | ||
349 | { | ||
350 | struct log *log = mp->m_log; | ||
351 | int log_flags = 0; | ||
352 | int push = 0; | ||
353 | |||
354 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
355 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
356 | |||
357 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
358 | xlog_cil_free_logvec(log_vector); | ||
359 | return XFS_ERROR(EIO); | ||
360 | } | ||
361 | |||
362 | /* lock out background commit */ | ||
363 | down_read(&log->l_cilp->xc_ctx_lock); | ||
364 | xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
365 | |||
366 | /* check we didn't blow the reservation */ | ||
367 | if (tp->t_ticket->t_curr_res < 0) | ||
368 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
369 | |||
370 | /* attach the transaction to the CIL if it has any busy extents */ | ||
371 | if (!list_empty(&tp->t_busy)) { | ||
372 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
373 | list_splice_init(&tp->t_busy, | ||
374 | &log->l_cilp->xc_ctx->busy_extents); | ||
375 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
376 | } | ||
377 | |||
378 | tp->t_commit_lsn = *commit_lsn; | ||
379 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
380 | xfs_trans_unreserve_and_mod_sb(tp); | ||
381 | |||
382 | /* check for background commit before unlock */ | ||
383 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
384 | push = 1; | ||
385 | up_read(&log->l_cilp->xc_ctx_lock); | ||
386 | |||
387 | /* | ||
388 | * We need to push CIL every so often so we don't cache more than we | ||
389 | * can fit in the log. The limit really is that a checkpoint can't be | ||
390 | * more than half the log (the current checkpoint is not allowed to | ||
391 | * overwrite the previous checkpoint), but commit latency and memory | ||
392 | * usage limit this to a smaller size in most cases. | ||
393 | */ | ||
394 | if (push) | ||
395 | xlog_cil_push(log, 0); | ||
396 | return 0; | ||
397 | } | ||
398 | |||
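XLOG_CIL_SPACE_LIMIT() itself is not visible in this hunk. The sketch below assumes a definition consistent with the "quarter of the log or 8MB, whichever is smaller" rule described in the xfs_log_priv.h comment later in this patch, so treat the macro body as an assumption rather than the actual kernel definition.

#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))
/* assumed definition, modelled on the sizing comment in xfs_log_priv.h */
#define CIL_SPACE_LIMIT(logsize)	MIN((logsize) >> 2, 8 * 1024 * 1024)

int main(void)
{
	int logsize = 16 * 1024 * 1024;		/* 16MB log */
	int space_used = 5 * 1024 * 1024;	/* 5MB cached in the CIL */

	if (space_used > CIL_SPACE_LIMIT(logsize))	/* 5MB > 4MB */
		printf("trigger background xlog_cil_push()\n");
	else
		printf("keep aggregating in memory\n");
	return 0;
}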
399 | /* | ||
400 | * Mark all items committed and clear busy extents. We free the log vector | ||
401 | * chains in a separate pass so that we unpin the log items as quickly as | ||
402 | * possible. | ||
403 | */ | ||
404 | static void | ||
405 | xlog_cil_committed( | ||
406 | void *args, | ||
407 | int abort) | ||
408 | { | ||
409 | struct xfs_cil_ctx *ctx = args; | ||
410 | struct xfs_log_vec *lv; | ||
411 | int abortflag = abort ? XFS_LI_ABORTED : 0; | ||
412 | struct xfs_busy_extent *busyp, *n; | ||
413 | |||
414 | /* unpin all the log items */ | ||
415 | for (lv = ctx->lv_chain; lv; lv = lv->lv_next) { | ||
416 | xfs_trans_item_committed(lv->lv_item, ctx->start_lsn, | ||
417 | abortflag); | ||
418 | } | ||
419 | |||
420 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | ||
421 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | ||
422 | |||
423 | spin_lock(&ctx->cil->xc_cil_lock); | ||
424 | list_del(&ctx->committing); | ||
425 | spin_unlock(&ctx->cil->xc_cil_lock); | ||
426 | |||
427 | xlog_cil_free_logvec(ctx->lv_chain); | ||
428 | kmem_free(ctx); | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Push the Committed Item List to the log. If the push_now flag is not set, | ||
433 | * then it is a background flush and so we can choose to ignore it. | ||
434 | */ | ||
435 | int | ||
436 | xlog_cil_push( | ||
437 | struct log *log, | ||
438 | int push_now) | ||
439 | { | ||
440 | struct xfs_cil *cil = log->l_cilp; | ||
441 | struct xfs_log_vec *lv; | ||
442 | struct xfs_cil_ctx *ctx; | ||
443 | struct xfs_cil_ctx *new_ctx; | ||
444 | struct xlog_in_core *commit_iclog; | ||
445 | struct xlog_ticket *tic; | ||
446 | int num_lv; | ||
447 | int num_iovecs; | ||
448 | int len; | ||
449 | int error = 0; | ||
450 | struct xfs_trans_header thdr; | ||
451 | struct xfs_log_iovec lhdr; | ||
452 | struct xfs_log_vec lvhdr = { NULL }; | ||
453 | xfs_lsn_t commit_lsn; | ||
454 | |||
455 | if (!cil) | ||
456 | return 0; | ||
457 | |||
458 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | ||
459 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | ||
460 | |||
461 | /* lock out transaction commit, but don't block on background push */ | ||
462 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | ||
463 | if (!push_now) | ||
464 | goto out_free_ticket; | ||
465 | down_write(&cil->xc_ctx_lock); | ||
466 | } | ||
467 | ctx = cil->xc_ctx; | ||
468 | |||
469 | /* check if we've anything to push */ | ||
470 | if (list_empty(&cil->xc_cil)) | ||
471 | goto out_skip; | ||
472 | |||
473 | /* check for spurious background flush */ | ||
474 | if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | ||
475 | goto out_skip; | ||
476 | |||
477 | /* | ||
478 | * pull all the log vectors off the items in the CIL, and | ||
479 | * remove the items from the CIL. We don't need the CIL lock | ||
480 | * here because it's only needed on the transaction commit | ||
481 | * side which is currently locked out by the flush lock. | ||
482 | */ | ||
483 | lv = NULL; | ||
484 | num_lv = 0; | ||
485 | num_iovecs = 0; | ||
486 | len = 0; | ||
487 | while (!list_empty(&cil->xc_cil)) { | ||
488 | struct xfs_log_item *item; | ||
489 | int i; | ||
490 | |||
491 | item = list_first_entry(&cil->xc_cil, | ||
492 | struct xfs_log_item, li_cil); | ||
493 | list_del_init(&item->li_cil); | ||
494 | if (!ctx->lv_chain) | ||
495 | ctx->lv_chain = item->li_lv; | ||
496 | else | ||
497 | lv->lv_next = item->li_lv; | ||
498 | lv = item->li_lv; | ||
499 | item->li_lv = NULL; | ||
500 | |||
501 | num_lv++; | ||
502 | num_iovecs += lv->lv_niovecs; | ||
503 | for (i = 0; i < lv->lv_niovecs; i++) | ||
504 | len += lv->lv_iovecp[i].i_len; | ||
505 | } | ||
506 | |||
507 | /* | ||
508 | * initialise the new context and attach it to the CIL. Then attach | ||
509 | * the current context to the CIL committing list so it can be found | ||
510 | * during log forces to extract the commit lsn of the sequence that | ||
511 | * needs to be forced. | ||
512 | */ | ||
513 | INIT_LIST_HEAD(&new_ctx->committing); | ||
514 | INIT_LIST_HEAD(&new_ctx->busy_extents); | ||
515 | new_ctx->sequence = ctx->sequence + 1; | ||
516 | new_ctx->cil = cil; | ||
517 | cil->xc_ctx = new_ctx; | ||
518 | |||
519 | /* | ||
520 | * The switch is now done, so we can drop the context lock and move out | ||
521 | * of a shared context. We can't just go straight to the commit record, | ||
522 | * though - we need to synchronise with previous and future commits so | ||
523 | * that the commit records are correctly ordered in the log to ensure | ||
524 | * that we process items during log IO completion in the correct order. | ||
525 | * | ||
526 | * For example, if we get an EFI in one checkpoint and the EFD in the | ||
527 | * next (e.g. due to log forces), we do not want the checkpoint with | ||
528 | * the EFD to be committed before the checkpoint with the EFI. Hence | ||
529 | * we must strictly order the commit records of the checkpoints so | ||
530 | * that: a) the checkpoint callbacks are attached to the iclogs in the | ||
531 | * correct order; and b) the checkpoints are replayed in correct order | ||
532 | * in log recovery. | ||
533 | * | ||
534 | * Hence we need to add this context to the committing context list so | ||
535 | * that higher sequences will wait for us to write out a commit record | ||
536 | * before they do. | ||
537 | */ | ||
538 | spin_lock(&cil->xc_cil_lock); | ||
539 | list_add(&ctx->committing, &cil->xc_committing); | ||
540 | spin_unlock(&cil->xc_cil_lock); | ||
541 | up_write(&cil->xc_ctx_lock); | ||
542 | |||
543 | /* | ||
544 | * Build a checkpoint transaction header and write it to the log to | ||
545 | * begin the transaction. We need to account for the space used by the | ||
546 | * transaction header here as it is not accounted for in xlog_write(). | ||
547 | * | ||
548 | * The LSN we need to pass to the log items on transaction commit is | ||
549 | * the LSN reported by the first log vector write. If we use the commit | ||
550 | * record lsn then we can move the tail beyond the grant write head. | ||
551 | */ | ||
552 | tic = ctx->ticket; | ||
553 | thdr.th_magic = XFS_TRANS_HEADER_MAGIC; | ||
554 | thdr.th_type = XFS_TRANS_CHECKPOINT; | ||
555 | thdr.th_tid = tic->t_tid; | ||
556 | thdr.th_num_items = num_iovecs; | ||
557 | lhdr.i_addr = (xfs_caddr_t)&thdr; | ||
558 | lhdr.i_len = sizeof(xfs_trans_header_t); | ||
559 | lhdr.i_type = XLOG_REG_TYPE_TRANSHDR; | ||
560 | tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t); | ||
561 | |||
562 | lvhdr.lv_niovecs = 1; | ||
563 | lvhdr.lv_iovecp = &lhdr; | ||
564 | lvhdr.lv_next = ctx->lv_chain; | ||
565 | |||
566 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); | ||
567 | if (error) | ||
568 | goto out_abort; | ||
569 | |||
570 | /* | ||
571 | * now that we've written the checkpoint into the log, strictly | ||
572 | * order the commit records so replay will get them in the right order. | ||
573 | */ | ||
574 | restart: | ||
575 | spin_lock(&cil->xc_cil_lock); | ||
576 | list_for_each_entry(new_ctx, &cil->xc_committing, committing) { | ||
577 | /* | ||
578 | * Higher sequences will wait for this one so skip them. | ||
579 | * Don't wait for our own sequence, either. | ||
580 | */ | ||
581 | if (new_ctx->sequence >= ctx->sequence) | ||
582 | continue; | ||
583 | if (!new_ctx->commit_lsn) { | ||
584 | /* | ||
585 | * It is still being pushed! Wait for the push to | ||
586 | * complete, then start again from the beginning. | ||
587 | */ | ||
588 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | ||
589 | goto restart; | ||
590 | } | ||
591 | } | ||
592 | spin_unlock(&cil->xc_cil_lock); | ||
593 | |||
594 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); | ||
595 | if (error || commit_lsn == -1) | ||
596 | goto out_abort; | ||
597 | |||
598 | /* attach all the transactions w/ busy extents to iclog */ | ||
599 | ctx->log_cb.cb_func = xlog_cil_committed; | ||
600 | ctx->log_cb.cb_arg = ctx; | ||
601 | error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb); | ||
602 | if (error) | ||
603 | goto out_abort; | ||
604 | |||
605 | /* | ||
606 | * now the checkpoint commit is complete and we've attached the | ||
607 | * callbacks to the iclog we can assign the commit LSN to the context | ||
608 | * and wake up anyone who is waiting for the commit to complete. | ||
609 | */ | ||
610 | spin_lock(&cil->xc_cil_lock); | ||
611 | ctx->commit_lsn = commit_lsn; | ||
612 | sv_broadcast(&cil->xc_commit_wait); | ||
613 | spin_unlock(&cil->xc_cil_lock); | ||
614 | |||
615 | /* release the hounds! */ | ||
616 | return xfs_log_release_iclog(log->l_mp, commit_iclog); | ||
617 | |||
618 | out_skip: | ||
619 | up_write(&cil->xc_ctx_lock); | ||
620 | out_free_ticket: | ||
621 | xfs_log_ticket_put(new_ctx->ticket); | ||
622 | kmem_free(new_ctx); | ||
623 | return 0; | ||
624 | |||
625 | out_abort: | ||
626 | xlog_cil_committed(ctx, XFS_LI_ABORTED); | ||
627 | return XFS_ERROR(EIO); | ||
628 | } | ||
629 | |||
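The commit-record ordering rule in xlog_cil_push() reduces to a simple scan, sketched sequentially below with made-up sequence numbers (the sv_wait()/sv_broadcast() synchronisation is elided): a context may only write its commit record once every lower-numbered context on the committing list has a commit_lsn.

#include <stdio.h>

struct cil_ctx { int sequence; long commit_lsn; };	/* 0 = still pushing */

int main(void)
{
	/* committing list: sequence 3 is done, sequence 4 still writing */
	struct cil_ctx committing[] = { { 3, 0x100 }, { 4, 0 } };
	int my_sequence = 5, i, must_wait = 0;

	for (i = 0; i < 2; i++) {
		if (committing[i].sequence >= my_sequence)
			continue;	/* they wait for us, not vice versa */
		if (!committing[i].commit_lsn)
			must_wait = 1;	/* kernel: sv_wait(), then rescan */
	}
	printf("%s\n", must_wait ? "wait and rescan" : "write commit record");
	return 0;
}

Sequence 4 has no commit_lsn yet, so sequence 5 must wait; this is what keeps an EFD-bearing checkpoint from hitting the log ahead of the checkpoint holding its EFI.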
630 | /* | ||
631 | * Conditionally push the CIL based on the sequence passed in. | ||
632 | * | ||
633 | * We only need to push if we haven't already pushed the sequence | ||
634 | * number given. Hence the only time we will trigger a push here is | ||
635 | * if the push sequence is the same as the current context. | ||
636 | * | ||
637 | * We return the current commit lsn to allow the callers to determine if an | ||
638 | * iclog flush is necessary following this call. | ||
639 | * | ||
640 | * XXX: Initially, just push the CIL unconditionally and return whatever | ||
641 | * commit lsn is there. It'll be empty, so this is broken for now. | ||
642 | */ | ||
643 | xfs_lsn_t | ||
644 | xlog_cil_push_lsn( | ||
645 | struct log *log, | ||
646 | xfs_lsn_t push_seq) | ||
647 | { | ||
648 | struct xfs_cil *cil = log->l_cilp; | ||
649 | struct xfs_cil_ctx *ctx; | ||
650 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; | ||
651 | |||
652 | restart: | ||
653 | down_write(&cil->xc_ctx_lock); | ||
654 | ASSERT(push_seq <= cil->xc_ctx->sequence); | ||
655 | |||
656 | /* check to see if we need to force out the current context */ | ||
657 | if (push_seq == cil->xc_ctx->sequence) { | ||
658 | up_write(&cil->xc_ctx_lock); | ||
659 | xlog_cil_push(log, 1); | ||
660 | goto restart; | ||
661 | } | ||
662 | |||
663 | /* | ||
664 | * See if we can find a previous sequence still committing. | ||
665 | * We can drop the flush lock as soon as we have the cil lock | ||
666 | * because we are now only comparing contexts protected by | ||
667 | * the cil lock. | ||
668 | * | ||
669 | * We need to wait for all previous sequence commits to complete | ||
670 | * before allowing the force of push_seq to go ahead. Hence block | ||
671 | * on commits for those as well. | ||
672 | */ | ||
673 | spin_lock(&cil->xc_cil_lock); | ||
674 | up_write(&cil->xc_ctx_lock); | ||
675 | list_for_each_entry(ctx, &cil->xc_committing, committing) { | ||
676 | if (ctx->sequence > push_seq) | ||
677 | continue; | ||
678 | if (!ctx->commit_lsn) { | ||
679 | /* | ||
680 | * It is still being pushed! Wait for the push to | ||
681 | * complete, then start again from the beginning. | ||
682 | */ | ||
683 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | ||
684 | goto restart; | ||
685 | } | ||
686 | if (ctx->sequence != push_seq) | ||
687 | continue; | ||
688 | /* found it! */ | ||
689 | commit_lsn = ctx->commit_lsn; | ||
690 | } | ||
691 | spin_unlock(&cil->xc_cil_lock); | ||
692 | return commit_lsn; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * Check if the current log item was first committed in this sequence. | ||
697 | * We can't rely on just the log item being in the CIL, we have to check | ||
698 | * the recorded commit sequence number. | ||
699 | * | ||
700 | * Note: for this to be used in a non-racy manner, it has to be called with | ||
701 | * CIL flushing locked out. As a result, it should only be used during the | ||
702 | * transaction commit process when deciding what to format into the item. | ||
703 | */ | ||
704 | bool | ||
705 | xfs_log_item_in_current_chkpt( | ||
706 | struct xfs_log_item *lip) | ||
707 | { | ||
708 | struct xfs_cil_ctx *ctx; | ||
709 | |||
710 | if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG)) | ||
711 | return false; | ||
712 | if (list_empty(&lip->li_cil)) | ||
713 | return false; | ||
714 | |||
715 | ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; | ||
716 | |||
717 | /* | ||
718 | * li_seq is written on the first commit of a log item to record the | ||
719 | * first checkpoint it is written to. Hence if it is different to the | ||
720 | * current sequence, we're in a new checkpoint. | ||
721 | */ | ||
722 | if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0) | ||
723 | return false; | ||
724 | return true; | ||
725 | } | ||
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index fd02a18facd5..8c072618965c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -152,8 +152,6 @@ static inline uint xlog_get_client_id(__be32 i) | |||
152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ | 152 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ |
153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being | 153 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being |
154 | shutdown */ | 154 | shutdown */ |
155 | typedef __uint32_t xlog_tid_t; | ||
156 | |||
157 | 155 | ||
158 | #ifdef __KERNEL__ | 156 | #ifdef __KERNEL__ |
159 | /* | 157 | /* |
@@ -379,6 +377,99 @@ typedef struct xlog_in_core { | |||
379 | } xlog_in_core_t; | 377 | } xlog_in_core_t; |
380 | 378 | ||
381 | /* | 379 | /* |
380 | * The CIL context is used to aggregate per-transaction details as well be | ||
381 | * passed to the iclog for checkpoint post-commit processing. After being | ||
382 | * passed to the iclog, another context needs to be allocated for tracking the | ||
383 | * next set of transactions to be aggregated into a checkpoint. | ||
384 | */ | ||
385 | struct xfs_cil; | ||
386 | |||
387 | struct xfs_cil_ctx { | ||
388 | struct xfs_cil *cil; | ||
389 | xfs_lsn_t sequence; /* chkpt sequence # */ | ||
390 | xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ | ||
391 | xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ | ||
392 | struct xlog_ticket *ticket; /* chkpt ticket */ | ||
393 | int nvecs; /* number of regions */ | ||
394 | int space_used; /* aggregate size of regions */ | ||
395 | struct list_head busy_extents; /* busy extents in chkpt */ | ||
396 | struct xfs_log_vec *lv_chain; /* logvecs being pushed */ | ||
397 | xfs_log_callback_t log_cb; /* completion callback hook. */ | ||
398 | struct list_head committing; /* ctx committing list */ | ||
399 | }; | ||
400 | |||
401 | /* | ||
402 | * Committed Item List structure | ||
403 | * | ||
404 | * This structure is used to track log items that have been committed but not | ||
405 | * yet written into the log. It is used only when the delayed logging mount | ||
406 | * option is enabled. | ||
407 | * | ||
408 | * This structure tracks the list of committing checkpoint contexts so | ||
409 | * we can avoid the problem of having to hold out new transactions during a | ||
410 | * flush until we have the commit record LSN of the checkpoint. We can | ||
411 | * traverse the list of committing contexts in xlog_cil_push_lsn() to find a | ||
412 | * sequence match and extract the commit LSN directly from there. If the | ||
413 | * checkpoint is still in the process of committing, we can block waiting for | ||
414 | * the commit LSN to be determined as well. This should make synchronous | ||
415 | * operations almost as efficient as the old logging methods. | ||
416 | */ | ||
417 | struct xfs_cil { | ||
418 | struct log *xc_log; | ||
419 | struct list_head xc_cil; | ||
420 | spinlock_t xc_cil_lock; | ||
421 | struct xfs_cil_ctx *xc_ctx; | ||
422 | struct rw_semaphore xc_ctx_lock; | ||
423 | struct list_head xc_committing; | ||
424 | sv_t xc_commit_wait; | ||
425 | }; | ||
426 | |||
427 | /* | ||
428 | * The amount of log space we allow the CIL to aggregate is difficult to size. | ||
429 | * Whatever we choose, we have to make sure we can get a reservation for the log space | ||
430 | * effectively, that it is large enough to capture sufficient relogging to | ||
431 | * reduce log buffer IO significantly, but it is not too large for the log or | ||
432 | * induces too much latency when writing out through the iclogs. We track both | ||
433 | * space consumed and the number of vectors in the checkpoint context, so we | ||
434 | * need to decide which to use for limiting. | ||
435 | * | ||
436 | * Every log buffer we write out during a push needs a header reserved, which | ||
437 | * is at least one sector and more for v2 logs. Hence we need a reservation of | ||
438 | * at least 512 bytes per 32k of log space just for the LR headers. That means | ||
439 | * 16KB of reservation per megabyte of delayed logging space we will consume, | ||
440 | * plus various headers. The number of headers will vary based on the number of | ||
441 | * io vectors, so limiting on a specific number of vectors is going to result | ||
442 | * in transactions of varying size. IOWs, it is more consistent to track and | ||
443 | * limit space consumed in the log rather than by the number of objects being | ||
444 | * logged in order to prevent checkpoint ticket overruns. | ||
445 | * | ||
446 | * Further, use of static reservations through the log grant mechanism is | ||
447 | * problematic. It introduces a lot of complexity (e.g. reserve grant vs write | ||
448 | * grant) and a significant deadlock potential because regranting write space | ||
449 | * can block on log pushes. Hence if we have to regrant log space during a log | ||
450 | * push, we can deadlock. | ||
451 | * | ||
452 | * However, we can avoid this by use of a dynamic "reservation stealing" | ||
453 | * technique during transaction commit whereby unused reservation space in the | ||
454 | * transaction ticket is transferred to the CIL ctx commit ticket to cover the | ||
455 | * space needed by the checkpoint transaction. This means that we never need to | ||
456 | * specifically reserve space for the CIL checkpoint transaction, nor do we | ||
457 | * need to regrant space once the checkpoint completes. This also means the | ||
458 | * checkpoint transaction ticket is specific to the checkpoint context, rather | ||
459 | * than the CIL itself. | ||
460 | * | ||
461 | * With dynamic reservations, we can basically make up arbitrary limits for the | ||
462 | * checkpoint size so long as they don't violate any other size rules. Hence | ||
463 | * the initial maximum size for the checkpoint transaction will be set to a | ||
464 | * quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit | ||
465 | * right now based on the latency of writing out a large amount of data through | ||
466 | * the circular iclog buffers. | ||
467 | */ | ||
468 | |||
469 | #define XLOG_CIL_SPACE_LIMIT(log) \ | ||
470 | (min((log->l_logsize >> 2), (8 * 1024 * 1024))) | ||
471 | |||
472 | /* | ||
382 | * The reservation head lsn is not made up of a cycle number and block number. | 473 | * The reservation head lsn is not made up of a cycle number and block number. |
383 | * Instead, it uses a cycle number and byte number. Logs don't expect to | 474 | * Instead, it uses a cycle number and byte number. Logs don't expect to |
384 | * overflow 31 bits worth of byte offset, so using a byte number will mean | 475 | * overflow 31 bits worth of byte offset, so using a byte number will mean |
@@ -388,6 +479,7 @@ typedef struct log { | |||
388 | /* The following fields don't need locking */ | 479 | /* The following fields don't need locking */ |
389 | struct xfs_mount *l_mp; /* mount point */ | 480 | struct xfs_mount *l_mp; /* mount point */ |
390 | struct xfs_ail *l_ailp; /* AIL log is working with */ | 481 | struct xfs_ail *l_ailp; /* AIL log is working with */ |
482 | struct xfs_cil *l_cilp; /* CIL log is working with */ | ||
391 | struct xfs_buf *l_xbuf; /* extra buffer for log | 483 | struct xfs_buf *l_xbuf; /* extra buffer for log |
392 | * wrapping */ | 484 | * wrapping */ |
393 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 485 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
@@ -396,9 +488,7 @@ typedef struct log { | |||
396 | struct xfs_buf_cancel **l_buf_cancel_table; | 488 | struct xfs_buf_cancel **l_buf_cancel_table; |
397 | int l_iclog_hsize; /* size of iclog header */ | 489 | int l_iclog_hsize; /* size of iclog header */ |
398 | int l_iclog_heads; /* # of iclog header sectors */ | 490 | int l_iclog_heads; /* # of iclog header sectors */ |
399 | uint l_sectbb_log; /* log2 of sector size in BBs */ | 491 | uint l_sectBBsize; /* sector size in BBs (2^n) */ |
400 | uint l_sectbb_mask; /* sector size (in BBs) | ||
401 | * alignment mask */ | ||
402 | int l_iclog_size; /* size of log in bytes */ | 492 | int l_iclog_size; /* size of log in bytes */ |
403 | int l_iclog_size_log; /* log power size of log */ | 493 | int l_iclog_size_log; /* log power size of log */ |
404 | int l_iclog_bufs; /* number of iclog buffers */ | 494 | int l_iclog_bufs; /* number of iclog buffers */ |
@@ -440,14 +530,40 @@ typedef struct log { | |||
440 | 530 | ||
441 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 531 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
442 | 532 | ||
443 | |||
444 | /* common routines */ | 533 | /* common routines */ |
445 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); | 534 | extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); |
446 | extern int xlog_recover(xlog_t *log); | 535 | extern int xlog_recover(xlog_t *log); |
447 | extern int xlog_recover_finish(xlog_t *log); | 536 | extern int xlog_recover_finish(xlog_t *log); |
448 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | 537 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); |
449 | 538 | ||
450 | extern kmem_zone_t *xfs_log_ticket_zone; | 539 | extern kmem_zone_t *xfs_log_ticket_zone; |
540 | struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, | ||
541 | int count, char client, uint xflags, | ||
542 | int alloc_flags); | ||
543 | |||
544 | |||
545 | static inline void | ||
546 | xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) | ||
547 | { | ||
548 | *ptr += bytes; | ||
549 | *len -= bytes; | ||
550 | *off += bytes; | ||
551 | } | ||
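A userspace sketch of how this cursor helper is used when copying log regions into an iclog: the write pointer, remaining length and record offset advance in lockstep. char * replaces the kernel's void * so the pointer arithmetic is standard C, and the region contents are invented:

        #include <stdio.h>
        #include <string.h>

        /* Same bookkeeping as xlog_write_adv_cnt(), on a char * cursor. */
        static void adv_cnt(char **ptr, int *len, int *off, size_t bytes)
        {
                *ptr += bytes;
                *len -= bytes;
                *off += bytes;
        }

        int main(void)
        {
                char iclog[64];
                char *ptr = iclog;
                int len = sizeof(iclog);   /* space remaining */
                int off = 0;               /* bytes written so far */
                const char *regions[] = { "hdr", "inode-core", "data-fork" };

                for (int i = 0; i < 3; i++) {
                        size_t n = strlen(regions[i]);
                        memcpy(ptr, regions[i], n);
                        adv_cnt(&ptr, &len, &off, n);
                }
                printf("wrote %d bytes, %d remaining\n", off, len);
                return 0;
        }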
552 | |||
553 | void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); | ||
554 | int xlog_write(struct log *log, struct xfs_log_vec *log_vector, | ||
555 | struct xlog_ticket *tic, xfs_lsn_t *start_lsn, | ||
556 | xlog_in_core_t **commit_iclog, uint flags); | ||
557 | |||
558 | /* | ||
559 | * Committed Item List interfaces | ||
560 | */ | ||
561 | int xlog_cil_init(struct log *log); | ||
562 | void xlog_cil_init_post_recovery(struct log *log); | ||
563 | void xlog_cil_destroy(struct log *log); | ||
564 | |||
565 | int xlog_cil_push(struct log *log, int push_now); | ||
566 | xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); | ||
451 | 567 | ||
452 | /* | 568 | /* |
453 | * Unmount record type is used as a pseudo transaction type for the ticket. | 569 | * Unmount record type is used as a pseudo transaction type for the ticket. |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 22e6efdc17ea..14a69aec2c0b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -56,33 +56,61 @@ STATIC void xlog_recover_check_summary(xlog_t *); | |||
56 | #define xlog_recover_check_summary(log) | 56 | #define xlog_recover_check_summary(log) |
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | |||
60 | /* | 59 | /* |
61 | * Sector aligned buffer routines for buffer create/read/write/access | 60 | * Sector aligned buffer routines for buffer create/read/write/access |
62 | */ | 61 | */ |
63 | 62 | ||
64 | #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ | 63 | /* |
65 | ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ | 64 | * Verify the given count of basic blocks is a valid number of blocks |
66 | ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) | 65 | * to specify for an operation involving the given XFS log buffer. |
67 | #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) | 66 | * Returns nonzero if the count is valid, 0 otherwise. |
67 | */ | ||
68 | 68 | ||
69 | static inline int | ||
70 | xlog_buf_bbcount_valid( | ||
71 | xlog_t *log, | ||
72 | int bbcount) | ||
73 | { | ||
74 | return bbcount > 0 && bbcount <= log->l_logBBsize; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Allocate a buffer to hold log data. The buffer needs to be able | ||
79 | * to map to a range of nbblks basic blocks at any valid (basic | ||
80 | * block) offset within the log. | ||
81 | */ | ||
69 | STATIC xfs_buf_t * | 82 | STATIC xfs_buf_t * |
70 | xlog_get_bp( | 83 | xlog_get_bp( |
71 | xlog_t *log, | 84 | xlog_t *log, |
72 | int nbblks) | 85 | int nbblks) |
73 | { | 86 | { |
74 | if (nbblks <= 0 || nbblks > log->l_logBBsize) { | 87 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
75 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); | 88 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", |
76 | XFS_ERROR_REPORT("xlog_get_bp(1)", | 89 | nbblks); |
77 | XFS_ERRLEVEL_HIGH, log->l_mp); | 90 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
78 | return NULL; | 91 | return NULL; |
79 | } | 92 | } |
80 | 93 | ||
81 | if (log->l_sectbb_log) { | 94 | /* |
82 | if (nbblks > 1) | 95 | * We do log I/O in units of log sectors (a power-of-2 |
83 | nbblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); | 96 | * multiple of the basic block size), so we round up the |
84 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); | 97 | * requested size to acommodate the basic blocks required |
85 | } | 98 | * for complete log sectors. |
99 | * | ||
100 | * In addition, the buffer may be used for a non-sector- | ||
101 | * aligned block offset, in which case an I/O of the | ||
102 | * requested size could extend beyond the end of the | ||
103 | * buffer. If the requested size is only 1 basic block it | ||
104 | * will never straddle a sector boundary, so this won't be | ||
105 | * an issue. Nor will this be a problem if the log I/O is | ||
106 | * done in basic blocks (sector size 1). But otherwise we | ||
107 | * extend the buffer by one extra log sector to ensure | ||
108 | * there's space to accommodate this possibility. | ||
109 | */ | ||
110 | if (nbblks > 1 && log->l_sectBBsize > 1) | ||
111 | nbblks += log->l_sectBBsize; | ||
112 | nbblks = round_up(nbblks, log->l_sectBBsize); | ||
113 | |||
86 | return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); | 114 | return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); |
87 | } | 115 | } |
88 | 116 | ||
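The sizing rule is pure arithmetic: pad multi-block requests by one sector to cover a possibly unaligned starting block, then round up to whole sectors. A self-contained sketch, where ROUND_UP mirrors the kernel's power-of-two round_up():

        #include <stdio.h>

        #define ROUND_UP(x, y)  (((x) + (y) - 1) & ~((y) - 1))

        static int bufblks_needed(int nbblks, int sectBBsize)
        {
                /* one extra sector of slack for an unaligned starting block */
                if (nbblks > 1 && sectBBsize > 1)
                        nbblks += sectBBsize;
                return ROUND_UP(nbblks, sectBBsize);
        }

        int main(void)
        {
                /* 5 basic blocks on a 4-BB (2KB) sector log -> 12 BBs */
                printf("%d\n", bufblks_needed(5, 4));
                /* a single block never straddles a sector -> 4 after rounding */
                printf("%d\n", bufblks_needed(1, 4));
                return 0;
        }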
@@ -93,6 +121,10 @@ xlog_put_bp( | |||
93 | xfs_buf_free(bp); | 121 | xfs_buf_free(bp); |
94 | } | 122 | } |
95 | 123 | ||
124 | /* | ||
125 | * Return the address of the start of the given block number's data | ||
126 | * in a log buffer. The buffer covers a log sector-aligned region. | ||
127 | */ | ||
96 | STATIC xfs_caddr_t | 128 | STATIC xfs_caddr_t |
97 | xlog_align( | 129 | xlog_align( |
98 | xlog_t *log, | 130 | xlog_t *log, |
@@ -100,14 +132,14 @@ xlog_align( | |||
100 | int nbblks, | 132 | int nbblks, |
101 | xfs_buf_t *bp) | 133 | xfs_buf_t *bp) |
102 | { | 134 | { |
135 | xfs_daddr_t offset; | ||
103 | xfs_caddr_t ptr; | 136 | xfs_caddr_t ptr; |
104 | 137 | ||
105 | if (!log->l_sectbb_log) | 138 | offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1); |
106 | return XFS_BUF_PTR(bp); | 139 | ptr = XFS_BUF_PTR(bp) + BBTOB(offset); |
140 | |||
141 | ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp)); | ||
107 | 142 | ||
108 | ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); | ||
109 | ASSERT(XFS_BUF_SIZE(bp) >= | ||
110 | BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); | ||
111 | return ptr; | 143 | return ptr; |
112 | } | 144 | } |
113 | 145 | ||
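xlog_align() is the addressing counterpart: mask off the sub-sector block offset and scale it to bytes. A sketch with invented numbers; BBTOB is the usual 512-byte basic-block conversion:

        #include <stdio.h>

        #define BBTOB(bb)  ((bb) << 9)   /* 512-byte basic blocks to bytes */

        /* Byte offset of blk_no's data within a sector-aligned buffer. */
        static long align_offset(long blk_no, long sectBBsize)
        {
                return BBTOB(blk_no & (sectBBsize - 1));
        }

        int main(void)
        {
                /* block 7 in a buffer read at sector-aligned block 4, with
                 * 4-BB sectors: 7 & 3 = 3 blocks in, i.e. 1536 bytes. */
                printf("%ld\n", align_offset(7, 4));
                return 0;
        }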
@@ -124,21 +156,18 @@ xlog_bread_noalign( | |||
124 | { | 156 | { |
125 | int error; | 157 | int error; |
126 | 158 | ||
127 | if (nbblks <= 0 || nbblks > log->l_logBBsize) { | 159 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
128 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); | 160 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", |
129 | XFS_ERROR_REPORT("xlog_bread(1)", | 161 | nbblks); |
130 | XFS_ERRLEVEL_HIGH, log->l_mp); | 162 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
131 | return EFSCORRUPTED; | 163 | return EFSCORRUPTED; |
132 | } | 164 | } |
133 | 165 | ||
134 | if (log->l_sectbb_log) { | 166 | blk_no = round_down(blk_no, log->l_sectBBsize); |
135 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); | 167 | nbblks = round_up(nbblks, log->l_sectBBsize); |
136 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); | ||
137 | } | ||
138 | 168 | ||
139 | ASSERT(nbblks > 0); | 169 | ASSERT(nbblks > 0); |
140 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 170 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); |
141 | ASSERT(bp); | ||
142 | 171 | ||
143 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 172 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
144 | XFS_BUF_READ(bp); | 173 | XFS_BUF_READ(bp); |
@@ -186,17 +215,15 @@ xlog_bwrite( | |||
186 | { | 215 | { |
187 | int error; | 216 | int error; |
188 | 217 | ||
189 | if (nbblks <= 0 || nbblks > log->l_logBBsize) { | 218 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
190 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); | 219 | xlog_warn("XFS: Invalid block length (0x%x) given for buffer", |
191 | XFS_ERROR_REPORT("xlog_bwrite(1)", | 220 | nbblks); |
192 | XFS_ERRLEVEL_HIGH, log->l_mp); | 221 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
193 | return EFSCORRUPTED; | 222 | return EFSCORRUPTED; |
194 | } | 223 | } |
195 | 224 | ||
196 | if (log->l_sectbb_log) { | 225 | blk_no = round_down(blk_no, log->l_sectBBsize); |
197 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); | 226 | nbblks = round_up(nbblks, log->l_sectBBsize); |
198 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); | ||
199 | } | ||
200 | 227 | ||
201 | ASSERT(nbblks > 0); | 228 | ASSERT(nbblks > 0); |
202 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 229 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); |
@@ -327,39 +354,38 @@ xlog_find_cycle_start( | |||
327 | { | 354 | { |
328 | xfs_caddr_t offset; | 355 | xfs_caddr_t offset; |
329 | xfs_daddr_t mid_blk; | 356 | xfs_daddr_t mid_blk; |
357 | xfs_daddr_t end_blk; | ||
330 | uint mid_cycle; | 358 | uint mid_cycle; |
331 | int error; | 359 | int error; |
332 | 360 | ||
333 | mid_blk = BLK_AVG(first_blk, *last_blk); | 361 | end_blk = *last_blk; |
334 | while (mid_blk != first_blk && mid_blk != *last_blk) { | 362 | mid_blk = BLK_AVG(first_blk, end_blk); |
363 | while (mid_blk != first_blk && mid_blk != end_blk) { | ||
335 | error = xlog_bread(log, mid_blk, 1, bp, &offset); | 364 | error = xlog_bread(log, mid_blk, 1, bp, &offset); |
336 | if (error) | 365 | if (error) |
337 | return error; | 366 | return error; |
338 | mid_cycle = xlog_get_cycle(offset); | 367 | mid_cycle = xlog_get_cycle(offset); |
339 | if (mid_cycle == cycle) { | 368 | if (mid_cycle == cycle) |
340 | *last_blk = mid_blk; | 369 | end_blk = mid_blk; /* last_half_cycle == mid_cycle */ |
341 | /* last_half_cycle == mid_cycle */ | 370 | else |
342 | } else { | 371 | first_blk = mid_blk; /* first_half_cycle == mid_cycle */ |
343 | first_blk = mid_blk; | 372 | mid_blk = BLK_AVG(first_blk, end_blk); |
344 | /* first_half_cycle == mid_cycle */ | ||
345 | } | ||
346 | mid_blk = BLK_AVG(first_blk, *last_blk); | ||
347 | } | 373 | } |
348 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || | 374 | ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) || |
349 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); | 375 | (mid_blk == end_blk && mid_blk-1 == first_blk)); |
376 | |||
377 | *last_blk = end_blk; | ||
350 | 378 | ||
351 | return 0; | 379 | return 0; |
352 | } | 380 | } |
353 | 381 | ||
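The restructured loop is a first-occurrence binary search: end_blk tracks the best candidate and is only written back to *last_blk after convergence, which is what lets the function return early on a read error without clobbering the caller's value. A self-contained model over a synthetic cycle array, which assumes the "cycle+1 ... | cycle ..." layout the recovery code expects:

        #include <stdio.h>

        /* Find the first index in (first, last] whose cycle equals 'cycle',
         * mirroring the narrowing in xlog_find_cycle_start(). */
        static long find_cycle_start(const int *cycles, long first, long last,
                                     int cycle)
        {
                long end = last, mid = (first + end) / 2;

                while (mid != first && mid != end) {
                        if (cycles[mid] == cycle)
                                end = mid;      /* answer at mid or earlier */
                        else
                                first = mid;    /* still in the cycle+1 region */
                        mid = (first + end) / 2;
                }
                return end;
        }

        int main(void)
        {
                int cycles[] = { 5, 5, 5, 5, 4, 4, 4, 4 };  /* wrap at idx 4 */
                printf("first block of cycle 4: %ld\n",
                       find_cycle_start(cycles, 0, 7, 4));
                return 0;
        }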
354 | /* | 382 | /* |
355 | * Check that the range of blocks does not contain the cycle number | 383 | * Check that a range of blocks does not contain stop_on_cycle_no. |
356 | * given. The scan needs to occur from front to back and the ptr into the | 384 | * Fill in *new_blk with the block offset where such a block is |
357 | * region must be updated since a later routine will need to perform another | 385 | * found, or with -1 (an invalid block number) if there is no such |
358 | * test. If the region is completely good, we end up returning the same | 386 | * block in the range. The scan needs to occur from front to back |
359 | * last block number. | 387 | * and the pointer into the region must be updated since a later |
360 | * | 388 | * routine will need to perform another test. |
361 | * Set blkno to -1 if we encounter no errors. This is an invalid block number | ||
362 | * since we don't ever expect logs to get this large. | ||
363 | */ | 389 | */ |
364 | STATIC int | 390 | STATIC int |
365 | xlog_find_verify_cycle( | 391 | xlog_find_verify_cycle( |
@@ -376,12 +402,16 @@ xlog_find_verify_cycle( | |||
376 | xfs_caddr_t buf = NULL; | 402 | xfs_caddr_t buf = NULL; |
377 | int error = 0; | 403 | int error = 0; |
378 | 404 | ||
405 | /* | ||
406 | * Greedily allocate a buffer big enough to handle the full | ||
407 | * range of basic blocks we'll be examining. If that fails, | ||
408 | * try a smaller size. We need to be able to read at least | ||
409 | * a log sector, or we're out of luck. | ||
410 | */ | ||
379 | bufblks = 1 << ffs(nbblks); | 411 | bufblks = 1 << ffs(nbblks); |
380 | |||
381 | while (!(bp = xlog_get_bp(log, bufblks))) { | 412 | while (!(bp = xlog_get_bp(log, bufblks))) { |
382 | /* can't get enough memory to do everything in one big buffer */ | ||
383 | bufblks >>= 1; | 413 | bufblks >>= 1; |
384 | if (bufblks <= log->l_sectbb_log) | 414 | if (bufblks < log->l_sectBBsize) |
385 | return ENOMEM; | 415 | return ENOMEM; |
386 | } | 416 | } |
387 | 417 | ||
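Both this site and xlog_write_log_records() below use the same greedy buffer strategy: start big, halve on allocation failure, and give up once the buffer would be smaller than one log sector. A sketch with a stub allocator standing in for xlog_get_bp(); the 8-block failure threshold is invented:

        #include <errno.h>
        #include <stdio.h>
        #include <stdlib.h>

        /* Stub for xlog_get_bp(): pretend allocations above 8 blocks fail. */
        static void *get_bp(int bufblks)
        {
                return bufblks > 8 ? NULL : malloc((size_t)bufblks * 512);
        }

        int main(void)
        {
                int sectBBsize = 2;     /* 1KB log sectors */
                int bufblks = 32;       /* greedy first guess */
                void *bp;

                while (!(bp = get_bp(bufblks))) {
                        bufblks >>= 1;
                        if (bufblks < sectBBsize) {
                                fprintf(stderr, "ENOMEM\n");
                                return ENOMEM;
                        }
                }
                printf("got a %d-block buffer\n", bufblks);   /* prints 8 */
                free(bp);
                return 0;
        }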
@@ -629,7 +659,7 @@ xlog_find_head( | |||
629 | * In this case we want to find the first block with cycle | 659 | * In this case we want to find the first block with cycle |
630 | * number matching last_half_cycle. We expect the log to be | 660 | * number matching last_half_cycle. We expect the log to be |
631 | * some variation on | 661 | * some variation on |
632 | * x + 1 ... | x ... | 662 | * x + 1 ... | x ... | x |
633 | * The first block with cycle number x (last_half_cycle) will | 663 | * The first block with cycle number x (last_half_cycle) will |
634 | * be where the new head belongs. First we do a binary search | 664 | * be where the new head belongs. First we do a binary search |
635 | * for the first occurrence of last_half_cycle. The binary | 665 | * for the first occurrence of last_half_cycle. The binary |
@@ -639,11 +669,13 @@ xlog_find_head( | |||
639 | * the log, then we look for occurrences of last_half_cycle - 1 | 669 | * the log, then we look for occurrences of last_half_cycle - 1 |
640 | * at the end of the log. The cases we're looking for look | 670 | * at the end of the log. The cases we're looking for look |
641 | * like | 671 | * like |
642 | * x + 1 ... | x | x + 1 | x ... | 672 | * v binary search stopped here |
643 | * ^ binary search stopped here | 673 | * x + 1 ... | x | x + 1 | x ... | x |
674 | * ^ but we want to locate this spot | ||
644 | * or | 675 | * or |
645 | * x + 1 ... | x ... | x - 1 | x | ||
646 | * <---------> less than scan distance | 676 | * <---------> less than scan distance |
677 | * x + 1 ... | x ... | x - 1 | x | ||
678 | * ^ we want to locate this spot | ||
647 | */ | 679 | */ |
648 | stop_on_cycle = last_half_cycle; | 680 | stop_on_cycle = last_half_cycle; |
649 | if ((error = xlog_find_cycle_start(log, bp, first_blk, | 681 | if ((error = xlog_find_cycle_start(log, bp, first_blk, |
@@ -699,16 +731,16 @@ xlog_find_head( | |||
699 | * certainly not the head of the log. By searching for | 731 | * certainly not the head of the log. By searching for |
700 | * last_half_cycle-1 we accomplish that. | 732 | * last_half_cycle-1 we accomplish that. |
701 | */ | 733 | */ |
702 | start_blk = log_bbnum - num_scan_bblks + head_blk; | ||
703 | ASSERT(head_blk <= INT_MAX && | 734 | ASSERT(head_blk <= INT_MAX && |
704 | (xfs_daddr_t) num_scan_bblks - head_blk >= 0); | 735 | (xfs_daddr_t) num_scan_bblks >= head_blk); |
736 | start_blk = log_bbnum - (num_scan_bblks - head_blk); | ||
705 | if ((error = xlog_find_verify_cycle(log, start_blk, | 737 | if ((error = xlog_find_verify_cycle(log, start_blk, |
706 | num_scan_bblks - (int)head_blk, | 738 | num_scan_bblks - (int)head_blk, |
707 | (stop_on_cycle - 1), &new_blk))) | 739 | (stop_on_cycle - 1), &new_blk))) |
708 | goto bp_err; | 740 | goto bp_err; |
709 | if (new_blk != -1) { | 741 | if (new_blk != -1) { |
710 | head_blk = new_blk; | 742 | head_blk = new_blk; |
711 | goto bad_blk; | 743 | goto validate_head; |
712 | } | 744 | } |
713 | 745 | ||
714 | /* | 746 | /* |
@@ -726,7 +758,7 @@ xlog_find_head( | |||
726 | head_blk = new_blk; | 758 | head_blk = new_blk; |
727 | } | 759 | } |
728 | 760 | ||
729 | bad_blk: | 761 | validate_head: |
730 | /* | 762 | /* |
731 | * Now we need to make sure head_blk is not pointing to a block in | 763 | * Now we need to make sure head_blk is not pointing to a block in |
732 | * the middle of a log record. | 764 | * the middle of a log record. |
@@ -748,7 +780,7 @@ xlog_find_head( | |||
748 | if ((error = xlog_find_verify_log_record(log, start_blk, | 780 | if ((error = xlog_find_verify_log_record(log, start_blk, |
749 | &head_blk, 0)) == -1) { | 781 | &head_blk, 0)) == -1) { |
750 | /* We hit the beginning of the log during our search */ | 782 | /* We hit the beginning of the log during our search */ |
751 | start_blk = log_bbnum - num_scan_bblks + head_blk; | 783 | start_blk = log_bbnum - (num_scan_bblks - head_blk); |
752 | new_blk = log_bbnum; | 784 | new_blk = log_bbnum; |
753 | ASSERT(start_blk <= INT_MAX && | 785 | ASSERT(start_blk <= INT_MAX && |
754 | (xfs_daddr_t) log_bbnum-start_blk >= 0); | 786 | (xfs_daddr_t) log_bbnum-start_blk >= 0); |
@@ -833,12 +865,12 @@ xlog_find_tail( | |||
833 | if (*head_blk == 0) { /* special case */ | 865 | if (*head_blk == 0) { /* special case */ |
834 | error = xlog_bread(log, 0, 1, bp, &offset); | 866 | error = xlog_bread(log, 0, 1, bp, &offset); |
835 | if (error) | 867 | if (error) |
836 | goto bread_err; | 868 | goto done; |
837 | 869 | ||
838 | if (xlog_get_cycle(offset) == 0) { | 870 | if (xlog_get_cycle(offset) == 0) { |
839 | *tail_blk = 0; | 871 | *tail_blk = 0; |
840 | /* leave all other log inited values alone */ | 872 | /* leave all other log inited values alone */ |
841 | goto exit; | 873 | goto done; |
842 | } | 874 | } |
843 | } | 875 | } |
844 | 876 | ||
@@ -849,7 +881,7 @@ xlog_find_tail( | |||
849 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { | 881 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { |
850 | error = xlog_bread(log, i, 1, bp, &offset); | 882 | error = xlog_bread(log, i, 1, bp, &offset); |
851 | if (error) | 883 | if (error) |
852 | goto bread_err; | 884 | goto done; |
853 | 885 | ||
854 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { | 886 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { |
855 | found = 1; | 887 | found = 1; |
@@ -866,7 +898,7 @@ xlog_find_tail( | |||
866 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { | 898 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { |
867 | error = xlog_bread(log, i, 1, bp, &offset); | 899 | error = xlog_bread(log, i, 1, bp, &offset); |
868 | if (error) | 900 | if (error) |
869 | goto bread_err; | 901 | goto done; |
870 | 902 | ||
871 | if (XLOG_HEADER_MAGIC_NUM == | 903 | if (XLOG_HEADER_MAGIC_NUM == |
872 | be32_to_cpu(*(__be32 *)offset)) { | 904 | be32_to_cpu(*(__be32 *)offset)) { |
@@ -941,7 +973,7 @@ xlog_find_tail( | |||
941 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 973 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
942 | error = xlog_bread(log, umount_data_blk, 1, bp, &offset); | 974 | error = xlog_bread(log, umount_data_blk, 1, bp, &offset); |
943 | if (error) | 975 | if (error) |
944 | goto bread_err; | 976 | goto done; |
945 | 977 | ||
946 | op_head = (xlog_op_header_t *)offset; | 978 | op_head = (xlog_op_header_t *)offset; |
947 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { | 979 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { |
@@ -987,12 +1019,10 @@ xlog_find_tail( | |||
987 | * But... if the -device- itself is readonly, just skip this. | 1019 | * But... if the -device- itself is readonly, just skip this. |
988 | * We can't recover this device anyway, so it won't matter. | 1020 | * We can't recover this device anyway, so it won't matter. |
989 | */ | 1021 | */ |
990 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { | 1022 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) |
991 | error = xlog_clear_stale_blocks(log, tail_lsn); | 1023 | error = xlog_clear_stale_blocks(log, tail_lsn); |
992 | } | ||
993 | 1024 | ||
994 | bread_err: | 1025 | done: |
995 | exit: | ||
996 | xlog_put_bp(bp); | 1026 | xlog_put_bp(bp); |
997 | 1027 | ||
998 | if (error) | 1028 | if (error) |
@@ -1152,16 +1182,22 @@ xlog_write_log_records( | |||
1152 | xfs_caddr_t offset; | 1182 | xfs_caddr_t offset; |
1153 | xfs_buf_t *bp; | 1183 | xfs_buf_t *bp; |
1154 | int balign, ealign; | 1184 | int balign, ealign; |
1155 | int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); | 1185 | int sectbb = log->l_sectBBsize; |
1156 | int end_block = start_block + blocks; | 1186 | int end_block = start_block + blocks; |
1157 | int bufblks; | 1187 | int bufblks; |
1158 | int error = 0; | 1188 | int error = 0; |
1159 | int i, j = 0; | 1189 | int i, j = 0; |
1160 | 1190 | ||
1191 | /* | ||
1192 | * Greedily allocate a buffer big enough to handle the full | ||
1193 | * range of basic blocks to be written. If that fails, try | ||
1194 | * a smaller size. We need to be able to write at least a | ||
1195 | * log sector, or we're out of luck. | ||
1196 | */ | ||
1161 | bufblks = 1 << ffs(blocks); | 1197 | bufblks = 1 << ffs(blocks); |
1162 | while (!(bp = xlog_get_bp(log, bufblks))) { | 1198 | while (!(bp = xlog_get_bp(log, bufblks))) { |
1163 | bufblks >>= 1; | 1199 | bufblks >>= 1; |
1164 | if (bufblks <= log->l_sectbb_log) | 1200 | if (bufblks < sectbb) |
1165 | return ENOMEM; | 1201 | return ENOMEM; |
1166 | } | 1202 | } |
1167 | 1203 | ||
@@ -1169,7 +1205,7 @@ xlog_write_log_records( | |||
1169 | * the buffer in the starting sector not covered by the first | 1205 | * the buffer in the starting sector not covered by the first |
1170 | * write below. | 1206 | * write below. |
1171 | */ | 1207 | */ |
1172 | balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); | 1208 | balign = round_down(start_block, sectbb); |
1173 | if (balign != start_block) { | 1209 | if (balign != start_block) { |
1174 | error = xlog_bread_noalign(log, start_block, 1, bp); | 1210 | error = xlog_bread_noalign(log, start_block, 1, bp); |
1175 | if (error) | 1211 | if (error) |
@@ -1188,7 +1224,7 @@ xlog_write_log_records( | |||
1188 | * the buffer in the final sector not covered by the write. | 1224 | * the buffer in the final sector not covered by the write. |
1189 | * If this is the same sector as the above read, skip it. | 1225 | * If this is the same sector as the above read, skip it. |
1190 | */ | 1226 | */ |
1191 | ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); | 1227 | ealign = round_down(end_block, sectbb); |
1192 | if (j == 0 && (start_block + endcount > ealign)) { | 1228 | if (j == 0 && (start_block + endcount > ealign)) { |
1193 | offset = XFS_BUF_PTR(bp); | 1229 | offset = XFS_BUF_PTR(bp); |
1194 | balign = BBTOB(ealign - start_block); | 1230 | balign = BBTOB(ealign - start_block); |
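The balign/ealign pair brackets the write with a read-modify-write of any partial first and last sectors. A sketch of the window arithmetic with invented numbers; the j-based skip of a shared first/last sector in the real code is elided:

        #include <stdio.h>

        #define ROUND_DOWN(x, y)  ((x) & ~((y) - 1))   /* y: power of 2 */

        int main(void)
        {
                int sectbb = 4;                 /* sector size in basic blocks */
                int start_block = 6, blocks = 9;
                int end_block = start_block + blocks;          /* 15 */
                int balign = ROUND_DOWN(start_block, sectbb);  /* 4  */
                int ealign = ROUND_DOWN(end_block, sectbb);    /* 12 */

                if (balign != start_block)
                        printf("pre-read partial first sector at block %d\n",
                               balign);
                if (ealign != end_block)
                        printf("pre-read partial last sector at block %d\n",
                               ealign);
                printf("sector-aligned write spans blocks %d..%d\n",
                       balign, ealign + sectbb - 1);
                return 0;
        }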
@@ -1408,6 +1444,7 @@ xlog_recover_add_item( | |||
1408 | 1444 | ||
1409 | STATIC int | 1445 | STATIC int |
1410 | xlog_recover_add_to_cont_trans( | 1446 | xlog_recover_add_to_cont_trans( |
1447 | struct log *log, | ||
1411 | xlog_recover_t *trans, | 1448 | xlog_recover_t *trans, |
1412 | xfs_caddr_t dp, | 1449 | xfs_caddr_t dp, |
1413 | int len) | 1450 | int len) |
@@ -1434,6 +1471,7 @@ xlog_recover_add_to_cont_trans( | |||
1434 | memcpy(&ptr[old_len], dp, len); /* d, s, l */ | 1471 | memcpy(&ptr[old_len], dp, len); /* d, s, l */ |
1435 | item->ri_buf[item->ri_cnt-1].i_len += len; | 1472 | item->ri_buf[item->ri_cnt-1].i_len += len; |
1436 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; | 1473 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; |
1474 | trace_xfs_log_recover_item_add_cont(log, trans, item, 0); | ||
1437 | return 0; | 1475 | return 0; |
1438 | } | 1476 | } |
1439 | 1477 | ||
@@ -1452,6 +1490,7 @@ xlog_recover_add_to_cont_trans( | |||
1452 | */ | 1490 | */ |
1453 | STATIC int | 1491 | STATIC int |
1454 | xlog_recover_add_to_trans( | 1492 | xlog_recover_add_to_trans( |
1493 | struct log *log, | ||
1455 | xlog_recover_t *trans, | 1494 | xlog_recover_t *trans, |
1456 | xfs_caddr_t dp, | 1495 | xfs_caddr_t dp, |
1457 | int len) | 1496 | int len) |
@@ -1510,6 +1549,7 @@ xlog_recover_add_to_trans( | |||
1510 | item->ri_buf[item->ri_cnt].i_addr = ptr; | 1549 | item->ri_buf[item->ri_cnt].i_addr = ptr; |
1511 | item->ri_buf[item->ri_cnt].i_len = len; | 1550 | item->ri_buf[item->ri_cnt].i_len = len; |
1512 | item->ri_cnt++; | 1551 | item->ri_cnt++; |
1552 | trace_xfs_log_recover_item_add(log, trans, item, 0); | ||
1513 | return 0; | 1553 | return 0; |
1514 | } | 1554 | } |
1515 | 1555 | ||
@@ -1521,7 +1561,9 @@ xlog_recover_add_to_trans( | |||
1521 | */ | 1561 | */ |
1522 | STATIC int | 1562 | STATIC int |
1523 | xlog_recover_reorder_trans( | 1563 | xlog_recover_reorder_trans( |
1524 | xlog_recover_t *trans) | 1564 | struct log *log, |
1565 | xlog_recover_t *trans, | ||
1566 | int pass) | ||
1525 | { | 1567 | { |
1526 | xlog_recover_item_t *item, *n; | 1568 | xlog_recover_item_t *item, *n; |
1527 | LIST_HEAD(sort_list); | 1569 | LIST_HEAD(sort_list); |
@@ -1534,7 +1576,9 @@ xlog_recover_reorder_trans( | |||
1534 | 1576 | ||
1535 | switch (ITEM_TYPE(item)) { | 1577 | switch (ITEM_TYPE(item)) { |
1536 | case XFS_LI_BUF: | 1578 | case XFS_LI_BUF: |
1537 | if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { | 1579 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
1580 | trace_xfs_log_recover_item_reorder_head(log, | ||
1581 | trans, item, pass); | ||
1538 | list_move(&item->ri_list, &trans->r_itemq); | 1582 | list_move(&item->ri_list, &trans->r_itemq); |
1539 | break; | 1583 | break; |
1540 | } | 1584 | } |
@@ -1543,6 +1587,8 @@ xlog_recover_reorder_trans( | |||
1543 | case XFS_LI_QUOTAOFF: | 1587 | case XFS_LI_QUOTAOFF: |
1544 | case XFS_LI_EFD: | 1588 | case XFS_LI_EFD: |
1545 | case XFS_LI_EFI: | 1589 | case XFS_LI_EFI: |
1590 | trace_xfs_log_recover_item_reorder_tail(log, | ||
1591 | trans, item, pass); | ||
1546 | list_move_tail(&item->ri_list, &trans->r_itemq); | 1592 | list_move_tail(&item->ri_list, &trans->r_itemq); |
1547 | break; | 1593 | break; |
1548 | default: | 1594 | default: |
@@ -1592,8 +1638,10 @@ xlog_recover_do_buffer_pass1( | |||
1592 | /* | 1638 | /* |
1593 | * If this isn't a cancel buffer item, then just return. | 1639 | * If this isn't a cancel buffer item, then just return. |
1594 | */ | 1640 | */ |
1595 | if (!(flags & XFS_BLI_CANCEL)) | 1641 | if (!(flags & XFS_BLF_CANCEL)) { |
1642 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); | ||
1596 | return; | 1643 | return; |
1644 | } | ||
1597 | 1645 | ||
1598 | /* | 1646 | /* |
1599 | * Insert an xfs_buf_cancel record into the hash table of | 1647 | * Insert an xfs_buf_cancel record into the hash table of |
@@ -1627,6 +1675,7 @@ xlog_recover_do_buffer_pass1( | |||
1627 | while (nextp != NULL) { | 1675 | while (nextp != NULL) { |
1628 | if (nextp->bc_blkno == blkno && nextp->bc_len == len) { | 1676 | if (nextp->bc_blkno == blkno && nextp->bc_len == len) { |
1629 | nextp->bc_refcount++; | 1677 | nextp->bc_refcount++; |
1678 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); | ||
1630 | return; | 1679 | return; |
1631 | } | 1680 | } |
1632 | prevp = nextp; | 1681 | prevp = nextp; |
@@ -1640,13 +1689,14 @@ xlog_recover_do_buffer_pass1( | |||
1640 | bcp->bc_refcount = 1; | 1689 | bcp->bc_refcount = 1; |
1641 | bcp->bc_next = NULL; | 1690 | bcp->bc_next = NULL; |
1642 | prevp->bc_next = bcp; | 1691 | prevp->bc_next = bcp; |
1692 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); | ||
1643 | } | 1693 | } |
1644 | 1694 | ||
1645 | /* | 1695 | /* |
1646 | * Check to see whether the buffer being recovered has a corresponding | 1696 | * Check to see whether the buffer being recovered has a corresponding |
1647 | * entry in the buffer cancel record table. If it does then return 1 | 1697 | * entry in the buffer cancel record table. If it does then return 1 |
1648 | * so that it will be cancelled, otherwise return 0. If the buffer is | 1698 | * so that it will be cancelled, otherwise return 0. If the buffer is |
1649 | * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement | 1699 | * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement |
1650 | * the refcount on the entry in the table and remove it from the table | 1700 | * the refcount on the entry in the table and remove it from the table |
1651 | * if this is the last reference. | 1701 | * if this is the last reference. |
1652 | * | 1702 | * |
@@ -1671,7 +1721,7 @@ xlog_check_buffer_cancelled( | |||
1671 | * There is nothing in the table built in pass one, | 1721 | * There is nothing in the table built in pass one, |
1672 | * so this buffer must not be cancelled. | 1722 | * so this buffer must not be cancelled. |
1673 | */ | 1723 | */ |
1674 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1724 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1675 | return 0; | 1725 | return 0; |
1676 | } | 1726 | } |
1677 | 1727 | ||
@@ -1683,7 +1733,7 @@ xlog_check_buffer_cancelled( | |||
1683 | * There is no corresponding entry in the table built | 1733 | * There is no corresponding entry in the table built |
1684 | * in pass one, so this buffer has not been cancelled. | 1734 | * in pass one, so this buffer has not been cancelled. |
1685 | */ | 1735 | */ |
1686 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1736 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1687 | return 0; | 1737 | return 0; |
1688 | } | 1738 | } |
1689 | 1739 | ||
@@ -1702,7 +1752,7 @@ xlog_check_buffer_cancelled( | |||
1702 | * one in the table and remove it if this is the | 1752 | * one in the table and remove it if this is the |
1703 | * last reference. | 1753 | * last reference. |
1704 | */ | 1754 | */ |
1705 | if (flags & XFS_BLI_CANCEL) { | 1755 | if (flags & XFS_BLF_CANCEL) { |
1706 | bcp->bc_refcount--; | 1756 | bcp->bc_refcount--; |
1707 | if (bcp->bc_refcount == 0) { | 1757 | if (bcp->bc_refcount == 0) { |
1708 | if (prevp == NULL) { | 1758 | if (prevp == NULL) { |
@@ -1722,7 +1772,7 @@ xlog_check_buffer_cancelled( | |||
1722 | * We didn't find a corresponding entry in the table, so | 1772 | * We didn't find a corresponding entry in the table, so |
1723 | * return 0 so that the buffer is NOT cancelled. | 1773 | * return 0 so that the buffer is NOT cancelled. |
1724 | */ | 1774 | */ |
1725 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1775 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1726 | return 0; | 1776 | return 0; |
1727 | } | 1777 | } |
1728 | 1778 | ||
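Pass-two lookups follow the refcount protocol described above: a hit means the buffer should be cancelled, and a lookup made on behalf of the XFS_BLF_CANCEL item itself also drops one reference, removing the entry on the last drop. A compact model of that protocol, with the hash chain reduced to a single record for brevity:

        #include <stdbool.h>
        #include <stdio.h>

        struct cancel_rec {
                long blkno;
                int  refcount;
                bool live;      /* stands in for "still on the hash chain" */
        };

        /* Returns true if the buffer should be cancelled; a lookup by the
         * cancel item itself also consumes one reference. */
        static bool check_cancelled(struct cancel_rec *rec, long blkno,
                                    bool is_cancel_item)
        {
                if (!rec->live || rec->blkno != blkno)
                        return false;
                if (is_cancel_item && --rec->refcount == 0)
                        rec->live = false;   /* last reference: remove entry */
                return true;
        }

        int main(void)
        {
                struct cancel_rec rec = { .blkno = 100, .refcount = 2,
                                          .live = true };

                printf("%d\n", check_cancelled(&rec, 100, false)); /* 1 */
                printf("%d\n", check_cancelled(&rec, 100, true));  /* 1 */
                printf("%d\n", check_cancelled(&rec, 100, true));  /* 1, gone */
                printf("%d\n", check_cancelled(&rec, 100, false)); /* 0 */
                return 0;
        }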
@@ -1779,6 +1829,8 @@ xlog_recover_do_inode_buffer( | |||
1779 | unsigned int *data_map = NULL; | 1829 | unsigned int *data_map = NULL; |
1780 | unsigned int map_size = 0; | 1830 | unsigned int map_size = 0; |
1781 | 1831 | ||
1832 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | ||
1833 | |||
1782 | switch (buf_f->blf_type) { | 1834 | switch (buf_f->blf_type) { |
1783 | case XFS_LI_BUF: | 1835 | case XFS_LI_BUF: |
1784 | data_map = buf_f->blf_data_map; | 1836 | data_map = buf_f->blf_data_map; |
@@ -1822,8 +1874,8 @@ xlog_recover_do_inode_buffer( | |||
1822 | nbits = xfs_contig_bits(data_map, map_size, | 1874 | nbits = xfs_contig_bits(data_map, map_size, |
1823 | bit); | 1875 | bit); |
1824 | ASSERT(nbits > 0); | 1876 | ASSERT(nbits > 0); |
1825 | reg_buf_offset = bit << XFS_BLI_SHIFT; | 1877 | reg_buf_offset = bit << XFS_BLF_SHIFT; |
1826 | reg_buf_bytes = nbits << XFS_BLI_SHIFT; | 1878 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
1827 | item_index++; | 1879 | item_index++; |
1828 | } | 1880 | } |
1829 | 1881 | ||
@@ -1837,7 +1889,7 @@ xlog_recover_do_inode_buffer( | |||
1837 | } | 1889 | } |
1838 | 1890 | ||
1839 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1891 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1840 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0); | 1892 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
1841 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); | 1893 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); |
1842 | 1894 | ||
1843 | /* | 1895 | /* |
@@ -1874,6 +1926,7 @@ xlog_recover_do_inode_buffer( | |||
1874 | /*ARGSUSED*/ | 1926 | /*ARGSUSED*/ |
1875 | STATIC void | 1927 | STATIC void |
1876 | xlog_recover_do_reg_buffer( | 1928 | xlog_recover_do_reg_buffer( |
1929 | struct xfs_mount *mp, | ||
1877 | xlog_recover_item_t *item, | 1930 | xlog_recover_item_t *item, |
1878 | xfs_buf_t *bp, | 1931 | xfs_buf_t *bp, |
1879 | xfs_buf_log_format_t *buf_f) | 1932 | xfs_buf_log_format_t *buf_f) |
@@ -1885,6 +1938,8 @@ xlog_recover_do_reg_buffer( | |||
1885 | unsigned int map_size = 0; | 1938 | unsigned int map_size = 0; |
1886 | int error; | 1939 | int error; |
1887 | 1940 | ||
1941 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); | ||
1942 | |||
1888 | switch (buf_f->blf_type) { | 1943 | switch (buf_f->blf_type) { |
1889 | case XFS_LI_BUF: | 1944 | case XFS_LI_BUF: |
1890 | data_map = buf_f->blf_data_map; | 1945 | data_map = buf_f->blf_data_map; |
@@ -1900,9 +1955,9 @@ xlog_recover_do_reg_buffer( | |||
1900 | nbits = xfs_contig_bits(data_map, map_size, bit); | 1955 | nbits = xfs_contig_bits(data_map, map_size, bit); |
1901 | ASSERT(nbits > 0); | 1956 | ASSERT(nbits > 0); |
1902 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1957 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1903 | ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0); | 1958 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
1904 | ASSERT(XFS_BUF_COUNT(bp) >= | 1959 | ASSERT(XFS_BUF_COUNT(bp) >= |
1905 | ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT)); | 1960 | ((uint)bit << XFS_BLF_SHIFT)+(nbits<<XFS_BLF_SHIFT)); |
1906 | 1961 | ||
1907 | /* | 1962 | /* |
1908 | * Do a sanity check if this is a dquot buffer. Just checking | 1963 | * Do a sanity check if this is a dquot buffer. Just checking |
@@ -1911,7 +1966,7 @@ xlog_recover_do_reg_buffer( | |||
1911 | */ | 1966 | */ |
1912 | error = 0; | 1967 | error = 0; |
1913 | if (buf_f->blf_flags & | 1968 | if (buf_f->blf_flags & |
1914 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { | 1969 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
1915 | if (item->ri_buf[i].i_addr == NULL) { | 1970 | if (item->ri_buf[i].i_addr == NULL) { |
1916 | cmn_err(CE_ALERT, | 1971 | cmn_err(CE_ALERT, |
1917 | "XFS: NULL dquot in %s.", __func__); | 1972 | "XFS: NULL dquot in %s.", __func__); |
@@ -1932,9 +1987,9 @@ xlog_recover_do_reg_buffer( | |||
1932 | } | 1987 | } |
1933 | 1988 | ||
1934 | memcpy(xfs_buf_offset(bp, | 1989 | memcpy(xfs_buf_offset(bp, |
1935 | (uint)bit << XFS_BLI_SHIFT), /* dest */ | 1990 | (uint)bit << XFS_BLF_SHIFT), /* dest */ |
1936 | item->ri_buf[i].i_addr, /* source */ | 1991 | item->ri_buf[i].i_addr, /* source */ |
1937 | nbits<<XFS_BLI_SHIFT); /* length */ | 1992 | nbits<<XFS_BLF_SHIFT); /* length */ |
1938 | next: | 1993 | next: |
1939 | i++; | 1994 | i++; |
1940 | bit += nbits; | 1995 | bit += nbits; |
@@ -2083,6 +2138,8 @@ xlog_recover_do_dquot_buffer( | |||
2083 | { | 2138 | { |
2084 | uint type; | 2139 | uint type; |
2085 | 2140 | ||
2141 | trace_xfs_log_recover_buf_dquot_buf(log, buf_f); | ||
2142 | |||
2086 | /* | 2143 | /* |
2087 | * Filesystems are required to send in quota flags at mount time. | 2144 | * Filesystems are required to send in quota flags at mount time. |
2088 | */ | 2145 | */ |
@@ -2091,11 +2148,11 @@ xlog_recover_do_dquot_buffer( | |||
2091 | } | 2148 | } |
2092 | 2149 | ||
2093 | type = 0; | 2150 | type = 0; |
2094 | if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) | 2151 | if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) |
2095 | type |= XFS_DQ_USER; | 2152 | type |= XFS_DQ_USER; |
2096 | if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) | 2153 | if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) |
2097 | type |= XFS_DQ_PROJ; | 2154 | type |= XFS_DQ_PROJ; |
2098 | if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) | 2155 | if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) |
2099 | type |= XFS_DQ_GROUP; | 2156 | type |= XFS_DQ_GROUP; |
2100 | /* | 2157 | /* |
2101 | * This type of quotas was turned off, so ignore this buffer | 2158 | * This type of quotas was turned off, so ignore this buffer |
@@ -2103,7 +2160,7 @@ xlog_recover_do_dquot_buffer( | |||
2103 | if (log->l_quotaoffs_flag & type) | 2160 | if (log->l_quotaoffs_flag & type) |
2104 | return; | 2161 | return; |
2105 | 2162 | ||
2106 | xlog_recover_do_reg_buffer(item, bp, buf_f); | 2163 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); |
2107 | } | 2164 | } |
2108 | 2165 | ||
2109 | /* | 2166 | /* |
@@ -2116,7 +2173,7 @@ xlog_recover_do_dquot_buffer( | |||
2116 | * here which overlaps that may be stale. | 2173 | * here which overlaps that may be stale. |
2117 | * | 2174 | * |
2118 | * When meta-data buffers are freed at run time we log a buffer item | 2175 | * When meta-data buffers are freed at run time we log a buffer item |
2119 | * with the XFS_BLI_CANCEL bit set to indicate that previous copies | 2176 | * with the XFS_BLF_CANCEL bit set to indicate that previous copies |
2120 | * of the buffer in the log should not be replayed at recovery time. | 2177 | * of the buffer in the log should not be replayed at recovery time. |
2121 | * This is so that if the blocks covered by the buffer are reused for | 2178 | * This is so that if the blocks covered by the buffer are reused for |
2122 | * file data before we crash we don't end up replaying old, freed | 2179 | * file data before we crash we don't end up replaying old, freed |
@@ -2150,7 +2207,7 @@ xlog_recover_do_buffer_trans( | |||
2150 | if (pass == XLOG_RECOVER_PASS1) { | 2207 | if (pass == XLOG_RECOVER_PASS1) { |
2151 | /* | 2208 | /* |
2152 | * In this pass we're only looking for buf items | 2209 | * In this pass we're only looking for buf items |
2153 | * with the XFS_BLI_CANCEL bit set. | 2210 | * with the XFS_BLF_CANCEL bit set. |
2154 | */ | 2211 | */ |
2155 | xlog_recover_do_buffer_pass1(log, buf_f); | 2212 | xlog_recover_do_buffer_pass1(log, buf_f); |
2156 | return 0; | 2213 | return 0; |
@@ -2164,9 +2221,11 @@ xlog_recover_do_buffer_trans( | |||
2164 | */ | 2221 | */ |
2165 | cancel = xlog_recover_do_buffer_pass2(log, buf_f); | 2222 | cancel = xlog_recover_do_buffer_pass2(log, buf_f); |
2166 | if (cancel) { | 2223 | if (cancel) { |
2224 | trace_xfs_log_recover_buf_cancel(log, buf_f); | ||
2167 | return 0; | 2225 | return 0; |
2168 | } | 2226 | } |
2169 | } | 2227 | } |
2228 | trace_xfs_log_recover_buf_recover(log, buf_f); | ||
2170 | switch (buf_f->blf_type) { | 2229 | switch (buf_f->blf_type) { |
2171 | case XFS_LI_BUF: | 2230 | case XFS_LI_BUF: |
2172 | blkno = buf_f->blf_blkno; | 2231 | blkno = buf_f->blf_blkno; |
@@ -2185,7 +2244,7 @@ xlog_recover_do_buffer_trans( | |||
2185 | 2244 | ||
2186 | mp = log->l_mp; | 2245 | mp = log->l_mp; |
2187 | buf_flags = XBF_LOCK; | 2246 | buf_flags = XBF_LOCK; |
2188 | if (!(flags & XFS_BLI_INODE_BUF)) | 2247 | if (!(flags & XFS_BLF_INODE_BUF)) |
2189 | buf_flags |= XBF_MAPPED; | 2248 | buf_flags |= XBF_MAPPED; |
2190 | 2249 | ||
2191 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); | 2250 | bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); |
@@ -2198,13 +2257,13 @@ xlog_recover_do_buffer_trans( | |||
2198 | } | 2257 | } |
2199 | 2258 | ||
2200 | error = 0; | 2259 | error = 0; |
2201 | if (flags & XFS_BLI_INODE_BUF) { | 2260 | if (flags & XFS_BLF_INODE_BUF) { |
2202 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2261 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2203 | } else if (flags & | 2262 | } else if (flags & |
2204 | (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { | 2263 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
2205 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2264 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
2206 | } else { | 2265 | } else { |
2207 | xlog_recover_do_reg_buffer(item, bp, buf_f); | 2266 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); |
2208 | } | 2267 | } |
2209 | if (error) | 2268 | if (error) |
2210 | return XFS_ERROR(error); | 2269 | return XFS_ERROR(error); |
@@ -2284,8 +2343,10 @@ xlog_recover_do_inode_trans( | |||
2284 | if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, | 2343 | if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, |
2285 | in_f->ilf_len, 0)) { | 2344 | in_f->ilf_len, 0)) { |
2286 | error = 0; | 2345 | error = 0; |
2346 | trace_xfs_log_recover_inode_cancel(log, in_f); | ||
2287 | goto error; | 2347 | goto error; |
2288 | } | 2348 | } |
2349 | trace_xfs_log_recover_inode_recover(log, in_f); | ||
2289 | 2350 | ||
2290 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, | 2351 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, |
2291 | XBF_LOCK); | 2352 | XBF_LOCK); |
@@ -2337,6 +2398,7 @@ xlog_recover_do_inode_trans( | |||
2337 | /* do nothing */ | 2398 | /* do nothing */ |
2338 | } else { | 2399 | } else { |
2339 | xfs_buf_relse(bp); | 2400 | xfs_buf_relse(bp); |
2401 | trace_xfs_log_recover_inode_skip(log, in_f); | ||
2340 | error = 0; | 2402 | error = 0; |
2341 | goto error; | 2403 | goto error; |
2342 | } | 2404 | } |
@@ -2758,11 +2820,12 @@ xlog_recover_do_trans( | |||
2758 | int error = 0; | 2820 | int error = 0; |
2759 | xlog_recover_item_t *item; | 2821 | xlog_recover_item_t *item; |
2760 | 2822 | ||
2761 | error = xlog_recover_reorder_trans(trans); | 2823 | error = xlog_recover_reorder_trans(log, trans, pass); |
2762 | if (error) | 2824 | if (error) |
2763 | return error; | 2825 | return error; |
2764 | 2826 | ||
2765 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | 2827 | list_for_each_entry(item, &trans->r_itemq, ri_list) { |
2828 | trace_xfs_log_recover_item_recover(log, trans, item, pass); | ||
2766 | switch (ITEM_TYPE(item)) { | 2829 | switch (ITEM_TYPE(item)) { |
2767 | case XFS_LI_BUF: | 2830 | case XFS_LI_BUF: |
2768 | error = xlog_recover_do_buffer_trans(log, item, pass); | 2831 | error = xlog_recover_do_buffer_trans(log, item, pass); |
@@ -2919,8 +2982,9 @@ xlog_recover_process_data( | |||
2919 | error = xlog_recover_unmount_trans(trans); | 2982 | error = xlog_recover_unmount_trans(trans); |
2920 | break; | 2983 | break; |
2921 | case XLOG_WAS_CONT_TRANS: | 2984 | case XLOG_WAS_CONT_TRANS: |
2922 | error = xlog_recover_add_to_cont_trans(trans, | 2985 | error = xlog_recover_add_to_cont_trans(log, |
2923 | dp, be32_to_cpu(ohead->oh_len)); | 2986 | trans, dp, |
2987 | be32_to_cpu(ohead->oh_len)); | ||
2924 | break; | 2988 | break; |
2925 | case XLOG_START_TRANS: | 2989 | case XLOG_START_TRANS: |
2926 | xlog_warn( | 2990 | xlog_warn( |
@@ -2930,7 +2994,7 @@ xlog_recover_process_data( | |||
2930 | break; | 2994 | break; |
2931 | case 0: | 2995 | case 0: |
2932 | case XLOG_CONTINUE_TRANS: | 2996 | case XLOG_CONTINUE_TRANS: |
2933 | error = xlog_recover_add_to_trans(trans, | 2997 | error = xlog_recover_add_to_trans(log, trans, |
2934 | dp, be32_to_cpu(ohead->oh_len)); | 2998 | dp, be32_to_cpu(ohead->oh_len)); |
2935 | break; | 2999 | break; |
2936 | default: | 3000 | default: |
@@ -3331,42 +3395,6 @@ xlog_pack_data( | |||
3331 | } | 3395 | } |
3332 | } | 3396 | } |
3333 | 3397 | ||
3334 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) | ||
3335 | STATIC void | ||
3336 | xlog_unpack_data_checksum( | ||
3337 | xlog_rec_header_t *rhead, | ||
3338 | xfs_caddr_t dp, | ||
3339 | xlog_t *log) | ||
3340 | { | ||
3341 | __be32 *up = (__be32 *)dp; | ||
3342 | uint chksum = 0; | ||
3343 | int i; | ||
3344 | |||
3345 | /* divide length by 4 to get # words */ | ||
3346 | for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) { | ||
3347 | chksum ^= be32_to_cpu(*up); | ||
3348 | up++; | ||
3349 | } | ||
3350 | if (chksum != be32_to_cpu(rhead->h_chksum)) { | ||
3351 | if (rhead->h_chksum || | ||
3352 | ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) { | ||
3353 | cmn_err(CE_DEBUG, | ||
3354 | "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n", | ||
3355 | be32_to_cpu(rhead->h_chksum), chksum); | ||
3356 | cmn_err(CE_DEBUG, | ||
3357 | "XFS: Disregard message if filesystem was created with non-DEBUG kernel"); | ||
3358 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | ||
3359 | cmn_err(CE_DEBUG, | ||
3360 | "XFS: LogR this is a LogV2 filesystem\n"); | ||
3361 | } | ||
3362 | log->l_flags |= XLOG_CHKSUM_MISMATCH; | ||
3363 | } | ||
3364 | } | ||
3365 | } | ||
3366 | #else | ||
3367 | #define xlog_unpack_data_checksum(rhead, dp, log) | ||
3368 | #endif | ||
3369 | |||
3370 | STATIC void | 3398 | STATIC void |
3371 | xlog_unpack_data( | 3399 | xlog_unpack_data( |
3372 | xlog_rec_header_t *rhead, | 3400 | xlog_rec_header_t *rhead, |
@@ -3390,8 +3418,6 @@ xlog_unpack_data( | |||
3390 | dp += BBSIZE; | 3418 | dp += BBSIZE; |
3391 | } | 3419 | } |
3392 | } | 3420 | } |
3393 | |||
3394 | xlog_unpack_data_checksum(rhead, dp, log); | ||
3395 | } | 3421 | } |
3396 | 3422 | ||
3397 | STATIC int | 3423 | STATIC int |
@@ -3490,7 +3516,7 @@ xlog_do_recovery_pass( | |||
3490 | hblks = 1; | 3516 | hblks = 1; |
3491 | } | 3517 | } |
3492 | } else { | 3518 | } else { |
3493 | ASSERT(log->l_sectbb_log == 0); | 3519 | ASSERT(log->l_sectBBsize == 1); |
3494 | hblks = 1; | 3520 | hblks = 1; |
3495 | hbp = xlog_get_bp(log, 1); | 3521 | hbp = xlog_get_bp(log, 1); |
3496 | h_size = XLOG_BIG_RECORD_BSIZE; | 3522 | h_size = XLOG_BIG_RECORD_BSIZE; |
@@ -3946,10 +3972,6 @@ xlog_recover_check_summary( | |||
3946 | xfs_agf_t *agfp; | 3972 | xfs_agf_t *agfp; |
3947 | xfs_buf_t *agfbp; | 3973 | xfs_buf_t *agfbp; |
3948 | xfs_buf_t *agibp; | 3974 | xfs_buf_t *agibp; |
3949 | xfs_buf_t *sbbp; | ||
3950 | #ifdef XFS_LOUD_RECOVERY | ||
3951 | xfs_sb_t *sbp; | ||
3952 | #endif | ||
3953 | xfs_agnumber_t agno; | 3975 | xfs_agnumber_t agno; |
3954 | __uint64_t freeblks; | 3976 | __uint64_t freeblks; |
3955 | __uint64_t itotal; | 3977 | __uint64_t itotal; |
@@ -3984,30 +4006,5 @@ xlog_recover_check_summary( | |||
3984 | xfs_buf_relse(agibp); | 4006 | xfs_buf_relse(agibp); |
3985 | } | 4007 | } |
3986 | } | 4008 | } |
3987 | |||
3988 | sbbp = xfs_getsb(mp, 0); | ||
3989 | #ifdef XFS_LOUD_RECOVERY | ||
3990 | sbp = &mp->m_sb; | ||
3991 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp)); | ||
3992 | cmn_err(CE_NOTE, | ||
3993 | "xlog_recover_check_summary: sb_icount %Lu itotal %Lu", | ||
3994 | sbp->sb_icount, itotal); | ||
3995 | cmn_err(CE_NOTE, | ||
3996 | "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu", | ||
3997 | sbp->sb_ifree, ifree); | ||
3998 | cmn_err(CE_NOTE, | ||
3999 | "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu", | ||
4000 | sbp->sb_fdblocks, freeblks); | ||
4001 | #if 0 | ||
4002 | /* | ||
4003 | * This is turned off until I account for the allocation | ||
4004 | * btree blocks which live in free space. | ||
4005 | */ | ||
4006 | ASSERT(sbp->sb_icount == itotal); | ||
4007 | ASSERT(sbp->sb_ifree == ifree); | ||
4008 | ASSERT(sbp->sb_fdblocks == freeblks); | ||
4009 | #endif | ||
4010 | #endif | ||
4011 | xfs_buf_relse(sbbp); | ||
4012 | } | 4009 | } |
4013 | #endif /* DEBUG */ | 4010 | #endif /* DEBUG */ |
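For reference, the checksum the deleted debug code verified was a plain XOR fold of the record payload taken as big-endian 32-bit words. A standalone version of that calculation; the sample payload is invented:

        #include <stdint.h>
        #include <stdio.h>

        /* XOR-fold 'len' bytes as big-endian 32-bit words, as the removed
         * xlog_unpack_data_checksum() did (len assumed word-aligned). */
        static uint32_t xlog_chksum(const unsigned char *dp, int len)
        {
                uint32_t chksum = 0;

                for (int i = 0; i < len >> 2; i++, dp += 4)
                        chksum ^= (uint32_t)dp[0] << 24 | (uint32_t)dp[1] << 16 |
                                  (uint32_t)dp[2] << 8  | (uint32_t)dp[3];
                return chksum;
        }

        int main(void)
        {
                unsigned char rec[8] = { 0, 0, 0, 1, 0, 0, 0, 3 };

                printf("0x%x\n", xlog_chksum(rec, sizeof(rec)));  /* 0x2 */
                return 0;
        }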
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h index 75d749207258..1c55ccbb379d 100644 --- a/fs/xfs/xfs_log_recover.h +++ b/fs/xfs/xfs_log_recover.h | |||
@@ -28,7 +28,7 @@ | |||
28 | #define XLOG_RHASH(tid) \ | 28 | #define XLOG_RHASH(tid) \ |
29 | ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) | 29 | ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) |
30 | 30 | ||
31 | #define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK / 2 + 1) | 31 | #define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1) |
32 | 32 | ||
33 | 33 | ||
34 | /* | 34 | /* |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e79b56b4bca6..d7bf38c8cd1c 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -1405,13 +1405,6 @@ xfs_mountfs( | |||
1405 | xfs_qm_mount_quotas(mp); | 1405 | xfs_qm_mount_quotas(mp); |
1406 | } | 1406 | } |
1407 | 1407 | ||
1408 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) | ||
1409 | if (XFS_IS_QUOTA_ON(mp)) | ||
1410 | xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); | ||
1411 | else | ||
1412 | xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); | ||
1413 | #endif | ||
1414 | |||
1415 | /* | 1408 | /* |
1416 | * Now we are mounted, reserve a small amount of unused space for | 1409 | * Now we are mounted, reserve a small amount of unused space for |
1417 | * privileged transactions. This is needed so that transaction | 1410 | * privileged transactions. This is needed so that transaction |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9ff48a16a7ee..1d2c7eed4eda 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -268,6 +268,7 @@ typedef struct xfs_mount { | |||
268 | #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops | 268 | #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops |
269 | must be synchronous except | 269 | must be synchronous except |
270 | for space allocations */ | 270 | for space allocations */ |
271 | #define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */ | ||
271 | #define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ | 272 | #define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */ |
272 | #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) | 273 | #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) |
273 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem | 274 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem |
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index fdcab3f81dde..e0e64b113bd6 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
@@ -201,9 +201,6 @@ typedef struct xfs_qoff_logformat { | |||
201 | #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ | 201 | #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ |
202 | #define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */ | 202 | #define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */ |
203 | #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ | 203 | #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ |
204 | #define XFS_QMOPT_QUOTAOFF 0x0000080 /* quotas are being turned off */ | ||
205 | #define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ | ||
206 | #define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ | ||
207 | #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ | 204 | #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ |
208 | #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ | 205 | #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ |
209 | #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ | 206 | #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f73e358bae8d..ce558efa2ea0 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -44,24 +44,14 @@ | |||
44 | #include "xfs_trans_priv.h" | 44 | #include "xfs_trans_priv.h" |
45 | #include "xfs_trans_space.h" | 45 | #include "xfs_trans_space.h" |
46 | #include "xfs_inode_item.h" | 46 | #include "xfs_inode_item.h" |
47 | 47 | #include "xfs_trace.h" | |
48 | |||
49 | STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *); | ||
50 | STATIC uint xfs_trans_count_vecs(xfs_trans_t *); | ||
51 | STATIC void xfs_trans_fill_vecs(xfs_trans_t *, xfs_log_iovec_t *); | ||
52 | STATIC void xfs_trans_uncommit(xfs_trans_t *, uint); | ||
53 | STATIC void xfs_trans_committed(xfs_trans_t *, int); | ||
54 | STATIC void xfs_trans_chunk_committed(xfs_log_item_chunk_t *, xfs_lsn_t, int); | ||
55 | STATIC void xfs_trans_free(xfs_trans_t *); | ||
56 | 48 | ||
57 | kmem_zone_t *xfs_trans_zone; | 49 | kmem_zone_t *xfs_trans_zone; |
58 | 50 | ||
59 | |||
60 | /* | 51 | /* |
61 | * Reservation functions here avoid a huge stack in xfs_trans_init | 52 | * Reservation functions here avoid a huge stack in xfs_trans_init |
62 | * due to register overflow from temporaries in the calculations. | 53 | * due to register overflow from temporaries in the calculations. |
63 | */ | 54 | */ |
64 | |||
65 | STATIC uint | 55 | STATIC uint |
66 | xfs_calc_write_reservation(xfs_mount_t *mp) | 56 | xfs_calc_write_reservation(xfs_mount_t *mp) |
67 | { | 57 | { |
@@ -254,13 +244,30 @@ _xfs_trans_alloc( | |||
254 | tp->t_type = type; | 244 | tp->t_type = type; |
255 | tp->t_mountp = mp; | 245 | tp->t_mountp = mp; |
256 | tp->t_items_free = XFS_LIC_NUM_SLOTS; | 246 | tp->t_items_free = XFS_LIC_NUM_SLOTS; |
257 | tp->t_busy_free = XFS_LBC_NUM_SLOTS; | ||
258 | xfs_lic_init(&(tp->t_items)); | 247 | xfs_lic_init(&(tp->t_items)); |
259 | XFS_LBC_INIT(&(tp->t_busy)); | 248 | INIT_LIST_HEAD(&tp->t_busy); |
260 | return tp; | 249 | return tp; |
261 | } | 250 | } |
262 | 251 | ||
263 | /* | 252 | /* |
253 | * Free the transaction structure. If there is more clean up | ||
254 | * to do when the structure is freed, add it here. | ||
255 | */ | ||
256 | STATIC void | ||
257 | xfs_trans_free( | ||
258 | struct xfs_trans *tp) | ||
259 | { | ||
260 | struct xfs_busy_extent *busyp, *n; | ||
261 | |||
262 | list_for_each_entry_safe(busyp, n, &tp->t_busy, list) | ||
263 | xfs_alloc_busy_clear(tp->t_mountp, busyp); | ||
264 | |||
265 | atomic_dec(&tp->t_mountp->m_active_trans); | ||
266 | xfs_trans_free_dqinfo(tp); | ||
267 | kmem_zone_free(xfs_trans_zone, tp); | ||
268 | } | ||
269 | |||
270 | /* | ||
264 | * This is called to create a new transaction which will share the | 271 | * This is called to create a new transaction which will share the |
265 | * permanent log reservation of the given transaction. The remaining | 272 | * permanent log reservation of the given transaction. The remaining |
266 | * unused block and rt extent reservations are also inherited. This | 273 | * unused block and rt extent reservations are also inherited. This |
@@ -283,9 +290,8 @@ xfs_trans_dup( | |||
283 | ntp->t_type = tp->t_type; | 290 | ntp->t_type = tp->t_type; |
284 | ntp->t_mountp = tp->t_mountp; | 291 | ntp->t_mountp = tp->t_mountp; |
285 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; | 292 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; |
286 | ntp->t_busy_free = XFS_LBC_NUM_SLOTS; | ||
287 | xfs_lic_init(&(ntp->t_items)); | 293 | xfs_lic_init(&(ntp->t_items)); |
288 | XFS_LBC_INIT(&(ntp->t_busy)); | 294 | INIT_LIST_HEAD(&ntp->t_busy); |
289 | 295 | ||
290 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | 296 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
291 | ASSERT(tp->t_ticket != NULL); | 297 | ASSERT(tp->t_ticket != NULL); |
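Both _xfs_trans_alloc() and xfs_trans_dup() now initialize t_busy as a plain list head instead of the old fixed-slot busy chunk. The matching teardown sits in the relocated xfs_trans_free() above; together they form the usual list_head lifecycle:

	/* At allocation time (both hunks above). */
	INIT_LIST_HEAD(&tp->t_busy);

	/* At free time: a safe walk, since xfs_alloc_busy_clear() takes
	 * each busy extent off the list as it goes. */
	struct xfs_busy_extent	*busyp, *n;

	list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
		xfs_alloc_busy_clear(tp->t_mountp, busyp);

This also removes the XFS_LBC_NUM_SLOTS ceiling: a transaction can now carry any number of busy extents.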
@@ -421,7 +427,6 @@ undo_blocks: | |||
421 | return error; | 427 | return error; |
422 | } | 428 | } |
423 | 429 | ||
424 | |||
425 | /* | 430 | /* |
426 | * Record the indicated change to the given field for application | 431 | * Record the indicated change to the given field for application |
427 | * to the file system's superblock when the transaction commits. | 432 | * to the file system's superblock when the transaction commits. |
@@ -650,7 +655,7 @@ xfs_trans_apply_sb_deltas( | |||
650 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we | 655 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we |
651 | * still need to update the incore superblock with the changes. | 656 | * still need to update the incore superblock with the changes. |
652 | */ | 657 | */ |
653 | STATIC void | 658 | void |
654 | xfs_trans_unreserve_and_mod_sb( | 659 | xfs_trans_unreserve_and_mod_sb( |
655 | xfs_trans_t *tp) | 660 | xfs_trans_t *tp) |
656 | { | 661 | { |
@@ -764,94 +769,256 @@ xfs_trans_unreserve_and_mod_sb( | |||
764 | } | 769 | } |
765 | } | 770 | } |
766 | 771 | ||
772 | /* | ||
773 | * Total up the number of log iovecs needed to commit this | ||
774 | * transaction. The transaction itself needs one for the | ||
775 | * transaction header. Ask each dirty item in turn how many | ||
776 | * it needs to get the total. | ||
777 | */ | ||
778 | static uint | ||
779 | xfs_trans_count_vecs( | ||
780 | struct xfs_trans *tp) | ||
781 | { | ||
782 | int nvecs; | ||
783 | xfs_log_item_desc_t *lidp; | ||
784 | |||
785 | nvecs = 1; | ||
786 | lidp = xfs_trans_first_item(tp); | ||
787 | ASSERT(lidp != NULL); | ||
788 | |||
789 | /* In the non-debug case we need to start bailing out if we | ||
790 | * didn't find a log_item here, return zero and let trans_commit | ||
791 | * deal with it. | ||
792 | */ | ||
793 | if (lidp == NULL) | ||
794 | return 0; | ||
795 | |||
796 | while (lidp != NULL) { | ||
797 | /* | ||
798 | * Skip items which aren't dirty in this transaction. | ||
799 | */ | ||
800 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) { | ||
801 | lidp = xfs_trans_next_item(tp, lidp); | ||
802 | continue; | ||
803 | } | ||
804 | lidp->lid_size = IOP_SIZE(lidp->lid_item); | ||
805 | nvecs += lidp->lid_size; | ||
806 | lidp = xfs_trans_next_item(tp, lidp); | ||
807 | } | ||
808 | |||
809 | return nvecs; | ||
810 | } | ||
767 | 811 | ||
768 | /* | 812 | /* |
769 | * xfs_trans_commit | 813 | * Fill in the vector with pointers to data to be logged |
814 | * by this transaction. The transaction header takes | ||
815 | * the first vector, and then each dirty item takes the | ||
816 | * number of vectors it indicated it needed in xfs_trans_count_vecs(). | ||
770 | * | 817 | * |
771 | * Commit the given transaction to the log a/synchronously. | 818 | * As each item fills in the entries it needs, also pin the item |
819 | * so that it cannot be flushed out until the log write completes. | ||
820 | */ | ||
821 | static void | ||
822 | xfs_trans_fill_vecs( | ||
823 | struct xfs_trans *tp, | ||
824 | struct xfs_log_iovec *log_vector) | ||
825 | { | ||
826 | xfs_log_item_desc_t *lidp; | ||
827 | struct xfs_log_iovec *vecp; | ||
828 | uint nitems; | ||
829 | |||
830 | /* | ||
831 | * Skip over the entry for the transaction header, we'll | ||
832 | * fill that in at the end. | ||
833 | */ | ||
834 | vecp = log_vector + 1; | ||
835 | |||
836 | nitems = 0; | ||
837 | lidp = xfs_trans_first_item(tp); | ||
838 | ASSERT(lidp); | ||
839 | while (lidp) { | ||
840 | /* Skip items which aren't dirty in this transaction. */ | ||
841 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) { | ||
842 | lidp = xfs_trans_next_item(tp, lidp); | ||
843 | continue; | ||
844 | } | ||
845 | |||
846 | /* | ||
847 | * The item may be marked dirty yet log nothing. This can | ||
848 | * be used to have a callback invoked when the transaction commits. | ||
849 | */ | ||
850 | if (lidp->lid_size) | ||
851 | nitems++; | ||
852 | IOP_FORMAT(lidp->lid_item, vecp); | ||
853 | vecp += lidp->lid_size; | ||
854 | IOP_PIN(lidp->lid_item); | ||
855 | lidp = xfs_trans_next_item(tp, lidp); | ||
856 | } | ||
857 | |||
858 | /* | ||
859 | * Now that we've counted the number of items in this transaction, fill | ||
860 | * in the transaction header. Note that the transaction header does not | ||
861 | * have a log item. | ||
862 | */ | ||
863 | tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC; | ||
864 | tp->t_header.th_type = tp->t_type; | ||
865 | tp->t_header.th_num_items = nitems; | ||
866 | log_vector->i_addr = (xfs_caddr_t)&tp->t_header; | ||
867 | log_vector->i_len = sizeof(xfs_trans_header_t); | ||
868 | log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; | ||
869 | } | ||
870 | |||
871 | /* | ||
872 | * The committed item processing consists of calling the committed routine of | ||
873 | * each logged item, updating the item's position in the AIL if necessary, and | ||
874 | * unpinning each item. If the committed routine returns -1, then do nothing | ||
875 | * further with the item because it may have been freed. | ||
772 | * | 876 | * |
773 | * XFS disk error handling mechanism is not based on a typical | 877 | * Since items are unlocked when they are copied to the incore log, it is |
774 | * transaction abort mechanism. Logically after the filesystem | 878 | * possible for two transactions to be completing and manipulating the same |
775 | * gets marked 'SHUTDOWN', we can't let any new transactions | 879 | * item simultaneously. The AIL lock will protect the lsn field of each item. |
776 | * be durable - ie. committed to disk - because some metadata might | 880 | * The value of this field can never go backwards. |
777 | * be inconsistent. In such cases, this returns an error, and the | 881 | * |
778 | * caller may assume that all locked objects joined to the transaction | 882 | * We unpin the items after repositioning them in the AIL, because otherwise |
779 | * have already been unlocked as if the commit had succeeded. | 883 | * they could be immediately flushed and we'd have to race with the flusher |
780 | * Do not reference the transaction structure after this call. | 884 | * trying to pull the item from the AIL as we add it. |
781 | */ | 885 | */ |
782 | /*ARGSUSED*/ | 886 | void |
783 | int | 887 | xfs_trans_item_committed( |
784 | _xfs_trans_commit( | 888 | struct xfs_log_item *lip, |
785 | xfs_trans_t *tp, | 889 | xfs_lsn_t commit_lsn, |
786 | uint flags, | 890 | int aborted) |
787 | int *log_flushed) | ||
788 | { | 891 | { |
789 | xfs_log_iovec_t *log_vector; | 892 | xfs_lsn_t item_lsn; |
790 | int nvec; | 893 | struct xfs_ail *ailp; |
791 | xfs_mount_t *mp; | ||
792 | xfs_lsn_t commit_lsn; | ||
793 | /* REFERENCED */ | ||
794 | int error; | ||
795 | int log_flags; | ||
796 | int sync; | ||
797 | #define XFS_TRANS_LOGVEC_COUNT 16 | ||
798 | xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; | ||
799 | struct xlog_in_core *commit_iclog; | ||
800 | int shutdown; | ||
801 | 894 | ||
802 | commit_lsn = -1; | 895 | if (aborted) |
896 | lip->li_flags |= XFS_LI_ABORTED; | ||
897 | item_lsn = IOP_COMMITTED(lip, commit_lsn); | ||
898 | |||
899 | /* If the committed routine returns -1, item has been freed. */ | ||
900 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) | ||
901 | return; | ||
803 | 902 | ||
804 | /* | 903 | /* |
805 | * Determine whether this commit is releasing a permanent | 904 | * If the returned lsn is greater than what it contained before, update |
806 | * log reservation or not. | 905 | * the location of the item in the AIL. If it is not, then do nothing. |
906 | * Items can never move backwards in the AIL. | ||
907 | * | ||
908 | * While the new lsn should usually be greater, it is possible that a | ||
909 | * later transaction completing simultaneously with an earlier one | ||
910 | * using the same item could complete first with a higher lsn. This | ||
911 | * would cause the earlier transaction to fail the test below. | ||
807 | */ | 912 | */ |
808 | if (flags & XFS_TRANS_RELEASE_LOG_RES) { | 913 | ailp = lip->li_ailp; |
809 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | 914 | spin_lock(&ailp->xa_lock); |
810 | log_flags = XFS_LOG_REL_PERM_RESERV; | 915 | if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { |
916 | /* | ||
917 | * This will set the item's lsn to item_lsn and update the | ||
918 | * position of the item in the AIL. | ||
919 | * | ||
920 | * xfs_trans_ail_update() drops the AIL lock. | ||
921 | */ | ||
922 | xfs_trans_ail_update(ailp, lip, item_lsn); | ||
811 | } else { | 923 | } else { |
812 | log_flags = 0; | 924 | spin_unlock(&ailp->xa_lock); |
813 | } | 925 | } |
814 | mp = tp->t_mountp; | ||
815 | 926 | ||
816 | /* | 927 | /* |
817 | * If there is nothing to be logged by the transaction, | 928 | * Now that we've repositioned the item in the AIL, unpin it so it can |
818 | * then unlock all of the items associated with the | 929 | * be flushed. Pass information about buffer stale state down from the |
819 | * transaction and free the transaction structure. | 930 | * log item flags, if anyone else stales the buffer we do not want to |
820 | * Also make sure to return any reserved blocks to | 931 | * pay any attention to it. |
821 | * the free pool. | ||
822 | */ | 932 | */ |
823 | shut_us_down: | 933 | IOP_UNPIN(lip); |
824 | shutdown = XFS_FORCED_SHUTDOWN(mp) ? EIO : 0; | 934 | } |
825 | if (!(tp->t_flags & XFS_TRANS_DIRTY) || shutdown) { | 935 | |
826 | xfs_trans_unreserve_and_mod_sb(tp); | 936 | /* |
937 | * This is typically called by the LM when a transaction has been fully | ||
938 | * committed to disk. It needs to unpin the items which have | ||
939 | * been logged by the transaction and update their positions | ||
940 | * in the AIL if necessary. | ||
941 | * | ||
942 | * This also gets called when the transactions didn't get written out | ||
943 | * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then. | ||
944 | */ | ||
945 | STATIC void | ||
946 | xfs_trans_committed( | ||
947 | struct xfs_trans *tp, | ||
948 | int abortflag) | ||
949 | { | ||
950 | xfs_log_item_desc_t *lidp; | ||
951 | xfs_log_item_chunk_t *licp; | ||
952 | xfs_log_item_chunk_t *next_licp; | ||
953 | |||
954 | /* Call the transaction's completion callback if there is one. */ | ||
955 | if (tp->t_callback != NULL) | ||
956 | tp->t_callback(tp, tp->t_callarg); | ||
957 | |||
958 | for (lidp = xfs_trans_first_item(tp); | ||
959 | lidp != NULL; | ||
960 | lidp = xfs_trans_next_item(tp, lidp)) { | ||
961 | xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); | ||
962 | } | ||
963 | |||
964 | /* free the item chunks, ignoring the embedded chunk */ | ||
965 | for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) { | ||
966 | next_licp = licp->lic_next; | ||
967 | kmem_free(licp); | ||
968 | } | ||
969 | |||
970 | xfs_trans_free(tp); | ||
971 | } | ||
972 | |||
973 | /* | ||
974 | * Called from the trans_commit code when we notice that | ||
975 | * the filesystem is in the middle of a forced shutdown. | ||
976 | */ | ||
977 | STATIC void | ||
978 | xfs_trans_uncommit( | ||
979 | struct xfs_trans *tp, | ||
980 | uint flags) | ||
981 | { | ||
982 | xfs_log_item_desc_t *lidp; | ||
983 | |||
984 | for (lidp = xfs_trans_first_item(tp); | ||
985 | lidp != NULL; | ||
986 | lidp = xfs_trans_next_item(tp, lidp)) { | ||
827 | /* | 987 | /* |
828 | * It is indeed possible for the transaction to be | 988 | * Unpin all but those that aren't dirty. |
829 | * not dirty but the dqinfo portion to be. All that | ||
830 | * means is that we have some (non-persistent) quota | ||
831 | * reservations that need to be unreserved. | ||
832 | */ | 989 | */ |
833 | xfs_trans_unreserve_and_mod_dquots(tp); | 990 | if (lidp->lid_flags & XFS_LID_DIRTY) |
834 | if (tp->t_ticket) { | 991 | IOP_UNPIN_REMOVE(lidp->lid_item, tp); |
835 | commit_lsn = xfs_log_done(mp, tp->t_ticket, | ||
836 | NULL, log_flags); | ||
837 | if (commit_lsn == -1 && !shutdown) | ||
838 | shutdown = XFS_ERROR(EIO); | ||
839 | } | ||
840 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
841 | xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0); | ||
842 | xfs_trans_free_busy(tp); | ||
843 | xfs_trans_free(tp); | ||
844 | XFS_STATS_INC(xs_trans_empty); | ||
845 | return (shutdown); | ||
846 | } | 992 | } |
847 | ASSERT(tp->t_ticket != NULL); | ||
848 | 993 | ||
849 | /* | 994 | xfs_trans_unreserve_and_mod_sb(tp); |
850 | * If we need to update the superblock, then do it now. | 995 | xfs_trans_unreserve_and_mod_dquots(tp); |
851 | */ | 996 | |
852 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) | 997 | xfs_trans_free_items(tp, NULLCOMMITLSN, flags); |
853 | xfs_trans_apply_sb_deltas(tp); | 998 | xfs_trans_free(tp); |
854 | xfs_trans_apply_dquot_deltas(tp); | 999 | } |
1000 | |||
1001 | /* | ||
1002 | * Format the transaction direct to the iclog. This isolates the physical | ||
1003 | * transaction commit operation from the logical operation and hence allows | ||
1004 | * other methods to be introduced without affecting the existing commit path. | ||
1005 | */ | ||
1006 | static int | ||
1007 | xfs_trans_commit_iclog( | ||
1008 | struct xfs_mount *mp, | ||
1009 | struct xfs_trans *tp, | ||
1010 | xfs_lsn_t *commit_lsn, | ||
1011 | int flags) | ||
1012 | { | ||
1013 | int shutdown; | ||
1014 | int error; | ||
1015 | int log_flags = 0; | ||
1016 | struct xlog_in_core *commit_iclog; | ||
1017 | #define XFS_TRANS_LOGVEC_COUNT 16 | ||
1018 | struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; | ||
1019 | struct xfs_log_iovec *log_vector; | ||
1020 | uint nvec; | ||
1021 | |||
855 | 1022 | ||
856 | /* | 1023 | /* |
857 | * Ask each log item how many log_vector entries it will | 1024 | * Ask each log item how many log_vector entries it will |
@@ -861,8 +1028,7 @@ shut_us_down: | |||
861 | */ | 1028 | */ |
862 | nvec = xfs_trans_count_vecs(tp); | 1029 | nvec = xfs_trans_count_vecs(tp); |
863 | if (nvec == 0) { | 1030 | if (nvec == 0) { |
864 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | 1031 | return ENOMEM; /* triggers a shutdown! */ |
865 | goto shut_us_down; | ||
866 | } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { | 1032 | } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { |
867 | log_vector = log_vector_fast; | 1033 | log_vector = log_vector_fast; |
868 | } else { | 1034 | } else { |
@@ -877,6 +1043,9 @@ shut_us_down: | |||
877 | */ | 1043 | */ |
878 | xfs_trans_fill_vecs(tp, log_vector); | 1044 | xfs_trans_fill_vecs(tp, log_vector); |
879 | 1045 | ||
1046 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
1047 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
1048 | |||
880 | error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn)); | 1049 | error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn)); |
881 | 1050 | ||
882 | /* | 1051 | /* |
@@ -884,18 +1053,19 @@ shut_us_down: | |||
884 | * at any time after this call. However, all the items associated | 1053 | * at any time after this call. However, all the items associated |
885 | * with the transaction are still locked and pinned in memory. | 1054 | * with the transaction are still locked and pinned in memory. |
886 | */ | 1055 | */ |
887 | commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); | 1056 | *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); |
888 | 1057 | ||
889 | tp->t_commit_lsn = commit_lsn; | 1058 | tp->t_commit_lsn = *commit_lsn; |
890 | if (nvec > XFS_TRANS_LOGVEC_COUNT) { | 1059 | trace_xfs_trans_commit_lsn(tp); |
1060 | |||
1061 | if (nvec > XFS_TRANS_LOGVEC_COUNT) | ||
891 | kmem_free(log_vector); | 1062 | kmem_free(log_vector); |
892 | } | ||
893 | 1063 | ||
894 | /* | 1064 | /* |
895 | * If we got a log write error. Unpin the logitems that we | 1065 | * If we got a log write error. Unpin the logitems that we |
896 | * had pinned, clean up, free trans structure, and return error. | 1066 | * had pinned, clean up, free trans structure, and return error. |
897 | */ | 1067 | */ |
898 | if (error || commit_lsn == -1) { | 1068 | if (error || *commit_lsn == -1) { |
899 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1069 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
900 | xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); | 1070 | xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); |
901 | return XFS_ERROR(EIO); | 1071 | return XFS_ERROR(EIO); |
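The hunks above wire xfs_trans_count_vecs() and xfs_trans_fill_vecs() into xfs_trans_commit_iclog() as a two-pass scheme: pass one sizes the iovec array (one extra slot for the transaction header), pass two formats each dirty item into it and pins the item. Condensed, with the heap-allocation branch elided by the diff and therefore assumed here:

	nvec = xfs_trans_count_vecs(tp);	/* pass 1: 1 + per-item sizes */
	if (nvec == 0)
		return ENOMEM;			/* nothing dirty: caller shuts down */
	else if (nvec <= XFS_TRANS_LOGVEC_COUNT)
		log_vector = log_vector_fast;	/* small commits stay on the stack */
	else					/* assumed: not shown in the hunk */
		log_vector = kmem_alloc(nvec * sizeof(xfs_log_iovec_t), KM_SLEEP);

	xfs_trans_fill_vecs(tp, log_vector);	/* pass 2: format and pin items */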
@@ -909,8 +1079,6 @@ shut_us_down: | |||
909 | */ | 1079 | */ |
910 | xfs_trans_unreserve_and_mod_sb(tp); | 1080 | xfs_trans_unreserve_and_mod_sb(tp); |
911 | 1081 | ||
912 | sync = tp->t_flags & XFS_TRANS_SYNC; | ||
913 | |||
914 | /* | 1082 | /* |
915 | * Tell the LM to call the transaction completion routine | 1083 | * Tell the LM to call the transaction completion routine |
916 | * when the log write with LSN commit_lsn completes (e.g. | 1084 | * when the log write with LSN commit_lsn completes (e.g. |
@@ -953,7 +1121,7 @@ shut_us_down: | |||
953 | * the commit lsn of this transaction for dependency tracking | 1121 | * the commit lsn of this transaction for dependency tracking |
954 | * purposes. | 1122 | * purposes. |
955 | */ | 1123 | */ |
956 | xfs_trans_unlock_items(tp, commit_lsn); | 1124 | xfs_trans_unlock_items(tp, *commit_lsn); |
957 | 1125 | ||
958 | /* | 1126 | /* |
959 | * If we detected a log error earlier, finish committing | 1127 | * If we detected a log error earlier, finish committing |
@@ -973,156 +1141,204 @@ shut_us_down: | |||
973 | * and the items are released we can finally allow the iclog to | 1141 | * and the items are released we can finally allow the iclog to |
974 | * go to disk. | 1142 | * go to disk. |
975 | */ | 1143 | */ |
976 | error = xfs_log_release_iclog(mp, commit_iclog); | 1144 | return xfs_log_release_iclog(mp, commit_iclog); |
977 | |||
978 | /* | ||
979 | * If the transaction needs to be synchronous, then force the | ||
980 | * log out now and wait for it. | ||
981 | */ | ||
982 | if (sync) { | ||
983 | if (!error) { | ||
984 | error = _xfs_log_force_lsn(mp, commit_lsn, | ||
985 | XFS_LOG_SYNC, log_flushed); | ||
986 | } | ||
987 | XFS_STATS_INC(xs_trans_sync); | ||
988 | } else { | ||
989 | XFS_STATS_INC(xs_trans_async); | ||
990 | } | ||
991 | |||
992 | return (error); | ||
993 | } | 1145 | } |
994 | 1146 | ||
995 | |||
996 | /* | 1147 | /* |
997 | * Total up the number of log iovecs needed to commit this | 1148 | * Walk the log items and allocate log vector structures for |
998 | * transaction. The transaction itself needs one for the | 1149 | * each item large enough to fit all the vectors they require. |
999 | * transaction header. Ask each dirty item in turn how many | 1150 | * Note that this format differs from the old log vector format in |
1000 | * it needs to get the total. | 1151 | * that there is no transaction header in these log vectors. |
1001 | */ | 1152 | */ |
1002 | STATIC uint | 1153 | STATIC struct xfs_log_vec * |
1003 | xfs_trans_count_vecs( | 1154 | xfs_trans_alloc_log_vecs( |
1004 | xfs_trans_t *tp) | 1155 | xfs_trans_t *tp) |
1005 | { | 1156 | { |
1006 | int nvecs; | ||
1007 | xfs_log_item_desc_t *lidp; | 1157 | xfs_log_item_desc_t *lidp; |
1158 | struct xfs_log_vec *lv = NULL; | ||
1159 | struct xfs_log_vec *ret_lv = NULL; | ||
1008 | 1160 | ||
1009 | nvecs = 1; | ||
1010 | lidp = xfs_trans_first_item(tp); | 1161 | lidp = xfs_trans_first_item(tp); |
1011 | ASSERT(lidp != NULL); | ||
1012 | 1162 | ||
1013 | /* In the non-debug case we need to start bailing out if we | 1163 | /* Bail out if we didn't find a log item. */ |
1014 | * didn't find a log_item here, return zero and let trans_commit | 1164 | if (!lidp) { |
1015 | * deal with it. | 1165 | ASSERT(0); |
1016 | */ | 1166 | return NULL; |
1017 | if (lidp == NULL) | 1167 | } |
1018 | return 0; | ||
1019 | 1168 | ||
1020 | while (lidp != NULL) { | 1169 | while (lidp != NULL) { |
1021 | /* | 1170 | struct xfs_log_vec *new_lv; |
1022 | * Skip items which aren't dirty in this transaction. | 1171 | |
1023 | */ | 1172 | /* Skip items which aren't dirty in this transaction. */ |
1024 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) { | 1173 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) { |
1025 | lidp = xfs_trans_next_item(tp, lidp); | 1174 | lidp = xfs_trans_next_item(tp, lidp); |
1026 | continue; | 1175 | continue; |
1027 | } | 1176 | } |
1177 | |||
1178 | /* Skip items that do not have any vectors for writing */ | ||
1028 | lidp->lid_size = IOP_SIZE(lidp->lid_item); | 1179 | lidp->lid_size = IOP_SIZE(lidp->lid_item); |
1029 | nvecs += lidp->lid_size; | 1180 | if (!lidp->lid_size) { |
1181 | lidp = xfs_trans_next_item(tp, lidp); | ||
1182 | continue; | ||
1183 | } | ||
1184 | |||
1185 | new_lv = kmem_zalloc(sizeof(*new_lv) + | ||
1186 | lidp->lid_size * sizeof(struct xfs_log_iovec), | ||
1187 | KM_SLEEP); | ||
1188 | |||
1189 | /* The allocated iovec region lies beyond the log vector. */ | ||
1190 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; | ||
1191 | new_lv->lv_niovecs = lidp->lid_size; | ||
1192 | new_lv->lv_item = lidp->lid_item; | ||
1193 | if (!ret_lv) | ||
1194 | ret_lv = new_lv; | ||
1195 | else | ||
1196 | lv->lv_next = new_lv; | ||
1197 | lv = new_lv; | ||
1030 | lidp = xfs_trans_next_item(tp, lidp); | 1198 | lidp = xfs_trans_next_item(tp, lidp); |
1031 | } | 1199 | } |
1032 | 1200 | ||
1033 | return nvecs; | 1201 | return ret_lv; |
1034 | } | 1202 | } |
1035 | 1203 | ||
1036 | /* | 1204 | static int |
1037 | * Called from the trans_commit code when we notice that | 1205 | xfs_trans_commit_cil( |
1038 | * the filesystem is in the middle of a forced shutdown. | 1206 | struct xfs_mount *mp, |
1039 | */ | 1207 | struct xfs_trans *tp, |
1040 | STATIC void | 1208 | xfs_lsn_t *commit_lsn, |
1041 | xfs_trans_uncommit( | 1209 | int flags) |
1042 | xfs_trans_t *tp, | ||
1043 | uint flags) | ||
1044 | { | 1210 | { |
1045 | xfs_log_item_desc_t *lidp; | 1211 | struct xfs_log_vec *log_vector; |
1212 | int error; | ||
1046 | 1213 | ||
1047 | for (lidp = xfs_trans_first_item(tp); | 1214 | /* |
1048 | lidp != NULL; | 1215 | * Get each log item to allocate a vector structure for |
1049 | lidp = xfs_trans_next_item(tp, lidp)) { | 1216 | * the log item to pass to the log write code. The |
1050 | /* | 1217 | * CIL commit code will format the vector and save it away. |
1051 | * Unpin all but those that aren't dirty. | 1218 | */ |
1052 | */ | 1219 | log_vector = xfs_trans_alloc_log_vecs(tp); |
1053 | if (lidp->lid_flags & XFS_LID_DIRTY) | 1220 | if (!log_vector) |
1054 | IOP_UNPIN_REMOVE(lidp->lid_item, tp); | 1221 | return ENOMEM; |
1055 | } | ||
1056 | 1222 | ||
1057 | xfs_trans_unreserve_and_mod_sb(tp); | 1223 | error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); |
1058 | xfs_trans_unreserve_and_mod_dquots(tp); | 1224 | if (error) |
1225 | return error; | ||
1059 | 1226 | ||
1060 | xfs_trans_free_items(tp, flags); | 1227 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1061 | xfs_trans_free_busy(tp); | 1228 | |
1229 | /* xfs_trans_free_items() unlocks them first */ | ||
1230 | xfs_trans_free_items(tp, *commit_lsn, 0); | ||
1062 | xfs_trans_free(tp); | 1231 | xfs_trans_free(tp); |
1232 | return 0; | ||
1063 | } | 1233 | } |
1064 | 1234 | ||
1065 | /* | 1235 | /* |
1066 | * Fill in the vector with pointers to data to be logged | 1236 | * xfs_trans_commit |
1067 | * by this transaction. The transaction header takes | ||
1068 | * the first vector, and then each dirty item takes the | ||
1069 | * number of vectors it indicated it needed in xfs_trans_count_vecs(). | ||
1070 | * | 1237 | * |
1071 | * As each item fills in the entries it needs, also pin the item | 1238 | * Commit the given transaction to the log a/synchronously. |
1072 | * so that it cannot be flushed out until the log write completes. | 1239 | * |
1240 | * XFS disk error handling mechanism is not based on a typical | ||
1241 | * transaction abort mechanism. Logically after the filesystem | ||
1242 | * gets marked 'SHUTDOWN', we can't let any new transactions | ||
1243 | * be durable - ie. committed to disk - because some metadata might | ||
1244 | * be inconsistent. In such cases, this returns an error, and the | ||
1245 | * caller may assume that all locked objects joined to the transaction | ||
1246 | * have already been unlocked as if the commit had succeeded. | ||
1247 | * Do not reference the transaction structure after this call. | ||
1073 | */ | 1248 | */ |
1074 | STATIC void | 1249 | int |
1075 | xfs_trans_fill_vecs( | 1250 | _xfs_trans_commit( |
1076 | xfs_trans_t *tp, | 1251 | struct xfs_trans *tp, |
1077 | xfs_log_iovec_t *log_vector) | 1252 | uint flags, |
1253 | int *log_flushed) | ||
1078 | { | 1254 | { |
1079 | xfs_log_item_desc_t *lidp; | 1255 | struct xfs_mount *mp = tp->t_mountp; |
1080 | xfs_log_iovec_t *vecp; | 1256 | xfs_lsn_t commit_lsn = -1; |
1081 | uint nitems; | 1257 | int error = 0; |
1258 | int log_flags = 0; | ||
1259 | int sync = tp->t_flags & XFS_TRANS_SYNC; | ||
1082 | 1260 | ||
1083 | /* | 1261 | /* |
1084 | * Skip over the entry for the transaction header, we'll | 1262 | * Determine whether this commit is releasing a permanent |
1085 | * fill that in at the end. | 1263 | * log reservation or not. |
1086 | */ | 1264 | */ |
1087 | vecp = log_vector + 1; /* pointer arithmetic */ | 1265 | if (flags & XFS_TRANS_RELEASE_LOG_RES) { |
1266 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | ||
1267 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
1268 | } | ||
1088 | 1269 | ||
1089 | nitems = 0; | 1270 | /* |
1090 | lidp = xfs_trans_first_item(tp); | 1271 | * If there is nothing to be logged by the transaction, |
1091 | ASSERT(lidp != NULL); | 1272 | * then unlock all of the items associated with the |
1092 | while (lidp != NULL) { | 1273 | * transaction and free the transaction structure. |
1093 | /* | 1274 | * Also make sure to return any reserved blocks to |
1094 | * Skip items which aren't dirty in this transaction. | 1275 | * the free pool. |
1095 | */ | 1276 | */ |
1096 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) { | 1277 | if (!(tp->t_flags & XFS_TRANS_DIRTY)) |
1097 | lidp = xfs_trans_next_item(tp, lidp); | 1278 | goto out_unreserve; |
1098 | continue; | 1279 | |
1099 | } | 1280 | if (XFS_FORCED_SHUTDOWN(mp)) { |
1100 | /* | 1281 | error = XFS_ERROR(EIO); |
1101 | * The item may be marked dirty but not log anything. | 1282 | goto out_unreserve; |
1102 | * This can be used to get called when a transaction | 1283 | } |
1103 | * is committed. | 1284 | |
1104 | */ | 1285 | ASSERT(tp->t_ticket != NULL); |
1105 | if (lidp->lid_size) { | 1286 | |
1106 | nitems++; | 1287 | /* |
1288 | * If we need to update the superblock, then do it now. | ||
1289 | */ | ||
1290 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) | ||
1291 | xfs_trans_apply_sb_deltas(tp); | ||
1292 | xfs_trans_apply_dquot_deltas(tp); | ||
1293 | |||
1294 | if (mp->m_flags & XFS_MOUNT_DELAYLOG) | ||
1295 | error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags); | ||
1296 | else | ||
1297 | error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags); | ||
1298 | |||
1299 | if (error == ENOMEM) { | ||
1300 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | ||
1301 | error = XFS_ERROR(EIO); | ||
1302 | goto out_unreserve; | ||
1303 | } | ||
1304 | |||
1305 | /* | ||
1306 | * If the transaction needs to be synchronous, then force the | ||
1307 | * log out now and wait for it. | ||
1308 | */ | ||
1309 | if (sync) { | ||
1310 | if (!error) { | ||
1311 | error = _xfs_log_force_lsn(mp, commit_lsn, | ||
1312 | XFS_LOG_SYNC, log_flushed); | ||
1107 | } | 1313 | } |
1108 | IOP_FORMAT(lidp->lid_item, vecp); | 1314 | XFS_STATS_INC(xs_trans_sync); |
1109 | vecp += lidp->lid_size; /* pointer arithmetic */ | 1315 | } else { |
1110 | IOP_PIN(lidp->lid_item); | 1316 | XFS_STATS_INC(xs_trans_async); |
1111 | lidp = xfs_trans_next_item(tp, lidp); | ||
1112 | } | 1317 | } |
1113 | 1318 | ||
1319 | return error; | ||
1320 | |||
1321 | out_unreserve: | ||
1322 | xfs_trans_unreserve_and_mod_sb(tp); | ||
1323 | |||
1114 | /* | 1324 | /* |
1115 | * Now that we've counted the number of items in this | 1325 | * It is indeed possible for the transaction to be not dirty but |
1116 | * transaction, fill in the transaction header. | 1326 | * the dqinfo portion to be. All that means is that we have some |
1327 | * (non-persistent) quota reservations that need to be unreserved. | ||
1117 | */ | 1328 | */ |
1118 | tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC; | 1329 | xfs_trans_unreserve_and_mod_dquots(tp); |
1119 | tp->t_header.th_type = tp->t_type; | 1330 | if (tp->t_ticket) { |
1120 | tp->t_header.th_num_items = nitems; | 1331 | commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); |
1121 | log_vector->i_addr = (xfs_caddr_t)&tp->t_header; | 1332 | if (commit_lsn == -1 && !error) |
1122 | log_vector->i_len = sizeof(xfs_trans_header_t); | 1333 | error = XFS_ERROR(EIO); |
1123 | log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; | 1334 | } |
1124 | } | 1335 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1336 | xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); | ||
1337 | xfs_trans_free(tp); | ||
1125 | 1338 | ||
1339 | XFS_STATS_INC(xs_trans_empty); | ||
1340 | return error; | ||
1341 | } | ||
1126 | 1342 | ||
1127 | /* | 1343 | /* |
1128 | * Unlock all of the transaction's items and free the transaction. | 1344 | * Unlock all of the transaction's items and free the transaction. |
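xfs_trans_alloc_log_vecs() above relies on a single allocation per dirty item: the iovec array is co-allocated right behind struct xfs_log_vec and located by pointer arithmetic, so one kmem_free() releases both. The core of that layout, lifted from the hunk:

	/* One zeroed allocation: vector header plus lid_size iovecs. */
	new_lv = kmem_zalloc(sizeof(*new_lv) +
			     lidp->lid_size * sizeof(struct xfs_log_iovec),
			     KM_SLEEP);

	/* The iovec region starts immediately after the header itself. */
	new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
	new_lv->lv_niovecs = lidp->lid_size;
	new_lv->lv_item = lidp->lid_item;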
@@ -1195,25 +1411,10 @@ xfs_trans_cancel( | |||
1195 | /* mark this thread as no longer being in a transaction */ | 1411 | /* mark this thread as no longer being in a transaction */ |
1196 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1412 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1197 | 1413 | ||
1198 | xfs_trans_free_items(tp, flags); | 1414 | xfs_trans_free_items(tp, NULLCOMMITLSN, flags); |
1199 | xfs_trans_free_busy(tp); | ||
1200 | xfs_trans_free(tp); | 1415 | xfs_trans_free(tp); |
1201 | } | 1416 | } |
1202 | 1417 | ||
1203 | |||
1204 | /* | ||
1205 | * Free the transaction structure. If there is more clean up | ||
1206 | * to do when the structure is freed, add it here. | ||
1207 | */ | ||
1208 | STATIC void | ||
1209 | xfs_trans_free( | ||
1210 | xfs_trans_t *tp) | ||
1211 | { | ||
1212 | atomic_dec(&tp->t_mountp->m_active_trans); | ||
1213 | xfs_trans_free_dqinfo(tp); | ||
1214 | kmem_zone_free(xfs_trans_zone, tp); | ||
1215 | } | ||
1216 | |||
1217 | /* | 1418 | /* |
1218 | * Roll from one trans in the sequence of PERMANENT transactions to | 1419 | * Roll from one trans in the sequence of PERMANENT transactions to |
1219 | * the next: permanent transactions are only flushed out when | 1420 | * the next: permanent transactions are only flushed out when |
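Note how xfs_trans_cancel() above now ends exactly like the uncommit and CIL-commit paths earlier in this file: every exit funnels through the same xfs_trans_free_items() plus xfs_trans_free() pair, differing only in the LSN and flags handed down:

	/* Cancel/uncommit: no commit LSN; abort flags as appropriate. */
	xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
	xfs_trans_free(tp);

	/* CIL commit: items unlock against the real commit LSN. */
	xfs_trans_free_items(tp, *commit_lsn, 0);
	xfs_trans_free(tp);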
@@ -1283,174 +1484,3 @@ xfs_trans_roll( | |||
1283 | xfs_trans_ihold(trans, dp); | 1484 | xfs_trans_ihold(trans, dp); |
1284 | return 0; | 1485 | return 0; |
1285 | } | 1486 | } |
1286 | |||
1287 | /* | ||
1288 | * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item(). | ||
1289 | * | ||
1290 | * This is typically called by the LM when a transaction has been fully | ||
1291 | * committed to disk. It needs to unpin the items which have | ||
1292 | * been logged by the transaction and update their positions | ||
1293 | * in the AIL if necessary. | ||
1294 | * This also gets called when the transactions didn't get written out | ||
1295 | * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then. | ||
1296 | * | ||
1297 | * Call xfs_trans_chunk_committed() to process the items in | ||
1298 | * each chunk. | ||
1299 | */ | ||
1300 | STATIC void | ||
1301 | xfs_trans_committed( | ||
1302 | xfs_trans_t *tp, | ||
1303 | int abortflag) | ||
1304 | { | ||
1305 | xfs_log_item_chunk_t *licp; | ||
1306 | xfs_log_item_chunk_t *next_licp; | ||
1307 | xfs_log_busy_chunk_t *lbcp; | ||
1308 | xfs_log_busy_slot_t *lbsp; | ||
1309 | int i; | ||
1310 | |||
1311 | /* | ||
1312 | * Call the transaction's completion callback if there | ||
1313 | * is one. | ||
1314 | */ | ||
1315 | if (tp->t_callback != NULL) { | ||
1316 | tp->t_callback(tp, tp->t_callarg); | ||
1317 | } | ||
1318 | |||
1319 | /* | ||
1320 | * Special case the chunk embedded in the transaction. | ||
1321 | */ | ||
1322 | licp = &(tp->t_items); | ||
1323 | if (!(xfs_lic_are_all_free(licp))) { | ||
1324 | xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * Process the items in each chunk in turn. | ||
1329 | */ | ||
1330 | licp = licp->lic_next; | ||
1331 | while (licp != NULL) { | ||
1332 | ASSERT(!xfs_lic_are_all_free(licp)); | ||
1333 | xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); | ||
1334 | next_licp = licp->lic_next; | ||
1335 | kmem_free(licp); | ||
1336 | licp = next_licp; | ||
1337 | } | ||
1338 | |||
1339 | /* | ||
1340 | * Clear all the per-AG busy list items listed in this transaction | ||
1341 | */ | ||
1342 | lbcp = &tp->t_busy; | ||
1343 | while (lbcp != NULL) { | ||
1344 | for (i = 0, lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) { | ||
1345 | if (!XFS_LBC_ISFREE(lbcp, i)) { | ||
1346 | xfs_alloc_clear_busy(tp, lbsp->lbc_ag, | ||
1347 | lbsp->lbc_idx); | ||
1348 | } | ||
1349 | } | ||
1350 | lbcp = lbcp->lbc_next; | ||
1351 | } | ||
1352 | xfs_trans_free_busy(tp); | ||
1353 | |||
1354 | /* | ||
1355 | * That's it for the transaction structure. Free it. | ||
1356 | */ | ||
1357 | xfs_trans_free(tp); | ||
1358 | } | ||
1359 | |||
1360 | /* | ||
1361 | * This is called to perform the commit processing for each | ||
1362 | * item described by the given chunk. | ||
1363 | * | ||
1364 | * The commit processing consists of unlocking items which were | ||
1365 | * held locked with the SYNC_UNLOCK attribute, calling the committed | ||
1366 | * routine of each logged item, updating the item's position in the AIL | ||
1367 | * if necessary, and unpinning each item. If the committed routine | ||
1368 | * returns -1, then do nothing further with the item because it | ||
1369 | * may have been freed. | ||
1370 | * | ||
1371 | * Since items are unlocked when they are copied to the incore | ||
1372 | * log, it is possible for two transactions to be completing | ||
1373 | * and manipulating the same item simultaneously. The AIL lock | ||
1374 | * will protect the lsn field of each item. The value of this | ||
1375 | * field can never go backwards. | ||
1376 | * | ||
1377 | * We unpin the items after repositioning them in the AIL, because | ||
1378 | * otherwise they could be immediately flushed and we'd have to race | ||
1379 | * with the flusher trying to pull the item from the AIL as we add it. | ||
1380 | */ | ||
1381 | STATIC void | ||
1382 | xfs_trans_chunk_committed( | ||
1383 | xfs_log_item_chunk_t *licp, | ||
1384 | xfs_lsn_t lsn, | ||
1385 | int aborted) | ||
1386 | { | ||
1387 | xfs_log_item_desc_t *lidp; | ||
1388 | xfs_log_item_t *lip; | ||
1389 | xfs_lsn_t item_lsn; | ||
1390 | int i; | ||
1391 | |||
1392 | lidp = licp->lic_descs; | ||
1393 | for (i = 0; i < licp->lic_unused; i++, lidp++) { | ||
1394 | struct xfs_ail *ailp; | ||
1395 | |||
1396 | if (xfs_lic_isfree(licp, i)) { | ||
1397 | continue; | ||
1398 | } | ||
1399 | |||
1400 | lip = lidp->lid_item; | ||
1401 | if (aborted) | ||
1402 | lip->li_flags |= XFS_LI_ABORTED; | ||
1403 | |||
1404 | /* | ||
1405 | * Send in the ABORTED flag to the COMMITTED routine | ||
1406 | * so that it knows whether the transaction was aborted | ||
1407 | * or not. | ||
1408 | */ | ||
1409 | item_lsn = IOP_COMMITTED(lip, lsn); | ||
1410 | |||
1411 | /* | ||
1412 | * If the committed routine returns -1, make | ||
1413 | * no more references to the item. | ||
1414 | */ | ||
1415 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) { | ||
1416 | continue; | ||
1417 | } | ||
1418 | |||
1419 | /* | ||
1420 | * If the returned lsn is greater than what it | ||
1421 | * contained before, update the location of the | ||
1422 | * item in the AIL. If it is not, then do nothing. | ||
1423 | * Items can never move backwards in the AIL. | ||
1424 | * | ||
1425 | * While the new lsn should usually be greater, it | ||
1426 | * is possible that a later transaction completing | ||
1427 | * simultaneously with an earlier one using the | ||
1428 | * same item could complete first with a higher lsn. | ||
1429 | * This would cause the earlier transaction to fail | ||
1430 | * the test below. | ||
1431 | */ | ||
1432 | ailp = lip->li_ailp; | ||
1433 | spin_lock(&ailp->xa_lock); | ||
1434 | if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { | ||
1435 | /* | ||
1436 | * This will set the item's lsn to item_lsn | ||
1437 | * and update the position of the item in | ||
1438 | * the AIL. | ||
1439 | * | ||
1440 | * xfs_trans_ail_update() drops the AIL lock. | ||
1441 | */ | ||
1442 | xfs_trans_ail_update(ailp, lip, item_lsn); | ||
1443 | } else { | ||
1444 | spin_unlock(&ailp->xa_lock); | ||
1445 | } | ||
1446 | |||
1447 | /* | ||
1448 | * Now that we've repositioned the item in the AIL, | ||
1449 | * unpin it so it can be flushed. Pass information | ||
1450 | * about buffer stale state down from the log item | ||
1451 | * flags, if anyone else stales the buffer we do not | ||
1452 | * want to pay any attention to it. | ||
1453 | */ | ||
1454 | IOP_UNPIN(lip, lidp->lid_flags & XFS_LID_BUF_STALE); | ||
1455 | } | ||
1456 | } | ||
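The removed xfs_trans_chunk_committed() and its replacement xfs_trans_item_committed() earlier in this diff enforce the same AIL discipline: an item's LSN only ever moves forward, and the item is unpinned only after it has been repositioned. The invariant, as it appears in both versions:

	ailp = lip->li_ailp;
	spin_lock(&ailp->xa_lock);
	if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
		/* Sets lip->li_lsn to item_lsn; drops xa_lock itself. */
		xfs_trans_ail_update(ailp, lip, item_lsn);
	} else {
		spin_unlock(&ailp->xa_lock);
	}

	/* Unpin only after repositioning, so a flusher cannot race us
	 * pulling the item out of the AIL while we insert it. */
	IOP_UNPIN(lip);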
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 79c8bab9dfff..8c69e7824f68 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -49,6 +49,15 @@ typedef struct xfs_trans_header { | |||
49 | #define XFS_LI_DQUOT 0x123d | 49 | #define XFS_LI_DQUOT 0x123d |
50 | #define XFS_LI_QUOTAOFF 0x123e | 50 | #define XFS_LI_QUOTAOFF 0x123e |
51 | 51 | ||
52 | #define XFS_LI_TYPE_DESC \ | ||
53 | { XFS_LI_EFI, "XFS_LI_EFI" }, \ | ||
54 | { XFS_LI_EFD, "XFS_LI_EFD" }, \ | ||
55 | { XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \ | ||
56 | { XFS_LI_INODE, "XFS_LI_INODE" }, \ | ||
57 | { XFS_LI_BUF, "XFS_LI_BUF" }, \ | ||
58 | { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \ | ||
59 | { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" } | ||
60 | |||
52 | /* | 61 | /* |
53 | * Transaction types. Used to distinguish types of buffers. | 62 | * Transaction types. Used to distinguish types of buffers. |
54 | */ | 63 | */ |
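XFS_LI_TYPE_DESC pairs each log item type with a printable name. Lists in this { value, "string" } shape are what the kernel tracing macros consume; presumably xfs_trace.h (not part of this hunk) feeds it to __print_symbolic(), roughly:

	/* Hypothetical tracepoint fragment -- the real xfs_trace.h usage
	 * is not shown in this diff. */
	TP_printk("item %p type %s",
		  __entry->lip,
		  __print_symbolic(__entry->type, XFS_LI_TYPE_DESC))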
@@ -97,7 +106,8 @@ typedef struct xfs_trans_header { | |||
97 | #define XFS_TRANS_GROWFSRT_FREE 39 | 106 | #define XFS_TRANS_GROWFSRT_FREE 39 |
98 | #define XFS_TRANS_SWAPEXT 40 | 107 | #define XFS_TRANS_SWAPEXT 40 |
99 | #define XFS_TRANS_SB_COUNT 41 | 108 | #define XFS_TRANS_SB_COUNT 41 |
100 | #define XFS_TRANS_TYPE_MAX 41 | 109 | #define XFS_TRANS_CHECKPOINT 42 |
110 | #define XFS_TRANS_TYPE_MAX 42 | ||
101 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 111 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
102 | 112 | ||
103 | #define XFS_TRANS_TYPES \ | 113 | #define XFS_TRANS_TYPES \ |
@@ -139,6 +149,7 @@ typedef struct xfs_trans_header { | |||
139 | { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ | 149 | { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ |
140 | { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ | 150 | { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ |
141 | { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ | 151 | { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ |
152 | { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ | ||
142 | { XFS_TRANS_DUMMY1, "DUMMY1" }, \ | 153 | { XFS_TRANS_DUMMY1, "DUMMY1" }, \ |
143 | { XFS_TRANS_DUMMY2, "DUMMY2" }, \ | 154 | { XFS_TRANS_DUMMY2, "DUMMY2" }, \ |
144 | { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } | 155 | { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } |
@@ -159,7 +170,6 @@ typedef struct xfs_log_item_desc { | |||
159 | 170 | ||
160 | #define XFS_LID_DIRTY 0x1 | 171 | #define XFS_LID_DIRTY 0x1 |
161 | #define XFS_LID_PINNED 0x2 | 172 | #define XFS_LID_PINNED 0x2 |
162 | #define XFS_LID_BUF_STALE 0x8 | ||
163 | 173 | ||
164 | /* | 174 | /* |
165 | * This structure is used to maintain a chunk list of log_item_desc | 175 | * This structure is used to maintain a chunk list of log_item_desc |
@@ -805,6 +815,7 @@ struct xfs_log_item_desc; | |||
805 | struct xfs_mount; | 815 | struct xfs_mount; |
806 | struct xfs_trans; | 816 | struct xfs_trans; |
807 | struct xfs_dquot_acct; | 817 | struct xfs_dquot_acct; |
818 | struct xfs_busy_extent; | ||
808 | 819 | ||
809 | typedef struct xfs_log_item { | 820 | typedef struct xfs_log_item { |
810 | struct list_head li_ail; /* AIL pointers */ | 821 | struct list_head li_ail; /* AIL pointers */ |
@@ -820,6 +831,11 @@ typedef struct xfs_log_item { | |||
820 | /* buffer item iodone */ | 831 | /* buffer item iodone */ |
821 | /* callback func */ | 832 | /* callback func */ |
822 | struct xfs_item_ops *li_ops; /* function list */ | 833 | struct xfs_item_ops *li_ops; /* function list */ |
834 | |||
835 | /* delayed logging */ | ||
836 | struct list_head li_cil; /* CIL pointers */ | ||
837 | struct xfs_log_vec *li_lv; /* active log vector */ | ||
838 | xfs_lsn_t li_seq; /* CIL commit seq */ | ||
823 | } xfs_log_item_t; | 839 | } xfs_log_item_t; |
824 | 840 | ||
825 | #define XFS_LI_IN_AIL 0x1 | 841 | #define XFS_LI_IN_AIL 0x1 |
@@ -833,7 +849,7 @@ typedef struct xfs_item_ops { | |||
833 | uint (*iop_size)(xfs_log_item_t *); | 849 | uint (*iop_size)(xfs_log_item_t *); |
834 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); | 850 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); |
835 | void (*iop_pin)(xfs_log_item_t *); | 851 | void (*iop_pin)(xfs_log_item_t *); |
836 | void (*iop_unpin)(xfs_log_item_t *, int); | 852 | void (*iop_unpin)(xfs_log_item_t *); |
837 | void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); | 853 | void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); |
838 | uint (*iop_trylock)(xfs_log_item_t *); | 854 | uint (*iop_trylock)(xfs_log_item_t *); |
839 | void (*iop_unlock)(xfs_log_item_t *); | 855 | void (*iop_unlock)(xfs_log_item_t *); |
@@ -846,7 +862,7 @@ typedef struct xfs_item_ops { | |||
846 | #define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) | 862 | #define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) |
847 | #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) | 863 | #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) |
848 | #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) | 864 | #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) |
849 | #define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags) | 865 | #define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip) |
850 | #define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) | 866 | #define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) |
851 | #define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) | 867 | #define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) |
852 | #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) | 868 | #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) |
@@ -864,34 +880,6 @@ typedef struct xfs_item_ops { | |||
864 | #define XFS_ITEM_PUSHBUF 3 | 880 | #define XFS_ITEM_PUSHBUF 3 |
865 | 881 | ||
866 | /* | 882 | /* |
867 | * This structure is used to maintain a list of block ranges that have been | ||
868 | * freed in the transaction. The ranges are listed in the perag[] busy list | ||
869 | * between when they're freed and the transaction is committed to disk. | ||
870 | */ | ||
871 | |||
872 | typedef struct xfs_log_busy_slot { | ||
873 | xfs_agnumber_t lbc_ag; | ||
874 | ushort lbc_idx; /* index in perag.busy[] */ | ||
875 | } xfs_log_busy_slot_t; | ||
876 | |||
877 | #define XFS_LBC_NUM_SLOTS 31 | ||
878 | typedef struct xfs_log_busy_chunk { | ||
879 | struct xfs_log_busy_chunk *lbc_next; | ||
880 | uint lbc_free; /* free slots bitmask */ | ||
881 | ushort lbc_unused; /* first unused */ | ||
882 | xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; | ||
883 | } xfs_log_busy_chunk_t; | ||
884 | |||
885 | #define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) | ||
886 | #define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) | ||
887 | |||
888 | #define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) | ||
889 | #define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) | ||
890 | #define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) | ||
891 | #define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) | ||
892 | #define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) | ||
893 | |||
894 | /* | ||
895 | * This is the type of function which can be given to xfs_trans_callback() | 883 | * This is the type of function which can be given to xfs_trans_callback() |
896 | * to be called upon the transaction's commit to disk. | 884 | * to be called upon the transaction's commit to disk. |
897 | */ | 885 | */ |
@@ -942,8 +930,7 @@ typedef struct xfs_trans { | |||
942 | unsigned int t_items_free; /* log item descs free */ | 930 | unsigned int t_items_free; /* log item descs free */ |
943 | xfs_log_item_chunk_t t_items; /* first log item desc chunk */ | 931 | xfs_log_item_chunk_t t_items; /* first log item desc chunk */ |
944 | xfs_trans_header_t t_header; /* header for in-log trans */ | 932 | xfs_trans_header_t t_header; /* header for in-log trans */ |
945 | unsigned int t_busy_free; /* busy descs free */ | 933 | struct list_head t_busy; /* list of busy extents */ |
946 | xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ | ||
947 | unsigned long t_pflags; /* saved process flags state */ | 934 | unsigned long t_pflags; /* saved process flags state */ |
948 | } xfs_trans_t; | 935 | } xfs_trans_t; |
949 | 936 | ||
@@ -1017,9 +1004,6 @@ int _xfs_trans_commit(xfs_trans_t *, | |||
1017 | void xfs_trans_cancel(xfs_trans_t *, int); | 1004 | void xfs_trans_cancel(xfs_trans_t *, int); |
1018 | int xfs_trans_ail_init(struct xfs_mount *); | 1005 | int xfs_trans_ail_init(struct xfs_mount *); |
1019 | void xfs_trans_ail_destroy(struct xfs_mount *); | 1006 | void xfs_trans_ail_destroy(struct xfs_mount *); |
1020 | xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, | ||
1021 | xfs_agnumber_t ag, | ||
1022 | xfs_extlen_t idx); | ||
1023 | 1007 | ||
1024 | extern kmem_zone_t *xfs_trans_zone; | 1008 | extern kmem_zone_t *xfs_trans_zone; |
1025 | 1009 | ||
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index fb586360d1c9..63d81a22f4fd 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -40,11 +40,51 @@ | |||
40 | #include "xfs_rw.h" | 40 | #include "xfs_rw.h" |
41 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
42 | 42 | ||
43 | /* | ||
44 | * Check to see if a buffer matching the given parameters is already | ||
45 | * a part of the given transaction. | ||
46 | */ | ||
47 | STATIC struct xfs_buf * | ||
48 | xfs_trans_buf_item_match( | ||
49 | struct xfs_trans *tp, | ||
50 | struct xfs_buftarg *target, | ||
51 | xfs_daddr_t blkno, | ||
52 | int len) | ||
53 | { | ||
54 | xfs_log_item_chunk_t *licp; | ||
55 | xfs_log_item_desc_t *lidp; | ||
56 | xfs_buf_log_item_t *blip; | ||
57 | int i; | ||
58 | |||
59 | len = BBTOB(len); | ||
60 | for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { | ||
61 | if (xfs_lic_are_all_free(licp)) { | ||
62 | ASSERT(licp == &tp->t_items); | ||
63 | ASSERT(licp->lic_next == NULL); | ||
64 | return NULL; | ||
65 | } | ||
66 | |||
67 | for (i = 0; i < licp->lic_unused; i++) { | ||
68 | /* | ||
69 | * Skip unoccupied slots. | ||
70 | */ | ||
71 | if (xfs_lic_isfree(licp, i)) | ||
72 | continue; | ||
73 | |||
74 | lidp = xfs_lic_slot(licp, i); | ||
75 | blip = (xfs_buf_log_item_t *)lidp->lid_item; | ||
76 | if (blip->bli_item.li_type != XFS_LI_BUF) | ||
77 | continue; | ||
78 | |||
79 | if (XFS_BUF_TARGET(blip->bli_buf) == target && | ||
80 | XFS_BUF_ADDR(blip->bli_buf) == blkno && | ||
81 | XFS_BUF_COUNT(blip->bli_buf) == len) | ||
82 | return blip->bli_buf; | ||
83 | } | ||
84 | } | ||
43 | 85 | ||
44 | STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *, | 86 | return NULL; |
45 | xfs_daddr_t, int); | 87 | } |
46 | STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *, | ||
47 | xfs_daddr_t, int); | ||
48 | 88 | ||
49 | /* | 89 | /* |
50 | * Add the locked buffer to the transaction. | 90 | * Add the locked buffer to the transaction. |
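With xfs_trans_buf_item_match() now walking every log item chunk itself, the old fast/slow split against _match_all() disappears and both lookup sites below collapse to one call. Caller side, per the xfs_trans_get_buf() hunk that follows (the recursion handling after a hit is elided by the diff):

	bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
	if (bp != NULL) {
		/* Buffer already belongs to this transaction and is
		 * locked; bump the recursion count and hand it back. */
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
	}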
@@ -74,7 +114,7 @@ _xfs_trans_bjoin( | |||
74 | xfs_buf_item_init(bp, tp->t_mountp); | 114 | xfs_buf_item_init(bp, tp->t_mountp); |
75 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 115 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
76 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 116 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
77 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 117 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
78 | ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); | 118 | ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); |
79 | if (reset_recur) | 119 | if (reset_recur) |
80 | bip->bli_recur = 0; | 120 | bip->bli_recur = 0; |
@@ -112,14 +152,6 @@ xfs_trans_bjoin( | |||
112 | * within the transaction, just increment its lock recursion count | 152 | * within the transaction, just increment its lock recursion count |
113 | * and return a pointer to it. | 153 | * and return a pointer to it. |
114 | * | 154 | * |
115 | * Use the fast path function xfs_trans_buf_item_match() or the buffer | ||
116 | * cache routine incore_match() to find the buffer | ||
117 | * if it is already owned by this transaction. | ||
118 | * | ||
119 | * If we don't already own the buffer, use get_buf() to get it. | ||
120 | * If it doesn't yet have an associated xfs_buf_log_item structure, | ||
121 | * then allocate one and add the item to this transaction. | ||
122 | * | ||
123 | * If the transaction pointer is NULL, make this just a normal | 155 | * If the transaction pointer is NULL, make this just a normal |
124 | * get_buf() call. | 156 | * get_buf() call. |
125 | */ | 157 | */ |
@@ -149,11 +181,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
149 | * have it locked. In this case we just increment the lock | 181 | * have it locked. In this case we just increment the lock |
150 | * recursion count and return the buffer to the caller. | 182 | * recursion count and return the buffer to the caller. |
151 | */ | 183 | */ |
152 | if (tp->t_items.lic_next == NULL) { | 184 | bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); |
153 | bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); | ||
154 | } else { | ||
155 | bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len); | ||
156 | } | ||
157 | if (bp != NULL) { | 185 | if (bp != NULL) { |
158 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 186 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
159 | if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) | 187 | if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) |
@@ -259,14 +287,6 @@ int xfs_error_mod = 33; | |||
259 | * within the transaction and already read in, just increment its | 287 | * within the transaction and already read in, just increment its |
260 | * lock recursion count and return a pointer to it. | 288 | * lock recursion count and return a pointer to it. |
261 | * | 289 | * |
262 | * Use the fast path function xfs_trans_buf_item_match() or the buffer | ||
263 | * cache routine incore_match() to find the buffer | ||
264 | * if it is already owned by this transaction. | ||
265 | * | ||
266 | * If we don't already own the buffer, use read_buf() to get it. | ||
267 | * If it doesn't yet have an associated xfs_buf_log_item structure, | ||
268 | * then allocate one and add the item to this transaction. | ||
269 | * | ||
270 | * If the transaction pointer is NULL, make this just a normal | 290 | * If the transaction pointer is NULL, make this just a normal |
271 | * read_buf() call. | 291 | * read_buf() call. |
272 | */ | 292 | */ |
@@ -328,11 +348,7 @@ xfs_trans_read_buf( | |||
328 | * If the buffer is not yet read in, then we read it in, increment | 348 | * If the buffer is not yet read in, then we read it in, increment |
329 | * the lock recursion count, and return it to the caller. | 349 | * the lock recursion count, and return it to the caller. |
330 | */ | 350 | */ |
331 | if (tp->t_items.lic_next == NULL) { | 351 | bp = xfs_trans_buf_item_match(tp, target, blkno, len); |
332 | bp = xfs_trans_buf_item_match(tp, target, blkno, len); | ||
333 | } else { | ||
334 | bp = xfs_trans_buf_item_match_all(tp, target, blkno, len); | ||
335 | } | ||
336 | if (bp != NULL) { | 352 | if (bp != NULL) { |
337 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 353 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
338 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); | 354 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); |
@@ -495,7 +511,7 @@ xfs_trans_brelse(xfs_trans_t *tp, | |||
495 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 511 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
496 | ASSERT(bip->bli_item.li_type == XFS_LI_BUF); | 512 | ASSERT(bip->bli_item.li_type == XFS_LI_BUF); |
497 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 513 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
498 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 514 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
499 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 515 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
500 | 516 | ||
501 | /* | 517 | /* |
@@ -603,7 +619,7 @@ xfs_trans_bhold(xfs_trans_t *tp, | |||
603 | 619 | ||
604 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 620 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
605 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 621 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
606 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 622 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
607 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 623 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
608 | bip->bli_flags |= XFS_BLI_HOLD; | 624 | bip->bli_flags |= XFS_BLI_HOLD; |
609 | trace_xfs_trans_bhold(bip); | 625 | trace_xfs_trans_bhold(bip); |
@@ -625,7 +641,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp, | |||
625 | 641 | ||
626 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 642 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
627 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 643 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
628 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); | 644 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); |
629 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 645 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
630 | ASSERT(bip->bli_flags & XFS_BLI_HOLD); | 646 | ASSERT(bip->bli_flags & XFS_BLI_HOLD); |
631 | bip->bli_flags &= ~XFS_BLI_HOLD; | 647 | bip->bli_flags &= ~XFS_BLI_HOLD; |
@@ -688,7 +704,7 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
688 | bip->bli_flags &= ~XFS_BLI_STALE; | 704 | bip->bli_flags &= ~XFS_BLI_STALE; |
689 | ASSERT(XFS_BUF_ISSTALE(bp)); | 705 | ASSERT(XFS_BUF_ISSTALE(bp)); |
690 | XFS_BUF_UNSTALE(bp); | 706 | XFS_BUF_UNSTALE(bp); |
691 | bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL; | 707 | bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; |
692 | } | 708 | } |
693 | 709 | ||
694 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); | 710 | lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); |
@@ -696,7 +712,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
696 | 712 | ||
697 | tp->t_flags |= XFS_TRANS_DIRTY; | 713 | tp->t_flags |= XFS_TRANS_DIRTY; |
698 | lidp->lid_flags |= XFS_LID_DIRTY; | 714 | lidp->lid_flags |= XFS_LID_DIRTY; |
699 | lidp->lid_flags &= ~XFS_LID_BUF_STALE; | ||
700 | bip->bli_flags |= XFS_BLI_LOGGED; | 715 | bip->bli_flags |= XFS_BLI_LOGGED; |
701 | xfs_buf_item_log(bip, first, last); | 716 | xfs_buf_item_log(bip, first, last); |
702 | } | 717 | } |
@@ -747,8 +762,8 @@ xfs_trans_binval( | |||
747 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | 762 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); |
748 | ASSERT(XFS_BUF_ISSTALE(bp)); | 763 | ASSERT(XFS_BUF_ISSTALE(bp)); |
749 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); | 764 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); |
750 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF)); | 765 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); |
751 | ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); | 766 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
752 | ASSERT(lidp->lid_flags & XFS_LID_DIRTY); | 767 | ASSERT(lidp->lid_flags & XFS_LID_DIRTY); |
753 | ASSERT(tp->t_flags & XFS_TRANS_DIRTY); | 768 | ASSERT(tp->t_flags & XFS_TRANS_DIRTY); |
754 | return; | 769 | return; |
@@ -759,7 +774,7 @@ xfs_trans_binval( | |||
759 | * in the buf log item. The STALE flag will be used in | 774 | * in the buf log item. The STALE flag will be used in |
760 | * xfs_buf_item_unpin() to determine if it should clean up | 775 | * xfs_buf_item_unpin() to determine if it should clean up |
761 | * when the last reference to the buf item is given up. | 776 | * when the last reference to the buf item is given up. |
762 | * We set the XFS_BLI_CANCEL flag in the buf log format structure | 777 | * We set the XFS_BLF_CANCEL flag in the buf log format structure |
763 | * and log the buf item. This will be used at recovery time | 778 | * and log the buf item. This will be used at recovery time |
764 | * to determine that copies of the buffer in the log before | 779 | * to determine that copies of the buffer in the log before |
765 | * this should not be replayed. | 780 | * this should not be replayed. |
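This comment describes the write side of the cancellation protocol; the read side lives in log recovery (xlog_check_buffer_cancelled() in xfs_log_recover.c, not part of this diff), where pass 1 records every cancelled buffer and pass 2 refuses to replay earlier copies of it. A loose sketch of that check, simplified to a flat table where the real code uses a refcounted hash:

	#include <stdbool.h>

	/*
	 * One recorded XFS_BLF_CANCEL buffer from recovery pass 1
	 * (simplified: real entries carry a refcount and hash-chain).
	 */
	struct cancel_entry {
		long	blkno;
		int	len;
	};

	/*
	 * Pass 2 asks this before replaying any buffer: if a later
	 * transaction logged a cancel for the same range, earlier
	 * logged copies of that buffer must not be replayed.
	 */
	static bool
	buffer_is_cancelled(const struct cancel_entry *tbl, int n,
			    long blkno, int len)
	{
		for (int i = 0; i < n; i++)
			if (tbl[i].blkno == blkno && tbl[i].len == len)
				return true;	/* skip replay */
		return false;
	}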
@@ -777,26 +792,26 @@ xfs_trans_binval( | |||
777 | XFS_BUF_UNDELAYWRITE(bp); | 792 | XFS_BUF_UNDELAYWRITE(bp); |
778 | XFS_BUF_STALE(bp); | 793 | XFS_BUF_STALE(bp); |
779 | bip->bli_flags |= XFS_BLI_STALE; | 794 | bip->bli_flags |= XFS_BLI_STALE; |
780 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY); | 795 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); |
781 | bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF; | 796 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; |
782 | bip->bli_format.blf_flags |= XFS_BLI_CANCEL; | 797 | bip->bli_format.blf_flags |= XFS_BLF_CANCEL; |
783 | memset((char *)(bip->bli_format.blf_data_map), 0, | 798 | memset((char *)(bip->bli_format.blf_data_map), 0, |
784 | (bip->bli_format.blf_map_size * sizeof(uint))); | 799 | (bip->bli_format.blf_map_size * sizeof(uint))); |
785 | lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE; | 800 | lidp->lid_flags |= XFS_LID_DIRTY; |
786 | tp->t_flags |= XFS_TRANS_DIRTY; | 801 | tp->t_flags |= XFS_TRANS_DIRTY; |
787 | } | 802 | } |
788 | 803 | ||
789 | /* | 804 | /* |
790 | * This call is used to indicate that the buffer contains on-disk | 805 | * This call is used to indicate that the buffer contains on-disk inodes which |
791 | * inodes which must be handled specially during recovery. They | 806 | * must be handled specially during recovery. They require special handling |
792 | * require special handling because only the di_next_unlinked from | 807 | * because only the di_next_unlinked from the inodes in the buffer should be |
793 | * the inodes in the buffer should be recovered. The rest of the | 808 | * recovered. The rest of the data in the buffer is logged via the inodes |
794 | * data in the buffer is logged via the inodes themselves. | 809 | * themselves. |
795 | * | 810 | * |
796 | * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log | 811 | * All we do is set the XFS_BLI_INODE_BUF flag in the item's flags so it can be |
797 | * format structure so that we'll know what to do at recovery time. | 812 | * transferred to the buffer's log format structure so that we'll know what to |
813 | * do at recovery time. | ||
798 | */ | 814 | */ |
799 | /* ARGSUSED */ | ||
800 | void | 815 | void |
801 | xfs_trans_inode_buf( | 816 | xfs_trans_inode_buf( |
802 | xfs_trans_t *tp, | 817 | xfs_trans_t *tp, |
@@ -811,7 +826,7 @@ xfs_trans_inode_buf( | |||
811 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 826 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
812 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 827 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
813 | 828 | ||
814 | bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF; | 829 | bip->bli_flags |= XFS_BLI_INODE_BUF; |
815 | } | 830 | } |
816 | 831 | ||
817 | /* | 832 | /* |
@@ -893,120 +908,12 @@ xfs_trans_dquot_buf( | |||
893 | ASSERT(XFS_BUF_ISBUSY(bp)); | 908 | ASSERT(XFS_BUF_ISBUSY(bp)); |
894 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); | 909 | ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); |
895 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 910 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); |
896 | ASSERT(type == XFS_BLI_UDQUOT_BUF || | 911 | ASSERT(type == XFS_BLF_UDQUOT_BUF || |
897 | type == XFS_BLI_PDQUOT_BUF || | 912 | type == XFS_BLF_PDQUOT_BUF || |
898 | type == XFS_BLI_GDQUOT_BUF); | 913 | type == XFS_BLF_GDQUOT_BUF); |
899 | 914 | ||
900 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); | 915 | bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); |
901 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 916 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
902 | 917 | ||
903 | bip->bli_format.blf_flags |= type; | 918 | bip->bli_format.blf_flags |= type; |
904 | } | 919 | } |
905 | |||
906 | /* | ||
907 | * Check to see if a buffer matching the given parameters is already | ||
908 | * a part of the given transaction. Only check the first, embedded | ||
909 | * chunk, since we don't want to spend all day scanning large transactions. | ||
910 | */ | ||
911 | STATIC xfs_buf_t * | ||
912 | xfs_trans_buf_item_match( | ||
913 | xfs_trans_t *tp, | ||
914 | xfs_buftarg_t *target, | ||
915 | xfs_daddr_t blkno, | ||
916 | int len) | ||
917 | { | ||
918 | xfs_log_item_chunk_t *licp; | ||
919 | xfs_log_item_desc_t *lidp; | ||
920 | xfs_buf_log_item_t *blip; | ||
921 | xfs_buf_t *bp; | ||
922 | int i; | ||
923 | |||
924 | bp = NULL; | ||
925 | len = BBTOB(len); | ||
926 | licp = &tp->t_items; | ||
927 | if (!xfs_lic_are_all_free(licp)) { | ||
928 | for (i = 0; i < licp->lic_unused; i++) { | ||
929 | /* | ||
930 | * Skip unoccupied slots. | ||
931 | */ | ||
932 | if (xfs_lic_isfree(licp, i)) { | ||
933 | continue; | ||
934 | } | ||
935 | |||
936 | lidp = xfs_lic_slot(licp, i); | ||
937 | blip = (xfs_buf_log_item_t *)lidp->lid_item; | ||
938 | if (blip->bli_item.li_type != XFS_LI_BUF) { | ||
939 | continue; | ||
940 | } | ||
941 | |||
942 | bp = blip->bli_buf; | ||
943 | if ((XFS_BUF_TARGET(bp) == target) && | ||
944 | (XFS_BUF_ADDR(bp) == blkno) && | ||
945 | (XFS_BUF_COUNT(bp) == len)) { | ||
946 | /* | ||
947 | * We found it. Break out and | ||
948 | * return the pointer to the buffer. | ||
949 | */ | ||
950 | break; | ||
951 | } else { | ||
952 | bp = NULL; | ||
953 | } | ||
954 | } | ||
955 | } | ||
956 | return bp; | ||
957 | } | ||
958 | |||
959 | /* | ||
960 | * Check to see if a buffer matching the given parameters is already | ||
961 | * a part of the given transaction. Check all the chunks, we | ||
962 | * want to be thorough. | ||
963 | */ | ||
964 | STATIC xfs_buf_t * | ||
965 | xfs_trans_buf_item_match_all( | ||
966 | xfs_trans_t *tp, | ||
967 | xfs_buftarg_t *target, | ||
968 | xfs_daddr_t blkno, | ||
969 | int len) | ||
970 | { | ||
971 | xfs_log_item_chunk_t *licp; | ||
972 | xfs_log_item_desc_t *lidp; | ||
973 | xfs_buf_log_item_t *blip; | ||
974 | xfs_buf_t *bp; | ||
975 | int i; | ||
976 | |||
977 | bp = NULL; | ||
978 | len = BBTOB(len); | ||
979 | for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { | ||
980 | if (xfs_lic_are_all_free(licp)) { | ||
981 | ASSERT(licp == &tp->t_items); | ||
982 | ASSERT(licp->lic_next == NULL); | ||
983 | return NULL; | ||
984 | } | ||
985 | for (i = 0; i < licp->lic_unused; i++) { | ||
986 | /* | ||
987 | * Skip unoccupied slots. | ||
988 | */ | ||
989 | if (xfs_lic_isfree(licp, i)) { | ||
990 | continue; | ||
991 | } | ||
992 | |||
993 | lidp = xfs_lic_slot(licp, i); | ||
994 | blip = (xfs_buf_log_item_t *)lidp->lid_item; | ||
995 | if (blip->bli_item.li_type != XFS_LI_BUF) { | ||
996 | continue; | ||
997 | } | ||
998 | |||
999 | bp = blip->bli_buf; | ||
1000 | if ((XFS_BUF_TARGET(bp) == target) && | ||
1001 | (XFS_BUF_ADDR(bp) == blkno) && | ||
1002 | (XFS_BUF_COUNT(bp) == len)) { | ||
1003 | /* | ||
1004 | * We found it. Break out and | ||
1005 | * return the pointer to the buffer. | ||
1006 | */ | ||
1007 | return bp; | ||
1008 | } | ||
1009 | } | ||
1010 | } | ||
1011 | return NULL; | ||
1012 | } | ||
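The two deleted helpers differed only in scan depth: the fast path checked the embedded chunk, the _all variant walked every chunk in the list. Their replacement, called from the hunks at the top of this file, performs one scan. A standalone toy of that chunked scan; only the loop shape is taken from the deleted code, the types are invented:

	#include <stddef.h>

	#define TOY_SLOTS	15	/* slots per chunk, value invented */

	struct toy_buf {
		void	*target;	/* stands in for the buftarg */
		long	blkno;
		int	len;		/* byte length, already converted */
	};

	struct toy_chunk {
		struct toy_chunk	*next;
		unsigned		free_mask;	/* bit set => slot free */
		int			unused;		/* first never-used slot */
		struct toy_buf		*slot[TOY_SLOTS];
	};

	/*
	 * Walk every chunk and every occupied slot; return the first buffer
	 * matching (target, blkno, len) -- the shape of the deleted
	 * xfs_trans_buf_item_match_all(), minus the log-item indirection.
	 */
	static struct toy_buf *
	match_all(struct toy_chunk *head, void *target, long blkno, int len)
	{
		struct toy_chunk	*licp;
		int			i;

		for (licp = head; licp != NULL; licp = licp->next) {
			for (i = 0; i < licp->unused; i++) {
				struct toy_buf *bp;

				if (licp->free_mask & (1u << i))
					continue;	/* skip unoccupied slots */
				bp = licp->slot[i];
				if (bp->target == target &&
				    bp->blkno == blkno && bp->len == len)
					return bp;
			}
		}
		return NULL;
	}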
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index eb3fc57f9eef..f11d37d06dcc 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c | |||
@@ -299,6 +299,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) | |||
299 | void | 299 | void |
300 | xfs_trans_free_items( | 300 | xfs_trans_free_items( |
301 | xfs_trans_t *tp, | 301 | xfs_trans_t *tp, |
302 | xfs_lsn_t commit_lsn, | ||
302 | int flags) | 303 | int flags) |
303 | { | 304 | { |
304 | xfs_log_item_chunk_t *licp; | 305 | xfs_log_item_chunk_t *licp; |
@@ -311,7 +312,7 @@ xfs_trans_free_items( | |||
311 | * Special case the embedded chunk so we don't free it below. | 312 | * Special case the embedded chunk so we don't free it below. |
312 | */ | 313 | */ |
313 | if (!xfs_lic_are_all_free(licp)) { | 314 | if (!xfs_lic_are_all_free(licp)) { |
314 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); | 315 | (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn); |
315 | xfs_lic_all_free(licp); | 316 | xfs_lic_all_free(licp); |
316 | licp->lic_unused = 0; | 317 | licp->lic_unused = 0; |
317 | } | 318 | } |
@@ -322,7 +323,7 @@ xfs_trans_free_items( | |||
322 | */ | 323 | */ |
323 | while (licp != NULL) { | 324 | while (licp != NULL) { |
324 | ASSERT(!xfs_lic_are_all_free(licp)); | 325 | ASSERT(!xfs_lic_are_all_free(licp)); |
325 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); | 326 | (void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn); |
326 | next_licp = licp->lic_next; | 327 | next_licp = licp->lic_next; |
327 | kmem_free(licp); | 328 | kmem_free(licp); |
328 | licp = next_licp; | 329 | licp = next_licp; |
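The functional change in this hunk is threading the real commit LSN through to xfs_trans_unlock_chunk() instead of hardcoding NULLCOMMITLSN, so items released at commit time see where the transaction actually committed. A toy of the resulting shape; the chunk layout and the unlock callback are invented stand-ins:

	#include <stdlib.h>

	typedef long long toy_lsn_t;

	struct toy_chunk {
		struct toy_chunk	*next;
		/* ... item slots elided ... */
	};

	/* Stand-in for xfs_trans_unlock_chunk(): items see commit_lsn here. */
	static void
	unlock_chunk(struct toy_chunk *licp, toy_lsn_t commit_lsn)
	{
		(void)licp;
		(void)commit_lsn;	/* real code unpins items at commit_lsn */
	}

	/*
	 * Shape of the patched xfs_trans_free_items(): unlock the embedded
	 * chunk but keep its memory; unlock and free every overflow chunk.
	 */
	static void
	free_items(struct toy_chunk *embedded, toy_lsn_t commit_lsn)
	{
		struct toy_chunk *licp = embedded->next;

		unlock_chunk(embedded, commit_lsn);	/* embedded: not freed */
		embedded->next = NULL;

		while (licp != NULL) {
			struct toy_chunk *next = licp->next;

			unlock_chunk(licp, commit_lsn);
			free(licp);
			licp = next;
		}
	}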
@@ -438,112 +439,3 @@ xfs_trans_unlock_chunk( | |||
438 | 439 | ||
439 | return freed; | 440 | return freed; |
440 | } | 441 | } |
441 | |||
442 | |||
443 | /* | ||
444 | * This is called to add the given busy item to the transaction's | ||
445 | * list of busy items. It must find a free busy item descriptor | ||
446 | * or allocate a new one and add the item to that descriptor. | ||
447 | * The function returns a pointer to busy descriptor used to point | ||
448 | * to the new busy entry. The log busy entry will now point to its new | ||
449 | * descriptor with its ???? field. | ||
450 | */ | ||
451 | xfs_log_busy_slot_t * | ||
452 | xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx) | ||
453 | { | ||
454 | xfs_log_busy_chunk_t *lbcp; | ||
455 | xfs_log_busy_slot_t *lbsp; | ||
456 | int i=0; | ||
457 | |||
458 | /* | ||
459 | * If there are no free descriptors, allocate a new chunk | ||
460 | * of them and put it at the front of the chunk list. | ||
461 | */ | ||
462 | if (tp->t_busy_free == 0) { | ||
463 | lbcp = (xfs_log_busy_chunk_t*) | ||
464 | kmem_alloc(sizeof(xfs_log_busy_chunk_t), KM_SLEEP); | ||
465 | ASSERT(lbcp != NULL); | ||
466 | /* | ||
467 | * Initialize the chunk, and then | ||
468 | * claim the first slot in the newly allocated chunk. | ||
469 | */ | ||
470 | XFS_LBC_INIT(lbcp); | ||
471 | XFS_LBC_CLAIM(lbcp, 0); | ||
472 | lbcp->lbc_unused = 1; | ||
473 | lbsp = XFS_LBC_SLOT(lbcp, 0); | ||
474 | |||
475 | /* | ||
476 | * Link in the new chunk and update the free count. | ||
477 | */ | ||
478 | lbcp->lbc_next = tp->t_busy.lbc_next; | ||
479 | tp->t_busy.lbc_next = lbcp; | ||
480 | tp->t_busy_free = XFS_LIC_NUM_SLOTS - 1; | ||
481 | |||
482 | /* | ||
483 | * Initialize the descriptor and the generic portion | ||
484 | * of the log item. | ||
485 | * | ||
486 | * Point the new slot at this item and return it. | ||
487 | * Also point the log item at its currently active | ||
488 | * descriptor and set the item's mount pointer. | ||
489 | */ | ||
490 | lbsp->lbc_ag = ag; | ||
491 | lbsp->lbc_idx = idx; | ||
492 | return lbsp; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * Find the free descriptor. It is somewhere in the chunklist | ||
497 | * of descriptors. | ||
498 | */ | ||
499 | lbcp = &tp->t_busy; | ||
500 | while (lbcp != NULL) { | ||
501 | if (XFS_LBC_VACANCY(lbcp)) { | ||
502 | if (lbcp->lbc_unused <= XFS_LBC_MAX_SLOT) { | ||
503 | i = lbcp->lbc_unused; | ||
504 | break; | ||
505 | } else { | ||
506 | /* out-of-order vacancy */ | ||
507 | cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp); | ||
508 | ASSERT(0); | ||
509 | } | ||
510 | } | ||
511 | lbcp = lbcp->lbc_next; | ||
512 | } | ||
513 | ASSERT(lbcp != NULL); | ||
514 | /* | ||
515 | * If we find a free descriptor, claim it, | ||
516 | * initialize it, and return it. | ||
517 | */ | ||
518 | XFS_LBC_CLAIM(lbcp, i); | ||
519 | if (lbcp->lbc_unused <= i) { | ||
520 | lbcp->lbc_unused = i + 1; | ||
521 | } | ||
522 | lbsp = XFS_LBC_SLOT(lbcp, i); | ||
523 | tp->t_busy_free--; | ||
524 | lbsp->lbc_ag = ag; | ||
525 | lbsp->lbc_idx = idx; | ||
526 | return lbsp; | ||
527 | } | ||
528 | |||
529 | |||
530 | /* | ||
531 | * xfs_trans_free_busy | ||
532 | * Free all of the busy lists from a transaction | ||
533 | */ | ||
534 | void | ||
535 | xfs_trans_free_busy(xfs_trans_t *tp) | ||
536 | { | ||
537 | xfs_log_busy_chunk_t *lbcp; | ||
538 | xfs_log_busy_chunk_t *lbcq; | ||
539 | |||
540 | lbcp = tp->t_busy.lbc_next; | ||
541 | while (lbcp != NULL) { | ||
542 | lbcq = lbcp->lbc_next; | ||
543 | kmem_free(lbcp); | ||
544 | lbcp = lbcq; | ||
545 | } | ||
546 | |||
547 | XFS_LBC_INIT(&tp->t_busy); | ||
548 | tp->t_busy.lbc_unused = 0; | ||
549 | } | ||
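The deleted busy-extent bookkeeping used the same embedded-chunk-plus-overflow pattern as the log item list: claim a slot in an existing chunk when one is free, otherwise push a fresh chunk onto the front of the list. A simplified standalone model of xfs_trans_add_busy(); unlike the real code, this version never frees individual slots, so it omits the out-of-order vacancy handling:

	#include <stdlib.h>

	#define TOY_BUSY_SLOTS	15	/* slots per chunk, value invented */

	struct busy_slot {
		unsigned	ag;	/* allocation group */
		unsigned	idx;	/* busy-extent index within the AG */
	};

	struct busy_chunk {
		struct busy_chunk	*next;
		int			unused;	/* next never-used slot */
		struct busy_slot	slot[TOY_BUSY_SLOTS];
	};

	struct toy_trans {
		struct busy_chunk	head;	/* embedded, like tp->t_busy */
		int			nfree;	/* like tp->t_busy_free */
	};

	static struct busy_slot *
	add_busy(struct toy_trans *tp, unsigned ag, unsigned idx)
	{
		struct busy_chunk	*lbcp;
		struct busy_slot	*lbsp;

		if (tp->nfree == 0) {
			/* No vacancy anywhere: new chunk at the list head. */
			lbcp = calloc(1, sizeof(*lbcp));
			if (lbcp == NULL)
				return NULL;
			lbcp->unused = 1;
			lbcp->next = tp->head.next;
			tp->head.next = lbcp;
			tp->nfree = TOY_BUSY_SLOTS - 1;
			lbsp = &lbcp->slot[0];
		} else {
			/* Find the first chunk with an unclaimed slot. */
			for (lbcp = &tp->head; lbcp != NULL; lbcp = lbcp->next)
				if (lbcp->unused < TOY_BUSY_SLOTS)
					break;
			if (lbcp == NULL)
				return NULL;	/* can't happen if nfree is right */
			tp->nfree--;
			lbsp = &lbcp->slot[lbcp->unused++];
		}
		lbsp->ag = ag;
		lbsp->idx = idx;
		return lbsp;
	}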
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 73e2ad397432..c6e4f2c8de6e 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -35,13 +35,14 @@ struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *, | |||
35 | struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *); | 35 | struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *); |
36 | struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, | 36 | struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, |
37 | struct xfs_log_item_desc *); | 37 | struct xfs_log_item_desc *); |
38 | void xfs_trans_free_items(struct xfs_trans *, int); | 38 | |
39 | void xfs_trans_unlock_items(struct xfs_trans *, | 39 | void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn); |
40 | xfs_lsn_t); | 40 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, |
41 | void xfs_trans_free_busy(xfs_trans_t *tp); | 41 | int flags); |
42 | xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, | 42 | |
43 | xfs_agnumber_t ag, | 43 | void xfs_trans_item_committed(struct xfs_log_item *lip, |
44 | xfs_extlen_t idx); | 44 | xfs_lsn_t commit_lsn, int aborted); |
45 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | ||
45 | 46 | ||
46 | /* | 47 | /* |
47 | * AIL traversal cursor. | 48 | * AIL traversal cursor. |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index b09904555d07..320775295e32 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -75,6 +75,8 @@ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ | |||
75 | 75 | ||
76 | typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ | 76 | typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ |
77 | 77 | ||
78 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ | ||
79 | |||
78 | /* | 80 | /* |
79 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. | 81 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. |
80 | * Disk based types: | 82 | * Disk based types: |