aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorNiv Sardi <xaiki@debian.org>2008-11-06 23:07:12 -0500
committerNiv Sardi <xaiki@debian.org>2008-11-06 23:07:12 -0500
commitdcd7b4e5c0649b1d2219399529b20de1df517e55 (patch)
treeef00739e48ddda0a30061d62a7348ed4b0c9aeeb /fs
parent75fa67706cce5272bcfc51ed646f2da21f3bdb6e (diff)
parent91b777125175077fb74025608dba87f100586c62 (diff)
Merge branch 'master' of git://oss.sgi.com:8090/xfs/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/inode.c208
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h10
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c25
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h65
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c840
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c763
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h55
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h31
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h5
-rw-r--r--fs/xfs/quota/xfs_dquot.c39
-rw-r--r--fs/xfs/quota/xfs_dquot.h4
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c45
-rw-r--r--fs/xfs/quota/xfs_qm.c13
-rw-r--r--fs/xfs/quota/xfs_qm.h1
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c1
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c137
-rw-r--r--fs/xfs/support/debug.c2
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c8
-rw-r--r--fs/xfs/xfs_ag.h10
-rw-r--r--fs/xfs/xfs_alloc.c195
-rw-r--r--fs/xfs/xfs_alloc.h27
-rw-r--r--fs/xfs/xfs_alloc_btree.c2387
-rw-r--r--fs/xfs/xfs_alloc_btree.h107
-rw-r--r--fs/xfs/xfs_arch.h39
-rw-r--r--fs/xfs/xfs_bit.h3
-rw-r--r--fs/xfs/xfs_bmap.c296
-rw-r--r--fs/xfs/xfs_bmap.h61
-rw-r--r--fs/xfs/xfs_bmap_btree.c2614
-rw-r--r--fs/xfs/xfs_bmap_btree.h171
-rw-r--r--fs/xfs/xfs_btree.c3596
-rw-r--r--fs/xfs/xfs_btree.h392
-rw-r--r--fs/xfs/xfs_btree_trace.c249
-rw-r--r--fs/xfs/xfs_btree_trace.h116
-rw-r--r--fs/xfs/xfs_buf_item.c25
-rw-r--r--fs/xfs/xfs_clnt.h105
-rw-r--r--fs/xfs/xfs_da_btree.c5
-rw-r--r--fs/xfs/xfs_da_btree.h24
-rw-r--r--fs/xfs/xfs_dinode.h5
-rw-r--r--fs/xfs/xfs_dir2.c6
-rw-r--r--fs/xfs/xfs_dmops.c5
-rw-r--r--fs/xfs/xfs_extfree_item.c45
-rw-r--r--fs/xfs/xfs_fsops.c24
-rw-r--r--fs/xfs/xfs_ialloc.c132
-rw-r--r--fs/xfs/xfs_ialloc.h21
-rw-r--r--fs/xfs/xfs_ialloc_btree.c2193
-rw-r--r--fs/xfs/xfs_ialloc_btree.h111
-rw-r--r--fs/xfs/xfs_iget.c501
-rw-r--r--fs/xfs/xfs_imap.h2
-rw-r--r--fs/xfs/xfs_inode.c292
-rw-r--r--fs/xfs/xfs_inode.h288
-rw-r--r--fs/xfs/xfs_inode_item.c30
-rw-r--r--fs/xfs/xfs_inode_item.h41
-rw-r--r--fs/xfs/xfs_itable.c33
-rw-r--r--fs/xfs/xfs_log.c10
-rw-r--r--fs/xfs/xfs_log_priv.h1
-rw-r--r--fs/xfs/xfs_log_recover.c105
-rw-r--r--fs/xfs/xfs_mount.c47
-rw-r--r--fs/xfs/xfs_mount.h54
-rw-r--r--fs/xfs/xfs_qmops.c5
-rw-r--r--fs/xfs/xfs_trans.c13
-rw-r--r--fs/xfs/xfs_trans.h322
-rw-r--r--fs/xfs/xfs_trans_ail.c362
-rw-r--r--fs/xfs/xfs_trans_buf.c7
-rw-r--r--fs/xfs/xfs_trans_item.c10
-rw-r--r--fs/xfs/xfs_trans_priv.h98
-rw-r--r--fs/xfs/xfs_vfsops.c617
-rw-r--r--fs/xfs/xfs_vfsops.h2
-rw-r--r--fs/xfs/xfs_vnodeops.c141
-rw-r--r--fs/xfs/xfs_vnodeops.h6
81 files changed, 8094 insertions, 10173 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 0487ddba1397..f84ba338fafd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode)
108 wake_up_bit(&inode->i_state, __I_LOCK); 108 wake_up_bit(&inode->i_state, __I_LOCK);
109} 109}
110 110
111static struct inode *alloc_inode(struct super_block *sb) 111/**
112 * inode_init_always - perform inode structure initialisation
113 * @sb - superblock inode belongs to.
114 * @inode - inode to initialise
115 *
116 * These are initializations that need to be done on every inode
117 * allocation as the fields are not initialised by slab allocation.
118 */
119struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
112{ 120{
113 static const struct address_space_operations empty_aops; 121 static const struct address_space_operations empty_aops;
114 static struct inode_operations empty_iops; 122 static struct inode_operations empty_iops;
115 static const struct file_operations empty_fops; 123 static const struct file_operations empty_fops;
116 struct inode *inode;
117
118 if (sb->s_op->alloc_inode)
119 inode = sb->s_op->alloc_inode(sb);
120 else
121 inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);
122 124
123 if (inode) { 125 struct address_space * const mapping = &inode->i_data;
124 struct address_space * const mapping = &inode->i_data; 126
125 127 inode->i_sb = sb;
126 inode->i_sb = sb; 128 inode->i_blkbits = sb->s_blocksize_bits;
127 inode->i_blkbits = sb->s_blocksize_bits; 129 inode->i_flags = 0;
128 inode->i_flags = 0; 130 atomic_set(&inode->i_count, 1);
129 atomic_set(&inode->i_count, 1); 131 inode->i_op = &empty_iops;
130 inode->i_op = &empty_iops; 132 inode->i_fop = &empty_fops;
131 inode->i_fop = &empty_fops; 133 inode->i_nlink = 1;
132 inode->i_nlink = 1; 134 atomic_set(&inode->i_writecount, 0);
133 atomic_set(&inode->i_writecount, 0); 135 inode->i_size = 0;
134 inode->i_size = 0; 136 inode->i_blocks = 0;
135 inode->i_blocks = 0; 137 inode->i_bytes = 0;
136 inode->i_bytes = 0; 138 inode->i_generation = 0;
137 inode->i_generation = 0;
138#ifdef CONFIG_QUOTA 139#ifdef CONFIG_QUOTA
139 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); 140 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
140#endif 141#endif
141 inode->i_pipe = NULL; 142 inode->i_pipe = NULL;
142 inode->i_bdev = NULL; 143 inode->i_bdev = NULL;
143 inode->i_cdev = NULL; 144 inode->i_cdev = NULL;
144 inode->i_rdev = 0; 145 inode->i_rdev = 0;
145 inode->dirtied_when = 0; 146 inode->dirtied_when = 0;
146 if (security_inode_alloc(inode)) { 147 if (security_inode_alloc(inode)) {
147 if (inode->i_sb->s_op->destroy_inode) 148 if (inode->i_sb->s_op->destroy_inode)
148 inode->i_sb->s_op->destroy_inode(inode); 149 inode->i_sb->s_op->destroy_inode(inode);
149 else 150 else
150 kmem_cache_free(inode_cachep, (inode)); 151 kmem_cache_free(inode_cachep, (inode));
151 return NULL; 152 return NULL;
152 } 153 }
153 154
154 spin_lock_init(&inode->i_lock); 155 spin_lock_init(&inode->i_lock);
155 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); 156 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
156 157
157 mutex_init(&inode->i_mutex); 158 mutex_init(&inode->i_mutex);
158 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); 159 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
159 160
160 init_rwsem(&inode->i_alloc_sem); 161 init_rwsem(&inode->i_alloc_sem);
161 lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); 162 lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
162 163
163 mapping->a_ops = &empty_aops; 164 mapping->a_ops = &empty_aops;
164 mapping->host = inode; 165 mapping->host = inode;
165 mapping->flags = 0; 166 mapping->flags = 0;
166 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); 167 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
167 mapping->assoc_mapping = NULL; 168 mapping->assoc_mapping = NULL;
168 mapping->backing_dev_info = &default_backing_dev_info; 169 mapping->backing_dev_info = &default_backing_dev_info;
169 mapping->writeback_index = 0; 170 mapping->writeback_index = 0;
170 171
171 /* 172 /*
172 * If the block_device provides a backing_dev_info for client 173 * If the block_device provides a backing_dev_info for client
173 * inodes then use that. Otherwise the inode shares the bdev's 174 * inodes then use that. Otherwise the inode shares the bdev's
174 * backing_dev_info. 175 * backing_dev_info.
175 */ 176 */
176 if (sb->s_bdev) { 177 if (sb->s_bdev) {
177 struct backing_dev_info *bdi; 178 struct backing_dev_info *bdi;
178 179
179 bdi = sb->s_bdev->bd_inode_backing_dev_info; 180 bdi = sb->s_bdev->bd_inode_backing_dev_info;
180 if (!bdi) 181 if (!bdi)
181 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 182 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
182 mapping->backing_dev_info = bdi; 183 mapping->backing_dev_info = bdi;
183 }
184 inode->i_private = NULL;
185 inode->i_mapping = mapping;
186 } 184 }
185 inode->i_private = NULL;
186 inode->i_mapping = mapping;
187
187 return inode; 188 return inode;
188} 189}
190EXPORT_SYMBOL(inode_init_always);
191
192static struct inode *alloc_inode(struct super_block *sb)
193{
194 struct inode *inode;
195
196 if (sb->s_op->alloc_inode)
197 inode = sb->s_op->alloc_inode(sb);
198 else
199 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
200
201 if (inode)
202 return inode_init_always(sb, inode);
203 return NULL;
204}
189 205
190void destroy_inode(struct inode *inode) 206void destroy_inode(struct inode *inode)
191{ 207{
@@ -196,6 +212,7 @@ void destroy_inode(struct inode *inode)
196 else 212 else
197 kmem_cache_free(inode_cachep, (inode)); 213 kmem_cache_free(inode_cachep, (inode));
198} 214}
215EXPORT_SYMBOL(destroy_inode);
199 216
200 217
201/* 218/*
@@ -534,6 +551,49 @@ repeat:
534 return node ? inode : NULL; 551 return node ? inode : NULL;
535} 552}
536 553
554static unsigned long hash(struct super_block *sb, unsigned long hashval)
555{
556 unsigned long tmp;
557
558 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
559 L1_CACHE_BYTES;
560 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
561 return tmp & I_HASHMASK;
562}
563
564static inline void
565__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
566 struct inode *inode)
567{
568 inodes_stat.nr_inodes++;
569 list_add(&inode->i_list, &inode_in_use);
570 list_add(&inode->i_sb_list, &sb->s_inodes);
571 if (head)
572 hlist_add_head(&inode->i_hash, head);
573}
574
575/**
576 * inode_add_to_lists - add a new inode to relevant lists
577 * @sb - superblock inode belongs to.
578 * @inode - inode to mark in use
579 *
580 * When an inode is allocated it needs to be accounted for, added to the in use
581 * list, the owning superblock and the inode hash. This needs to be done under
582 * the inode_lock, so export a function to do this rather than the inode lock
583 * itself. We calculate the hash list to add to here so it is all internal
584 * which requires the caller to have already set up the inode number in the
585 * inode to add.
586 */
587void inode_add_to_lists(struct super_block *sb, struct inode *inode)
588{
589 struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
590
591 spin_lock(&inode_lock);
592 __inode_add_to_lists(sb, head, inode);
593 spin_unlock(&inode_lock);
594}
595EXPORT_SYMBOL_GPL(inode_add_to_lists);
596
537/** 597/**
538 * new_inode - obtain an inode 598 * new_inode - obtain an inode
539 * @sb: superblock 599 * @sb: superblock
@@ -561,9 +621,7 @@ struct inode *new_inode(struct super_block *sb)
561 inode = alloc_inode(sb); 621 inode = alloc_inode(sb);
562 if (inode) { 622 if (inode) {
563 spin_lock(&inode_lock); 623 spin_lock(&inode_lock);
564 inodes_stat.nr_inodes++; 624 __inode_add_to_lists(sb, NULL, inode);
565 list_add(&inode->i_list, &inode_in_use);
566 list_add(&inode->i_sb_list, &sb->s_inodes);
567 inode->i_ino = ++last_ino; 625 inode->i_ino = ++last_ino;
568 inode->i_state = 0; 626 inode->i_state = 0;
569 spin_unlock(&inode_lock); 627 spin_unlock(&inode_lock);
@@ -622,10 +680,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h
622 if (set(inode, data)) 680 if (set(inode, data))
623 goto set_failed; 681 goto set_failed;
624 682
625 inodes_stat.nr_inodes++; 683 __inode_add_to_lists(sb, head, inode);
626 list_add(&inode->i_list, &inode_in_use);
627 list_add(&inode->i_sb_list, &sb->s_inodes);
628 hlist_add_head(&inode->i_hash, head);
629 inode->i_state = I_LOCK|I_NEW; 684 inode->i_state = I_LOCK|I_NEW;
630 spin_unlock(&inode_lock); 685 spin_unlock(&inode_lock);
631 686
@@ -671,10 +726,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
671 old = find_inode_fast(sb, head, ino); 726 old = find_inode_fast(sb, head, ino);
672 if (!old) { 727 if (!old) {
673 inode->i_ino = ino; 728 inode->i_ino = ino;
674 inodes_stat.nr_inodes++; 729 __inode_add_to_lists(sb, head, inode);
675 list_add(&inode->i_list, &inode_in_use);
676 list_add(&inode->i_sb_list, &sb->s_inodes);
677 hlist_add_head(&inode->i_hash, head);
678 inode->i_state = I_LOCK|I_NEW; 730 inode->i_state = I_LOCK|I_NEW;
679 spin_unlock(&inode_lock); 731 spin_unlock(&inode_lock);
680 732
@@ -698,16 +750,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
698 return inode; 750 return inode;
699} 751}
700 752
701static unsigned long hash(struct super_block *sb, unsigned long hashval)
702{
703 unsigned long tmp;
704
705 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
706 L1_CACHE_BYTES;
707 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
708 return tmp & I_HASHMASK;
709}
710
711/** 753/**
712 * iunique - get a unique inode number 754 * iunique - get a unique inode number
713 * @sb: superblock 755 * @sb: superblock
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 737c9a425361..51b87de97f87 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -91,7 +91,8 @@ xfs-y += xfs_alloc.o \
91 xfs_dmops.o \ 91 xfs_dmops.o \
92 xfs_qmops.o 92 xfs_qmops.o
93 93
94xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o 94xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
95 xfs_dir2_trace.o
95 96
96# Objects in linux/ 97# Objects in linux/
97xfs-y += $(addprefix $(XFS_LINUX)/, \ 98xfs-y += $(addprefix $(XFS_LINUX)/, \
@@ -106,6 +107,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
106 xfs_iops.o \ 107 xfs_iops.o \
107 xfs_lrw.o \ 108 xfs_lrw.o \
108 xfs_super.o \ 109 xfs_super.o \
110 xfs_sync.o \
109 xfs_vnode.o \ 111 xfs_vnode.o \
110 xfs_xattr.o) 112 xfs_xattr.o)
111 113
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a44d68eb50b5..8fbc97df3609 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -191,7 +191,7 @@ xfs_setfilesize(
191 ip->i_d.di_size = isize; 191 ip->i_d.di_size = isize;
192 ip->i_update_core = 1; 192 ip->i_update_core = 1;
193 ip->i_update_size = 1; 193 ip->i_update_size = 1;
194 mark_inode_dirty_sync(ioend->io_inode); 194 xfs_mark_inode_dirty_sync(ip);
195 } 195 }
196 196
197 xfs_iunlock(ip, XFS_ILOCK_EXCL); 197 xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 652721ce0ea5..e279d00779f4 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -24,15 +24,7 @@
24 * Credentials 24 * Credentials
25 */ 25 */
26typedef struct cred { 26typedef struct cred {
27 /* EMPTY */ 27 /* EMPTY */
28} cred_t; 28} cred_t;
29 29
30extern struct cred *sys_cred;
31
32/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
33static inline int capable_cred(cred_t *cr, int cid)
34{
35 return (cr == sys_cred) ? 1 : capable(cid);
36}
37
38#endif /* __XFS_CRED_H__ */ 30#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ef90e64641e6..2ae8b1ccb02e 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -26,7 +26,6 @@
26 */ 26 */
27xfs_param_t xfs_params = { 27xfs_param_t xfs_params = {
28 /* MIN DFLT MAX */ 28 /* MIN DFLT MAX */
29 .restrict_chown = { 0, 1, 1 },
30 .sgid_inherit = { 0, 0, 1 }, 29 .sgid_inherit = { 0, 0, 1 },
31 .symlink_mode = { 0, 0, 1 }, 30 .symlink_mode = { 0, 0, 1 },
32 .panic_mask = { 0, 0, 255 }, 31 .panic_mask = { 0, 0, 255 },
@@ -43,10 +42,3 @@ xfs_param_t xfs_params = {
43 .inherit_nodfrg = { 0, 1, 1 }, 42 .inherit_nodfrg = { 0, 1, 1 },
44 .fstrm_timer = { 1, 30*100, 3600*100}, 43 .fstrm_timer = { 1, 30*100, 3600*100},
45}; 44};
46
47/*
48 * Global system credential structure.
49 */
50static cred_t sys_cred_val;
51cred_t *sys_cred = &sys_cred_val;
52
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
index 2770b0085ee8..69f71caf061c 100644
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -19,6 +19,5 @@
19#define __XFS_GLOBALS_H__ 19#define __XFS_GLOBALS_H__
20 20
21extern uint64_t xfs_panic_mask; /* set to cause more panics */ 21extern uint64_t xfs_panic_mask; /* set to cause more panics */
22extern struct cred *sys_cred;
23 22
24#endif /* __XFS_GLOBALS_H__ */ 23#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d3438c72dcaf..f1bd6c36e6fe 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -691,8 +691,7 @@ xfs_ioc_space(
691 if (ioflags & IO_INVIS) 691 if (ioflags & IO_INVIS)
692 attr_flags |= XFS_ATTR_DMI; 692 attr_flags |= XFS_ATTR_DMI;
693 693
694 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, 694 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags);
695 NULL, attr_flags);
696 return -error; 695 return -error;
697} 696}
698 697
@@ -1007,7 +1006,7 @@ xfs_ioctl_setattr(
1007 * to the file owner ID, except in cases where the 1006 * to the file owner ID, except in cases where the
1008 * CAP_FSETID capability is applicable. 1007 * CAP_FSETID capability is applicable.
1009 */ 1008 */
1010 if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) { 1009 if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
1011 code = XFS_ERROR(EPERM); 1010 code = XFS_ERROR(EPERM);
1012 goto error_return; 1011 goto error_return;
1013 } 1012 }
@@ -1104,10 +1103,6 @@ xfs_ioctl_setattr(
1104 1103
1105 /* 1104 /*
1106 * Change file ownership. Must be the owner or privileged. 1105 * Change file ownership. Must be the owner or privileged.
1107 * If the system was configured with the "restricted_chown"
1108 * option, the owner is not permitted to give away the file,
1109 * and can change the group id only to a group of which he
1110 * or she is a member.
1111 */ 1106 */
1112 if (mask & FSX_PROJID) { 1107 if (mask & FSX_PROJID) {
1113 /* 1108 /*
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 095d271f3434..f78bc2215764 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -64,14 +64,14 @@ xfs_synchronize_atime(
64{ 64{
65 struct inode *inode = VFS_I(ip); 65 struct inode *inode = VFS_I(ip);
66 66
67 if (inode) { 67 if (!(inode->i_state & I_CLEAR)) {
68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; 68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
69 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; 69 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
70 } 70 }
71} 71}
72 72
73/* 73/*
74 * If the linux inode exists, mark it dirty. 74 * If the linux inode is valid, mark it dirty.
75 * Used when committing a dirty inode into a transaction so that 75 * Used when committing a dirty inode into a transaction so that
76 * the inode will get written back by the linux code 76 * the inode will get written back by the linux code
77 */ 77 */
@@ -81,7 +81,7 @@ xfs_mark_inode_dirty_sync(
81{ 81{
82 struct inode *inode = VFS_I(ip); 82 struct inode *inode = VFS_I(ip);
83 83
84 if (inode) 84 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
85 mark_inode_dirty_sync(inode); 85 mark_inode_dirty_sync(inode);
86} 86}
87 87
@@ -128,7 +128,7 @@ xfs_ichgtime(
128 if (sync_it) { 128 if (sync_it) {
129 SYNCHRONIZE(); 129 SYNCHRONIZE();
130 ip->i_update_core = 1; 130 ip->i_update_core = 1;
131 mark_inode_dirty_sync(inode); 131 xfs_mark_inode_dirty_sync(ip);
132 } 132 }
133} 133}
134 134
@@ -601,7 +601,7 @@ xfs_vn_setattr(
601 struct dentry *dentry, 601 struct dentry *dentry,
602 struct iattr *iattr) 602 struct iattr *iattr)
603{ 603{
604 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL); 604 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
605} 605}
606 606
607/* 607/*
@@ -642,7 +642,7 @@ xfs_vn_fallocate(
642 642
643 xfs_ilock(ip, XFS_IOLOCK_EXCL); 643 xfs_ilock(ip, XFS_IOLOCK_EXCL);
644 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 644 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
645 0, NULL, XFS_ATTR_NOLOCK); 645 0, XFS_ATTR_NOLOCK);
646 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 646 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
647 offset + len > i_size_read(inode)) 647 offset + len > i_size_read(inode))
648 new_size = offset + len; 648 new_size = offset + len;
@@ -653,7 +653,7 @@ xfs_vn_fallocate(
653 653
654 iattr.ia_valid = ATTR_SIZE; 654 iattr.ia_valid = ATTR_SIZE;
655 iattr.ia_size = new_size; 655 iattr.ia_size = new_size;
656 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL); 656 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
657 } 657 }
658 658
659 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 659 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -766,12 +766,21 @@ xfs_diflags_to_iflags(
766 * When reading existing inodes from disk this is called directly 766 * When reading existing inodes from disk this is called directly
767 * from xfs_iget, when creating a new inode it is called from 767 * from xfs_iget, when creating a new inode it is called from
768 * xfs_ialloc after setting up the inode. 768 * xfs_ialloc after setting up the inode.
769 *
770 * We are always called with an uninitialised linux inode here.
771 * We need to initialise the necessary fields and take a reference
772 * on it.
769 */ 773 */
770void 774void
771xfs_setup_inode( 775xfs_setup_inode(
772 struct xfs_inode *ip) 776 struct xfs_inode *ip)
773{ 777{
774 struct inode *inode = ip->i_vnode; 778 struct inode *inode = &ip->i_vnode;
779
780 inode->i_ino = ip->i_ino;
781 inode->i_state = I_NEW|I_LOCK;
782 inode_add_to_lists(ip->i_mount->m_super, inode);
783 ASSERT(atomic_read(&inode->i_count) == 1);
775 784
776 inode->i_mode = ip->i_d.di_mode; 785 inode->i_mode = ip->i_d.di_mode;
777 inode->i_nlink = ip->i_d.di_nlink; 786 inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index cc0f7b3a9795..77d6ddcaf547 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -77,6 +77,7 @@
77#include <linux/spinlock.h> 77#include <linux/spinlock.h>
78#include <linux/random.h> 78#include <linux/random.h>
79#include <linux/ctype.h> 79#include <linux/ctype.h>
80#include <linux/writeback.h>
80 81
81#include <asm/page.h> 82#include <asm/page.h>
82#include <asm/div64.h> 83#include <asm/div64.h>
@@ -107,7 +108,6 @@
107#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 108#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
108#endif 109#endif
109 110
110#define restricted_chown xfs_params.restrict_chown.val
111#define irix_sgid_inherit xfs_params.sgid_inherit.val 111#define irix_sgid_inherit xfs_params.sgid_inherit.val
112#define irix_symlink_mode xfs_params.symlink_mode.val 112#define irix_symlink_mode xfs_params.symlink_mode.val
113#define xfs_panic_mask xfs_params.panic_mask.val 113#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 3d5b67c075c7..64f4ec90b8b2 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -53,6 +53,10 @@ xfs_read_xfsstats(
53 { "icluster", XFSSTAT_END_INODE_CLUSTER }, 53 { "icluster", XFSSTAT_END_INODE_CLUSTER },
54 { "vnodes", XFSSTAT_END_VNODE_OPS }, 54 { "vnodes", XFSSTAT_END_VNODE_OPS },
55 { "buf", XFSSTAT_END_BUF }, 55 { "buf", XFSSTAT_END_BUF },
56 { "abtb2", XFSSTAT_END_ABTB_V2 },
57 { "abtc2", XFSSTAT_END_ABTC_V2 },
58 { "bmbt2", XFSSTAT_END_BMBT_V2 },
59 { "ibt2", XFSSTAT_END_IBT_V2 },
56 }; 60 };
57 61
58 /* Loop over all stats groups */ 62 /* Loop over all stats groups */
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index e83820febc9f..736854b1ca1a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -118,6 +118,71 @@ struct xfsstats {
118 __uint32_t xb_page_retries; 118 __uint32_t xb_page_retries;
119 __uint32_t xb_page_found; 119 __uint32_t xb_page_found;
120 __uint32_t xb_get_read; 120 __uint32_t xb_get_read;
121/* Version 2 btree counters */
122#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15)
123 __uint32_t xs_abtb_2_lookup;
124 __uint32_t xs_abtb_2_compare;
125 __uint32_t xs_abtb_2_insrec;
126 __uint32_t xs_abtb_2_delrec;
127 __uint32_t xs_abtb_2_newroot;
128 __uint32_t xs_abtb_2_killroot;
129 __uint32_t xs_abtb_2_increment;
130 __uint32_t xs_abtb_2_decrement;
131 __uint32_t xs_abtb_2_lshift;
132 __uint32_t xs_abtb_2_rshift;
133 __uint32_t xs_abtb_2_split;
134 __uint32_t xs_abtb_2_join;
135 __uint32_t xs_abtb_2_alloc;
136 __uint32_t xs_abtb_2_free;
137 __uint32_t xs_abtb_2_moves;
138#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15)
139 __uint32_t xs_abtc_2_lookup;
140 __uint32_t xs_abtc_2_compare;
141 __uint32_t xs_abtc_2_insrec;
142 __uint32_t xs_abtc_2_delrec;
143 __uint32_t xs_abtc_2_newroot;
144 __uint32_t xs_abtc_2_killroot;
145 __uint32_t xs_abtc_2_increment;
146 __uint32_t xs_abtc_2_decrement;
147 __uint32_t xs_abtc_2_lshift;
148 __uint32_t xs_abtc_2_rshift;
149 __uint32_t xs_abtc_2_split;
150 __uint32_t xs_abtc_2_join;
151 __uint32_t xs_abtc_2_alloc;
152 __uint32_t xs_abtc_2_free;
153 __uint32_t xs_abtc_2_moves;
154#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15)
155 __uint32_t xs_bmbt_2_lookup;
156 __uint32_t xs_bmbt_2_compare;
157 __uint32_t xs_bmbt_2_insrec;
158 __uint32_t xs_bmbt_2_delrec;
159 __uint32_t xs_bmbt_2_newroot;
160 __uint32_t xs_bmbt_2_killroot;
161 __uint32_t xs_bmbt_2_increment;
162 __uint32_t xs_bmbt_2_decrement;
163 __uint32_t xs_bmbt_2_lshift;
164 __uint32_t xs_bmbt_2_rshift;
165 __uint32_t xs_bmbt_2_split;
166 __uint32_t xs_bmbt_2_join;
167 __uint32_t xs_bmbt_2_alloc;
168 __uint32_t xs_bmbt_2_free;
169 __uint32_t xs_bmbt_2_moves;
170#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15)
171 __uint32_t xs_ibt_2_lookup;
172 __uint32_t xs_ibt_2_compare;
173 __uint32_t xs_ibt_2_insrec;
174 __uint32_t xs_ibt_2_delrec;
175 __uint32_t xs_ibt_2_newroot;
176 __uint32_t xs_ibt_2_killroot;
177 __uint32_t xs_ibt_2_increment;
178 __uint32_t xs_ibt_2_decrement;
179 __uint32_t xs_ibt_2_lshift;
180 __uint32_t xs_ibt_2_rshift;
181 __uint32_t xs_ibt_2_split;
182 __uint32_t xs_ibt_2_join;
183 __uint32_t xs_ibt_2_alloc;
184 __uint32_t xs_ibt_2_free;
185 __uint32_t xs_ibt_2_moves;
121/* Extra precision counters */ 186/* Extra precision counters */
122 __uint64_t xs_xstrat_bytes; 187 __uint64_t xs_xstrat_bytes;
123 __uint64_t xs_write_bytes; 188 __uint64_t xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 37ebe36056eb..c3d004bc4621 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -18,7 +18,6 @@
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_bit.h" 19#include "xfs_bit.h"
20#include "xfs_log.h" 20#include "xfs_log.h"
21#include "xfs_clnt.h"
22#include "xfs_inum.h" 21#include "xfs_inum.h"
23#include "xfs_trans.h" 22#include "xfs_trans.h"
24#include "xfs_sb.h" 23#include "xfs_sb.h"
@@ -36,6 +35,7 @@
36#include "xfs_dinode.h" 35#include "xfs_dinode.h"
37#include "xfs_inode.h" 36#include "xfs_inode.h"
38#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
39#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
40#include "xfs_bmap.h" 40#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 41#include "xfs_rtalloc.h"
@@ -58,6 +58,7 @@
58#include "xfs_extfree_item.h" 58#include "xfs_extfree_item.h"
59#include "xfs_mru_cache.h" 59#include "xfs_mru_cache.h"
60#include "xfs_inode_item.h" 60#include "xfs_inode_item.h"
61#include "xfs_sync.h"
61 62
62#include <linux/namei.h> 63#include <linux/namei.h>
63#include <linux/init.h> 64#include <linux/init.h>
@@ -70,36 +71,9 @@
70 71
71static struct quotactl_ops xfs_quotactl_operations; 72static struct quotactl_ops xfs_quotactl_operations;
72static struct super_operations xfs_super_operations; 73static struct super_operations xfs_super_operations;
73static kmem_zone_t *xfs_vnode_zone;
74static kmem_zone_t *xfs_ioend_zone; 74static kmem_zone_t *xfs_ioend_zone;
75mempool_t *xfs_ioend_pool; 75mempool_t *xfs_ioend_pool;
76 76
77STATIC struct xfs_mount_args *
78xfs_args_allocate(
79 struct super_block *sb,
80 int silent)
81{
82 struct xfs_mount_args *args;
83
84 args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
85 if (!args)
86 return NULL;
87
88 args->logbufs = args->logbufsize = -1;
89 strncpy(args->fsname, sb->s_id, MAXNAMELEN);
90
91 /* Copy the already-parsed mount(2) flags we're interested in */
92 if (sb->s_flags & MS_DIRSYNC)
93 args->flags |= XFSMNT_DIRSYNC;
94 if (sb->s_flags & MS_SYNCHRONOUS)
95 args->flags |= XFSMNT_WSYNC;
96 if (silent)
97 args->flags |= XFSMNT_QUIET;
98 args->flags |= XFSMNT_32BITINODES;
99
100 return args;
101}
102
103#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ 77#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
104#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ 78#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
105#define MNTOPT_LOGDEV "logdev" /* log device */ 79#define MNTOPT_LOGDEV "logdev" /* log device */
@@ -188,26 +162,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
188 return simple_strtoul((const char *)s, endp, base) << shift_left_factor; 162 return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
189} 163}
190 164
165/*
166 * This function fills in xfs_mount_t fields based on mount args.
167 * Note: the superblock has _not_ yet been read in.
168 *
169 * Note that this function leaks the various device name allocations on
170 * failure. The caller takes care of them.
171 */
191STATIC int 172STATIC int
192xfs_parseargs( 173xfs_parseargs(
193 struct xfs_mount *mp, 174 struct xfs_mount *mp,
194 char *options, 175 char *options,
195 struct xfs_mount_args *args, 176 char **mtpt)
196 int update)
197{ 177{
178 struct super_block *sb = mp->m_super;
198 char *this_char, *value, *eov; 179 char *this_char, *value, *eov;
199 int dsunit, dswidth, vol_dsunit, vol_dswidth; 180 int dsunit = 0;
200 int iosize; 181 int dswidth = 0;
182 int iosize = 0;
201 int dmapi_implies_ikeep = 1; 183 int dmapi_implies_ikeep = 1;
184 uchar_t iosizelog = 0;
202 185
203 args->flags |= XFSMNT_BARRIER; 186 /*
204 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 187 * Copy binary VFS mount flags we are interested in.
188 */
189 if (sb->s_flags & MS_RDONLY)
190 mp->m_flags |= XFS_MOUNT_RDONLY;
191 if (sb->s_flags & MS_DIRSYNC)
192 mp->m_flags |= XFS_MOUNT_DIRSYNC;
193 if (sb->s_flags & MS_SYNCHRONOUS)
194 mp->m_flags |= XFS_MOUNT_WSYNC;
195
196 /*
197 * Set some default flags that could be cleared by the mount option
198 * parsing.
199 */
200 mp->m_flags |= XFS_MOUNT_BARRIER;
201 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
202 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
203
204 /*
205 * These can be overridden by the mount option parsing.
206 */
207 mp->m_logbufs = -1;
208 mp->m_logbsize = -1;
205 209
206 if (!options) 210 if (!options)
207 goto done; 211 goto done;
208 212
209 iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
210
211 while ((this_char = strsep(&options, ",")) != NULL) { 213 while ((this_char = strsep(&options, ",")) != NULL) {
212 if (!*this_char) 214 if (!*this_char)
213 continue; 215 continue;
@@ -221,7 +223,7 @@ xfs_parseargs(
221 this_char); 223 this_char);
222 return EINVAL; 224 return EINVAL;
223 } 225 }
224 args->logbufs = simple_strtoul(value, &eov, 10); 226 mp->m_logbufs = simple_strtoul(value, &eov, 10);
225 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 227 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
226 if (!value || !*value) { 228 if (!value || !*value) {
227 cmn_err(CE_WARN, 229 cmn_err(CE_WARN,
@@ -229,7 +231,7 @@ xfs_parseargs(
229 this_char); 231 this_char);
230 return EINVAL; 232 return EINVAL;
231 } 233 }
232 args->logbufsize = suffix_strtoul(value, &eov, 10); 234 mp->m_logbsize = suffix_strtoul(value, &eov, 10);
233 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 235 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
234 if (!value || !*value) { 236 if (!value || !*value) {
235 cmn_err(CE_WARN, 237 cmn_err(CE_WARN,
@@ -237,7 +239,9 @@ xfs_parseargs(
237 this_char); 239 this_char);
238 return EINVAL; 240 return EINVAL;
239 } 241 }
240 strncpy(args->logname, value, MAXNAMELEN); 242 mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
243 if (!mp->m_logname)
244 return ENOMEM;
241 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 245 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
242 if (!value || !*value) { 246 if (!value || !*value) {
243 cmn_err(CE_WARN, 247 cmn_err(CE_WARN,
@@ -245,7 +249,9 @@ xfs_parseargs(
245 this_char); 249 this_char);
246 return EINVAL; 250 return EINVAL;
247 } 251 }
248 strncpy(args->mtpt, value, MAXNAMELEN); 252 *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
253 if (!*mtpt)
254 return ENOMEM;
249 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 255 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
250 if (!value || !*value) { 256 if (!value || !*value) {
251 cmn_err(CE_WARN, 257 cmn_err(CE_WARN,
@@ -253,7 +259,9 @@ xfs_parseargs(
253 this_char); 259 this_char);
254 return EINVAL; 260 return EINVAL;
255 } 261 }
256 strncpy(args->rtname, value, MAXNAMELEN); 262 mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
263 if (!mp->m_rtname)
264 return ENOMEM;
257 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { 265 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
258 if (!value || !*value) { 266 if (!value || !*value) {
259 cmn_err(CE_WARN, 267 cmn_err(CE_WARN,
@@ -262,8 +270,7 @@ xfs_parseargs(
262 return EINVAL; 270 return EINVAL;
263 } 271 }
264 iosize = simple_strtoul(value, &eov, 10); 272 iosize = simple_strtoul(value, &eov, 10);
265 args->flags |= XFSMNT_IOSIZE; 273 iosizelog = ffs(iosize) - 1;
266 args->iosizelog = (uint8_t) iosize;
267 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 274 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
268 if (!value || !*value) { 275 if (!value || !*value) {
269 cmn_err(CE_WARN, 276 cmn_err(CE_WARN,
@@ -272,8 +279,7 @@ xfs_parseargs(
272 return EINVAL; 279 return EINVAL;
273 } 280 }
274 iosize = suffix_strtoul(value, &eov, 10); 281 iosize = suffix_strtoul(value, &eov, 10);
275 args->flags |= XFSMNT_IOSIZE; 282 iosizelog = ffs(iosize) - 1;
276 args->iosizelog = ffs(iosize) - 1;
277 } else if (!strcmp(this_char, MNTOPT_GRPID) || 283 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
278 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 284 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
279 mp->m_flags |= XFS_MOUNT_GRPID; 285 mp->m_flags |= XFS_MOUNT_GRPID;
@@ -281,23 +287,25 @@ xfs_parseargs(
281 !strcmp(this_char, MNTOPT_SYSVGROUPS)) { 287 !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
282 mp->m_flags &= ~XFS_MOUNT_GRPID; 288 mp->m_flags &= ~XFS_MOUNT_GRPID;
283 } else if (!strcmp(this_char, MNTOPT_WSYNC)) { 289 } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
284 args->flags |= XFSMNT_WSYNC; 290 mp->m_flags |= XFS_MOUNT_WSYNC;
285 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { 291 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
286 args->flags |= XFSMNT_OSYNCISOSYNC; 292 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
287 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { 293 } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
288 args->flags |= XFSMNT_NORECOVERY; 294 mp->m_flags |= XFS_MOUNT_NORECOVERY;
289 } else if (!strcmp(this_char, MNTOPT_INO64)) { 295 } else if (!strcmp(this_char, MNTOPT_INO64)) {
290 args->flags |= XFSMNT_INO64; 296#if XFS_BIG_INUMS
291#if !XFS_BIG_INUMS 297 mp->m_flags |= XFS_MOUNT_INO64;
298 mp->m_inoadd = XFS_INO64_OFFSET;
299#else
292 cmn_err(CE_WARN, 300 cmn_err(CE_WARN,
293 "XFS: %s option not allowed on this system", 301 "XFS: %s option not allowed on this system",
294 this_char); 302 this_char);
295 return EINVAL; 303 return EINVAL;
296#endif 304#endif
297 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { 305 } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
298 args->flags |= XFSMNT_NOALIGN; 306 mp->m_flags |= XFS_MOUNT_NOALIGN;
299 } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { 307 } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
300 args->flags |= XFSMNT_SWALLOC; 308 mp->m_flags |= XFS_MOUNT_SWALLOC;
301 } else if (!strcmp(this_char, MNTOPT_SUNIT)) { 309 } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
302 if (!value || !*value) { 310 if (!value || !*value) {
303 cmn_err(CE_WARN, 311 cmn_err(CE_WARN,
@@ -315,7 +323,7 @@ xfs_parseargs(
315 } 323 }
316 dswidth = simple_strtoul(value, &eov, 10); 324 dswidth = simple_strtoul(value, &eov, 10);
317 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 325 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
318 args->flags &= ~XFSMNT_32BITINODES; 326 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
319#if !XFS_BIG_INUMS 327#if !XFS_BIG_INUMS
320 cmn_err(CE_WARN, 328 cmn_err(CE_WARN,
321 "XFS: %s option not allowed on this system", 329 "XFS: %s option not allowed on this system",
@@ -323,56 +331,61 @@ xfs_parseargs(
323 return EINVAL; 331 return EINVAL;
324#endif 332#endif
325 } else if (!strcmp(this_char, MNTOPT_NOUUID)) { 333 } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
326 args->flags |= XFSMNT_NOUUID; 334 mp->m_flags |= XFS_MOUNT_NOUUID;
327 } else if (!strcmp(this_char, MNTOPT_BARRIER)) { 335 } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
328 args->flags |= XFSMNT_BARRIER; 336 mp->m_flags |= XFS_MOUNT_BARRIER;
329 } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { 337 } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
330 args->flags &= ~XFSMNT_BARRIER; 338 mp->m_flags &= ~XFS_MOUNT_BARRIER;
331 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 339 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
332 args->flags |= XFSMNT_IKEEP; 340 mp->m_flags |= XFS_MOUNT_IKEEP;
333 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 341 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
334 dmapi_implies_ikeep = 0; 342 dmapi_implies_ikeep = 0;
335 args->flags &= ~XFSMNT_IKEEP; 343 mp->m_flags &= ~XFS_MOUNT_IKEEP;
336 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { 344 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
337 args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; 345 mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
338 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { 346 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
339 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 347 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
340 } else if (!strcmp(this_char, MNTOPT_ATTR2)) { 348 } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
341 args->flags |= XFSMNT_ATTR2; 349 mp->m_flags |= XFS_MOUNT_ATTR2;
342 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 350 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
343 args->flags &= ~XFSMNT_ATTR2; 351 mp->m_flags &= ~XFS_MOUNT_ATTR2;
344 args->flags |= XFSMNT_NOATTR2; 352 mp->m_flags |= XFS_MOUNT_NOATTR2;
345 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 353 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
346 args->flags2 |= XFSMNT2_FILESTREAMS; 354 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
347 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { 355 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
348 args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); 356 mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
349 args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); 357 XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
358 XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
359 XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
350 } else if (!strcmp(this_char, MNTOPT_QUOTA) || 360 } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
351 !strcmp(this_char, MNTOPT_UQUOTA) || 361 !strcmp(this_char, MNTOPT_UQUOTA) ||
352 !strcmp(this_char, MNTOPT_USRQUOTA)) { 362 !strcmp(this_char, MNTOPT_USRQUOTA)) {
353 args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; 363 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
364 XFS_UQUOTA_ENFD);
354 } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || 365 } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
355 !strcmp(this_char, MNTOPT_UQUOTANOENF)) { 366 !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
356 args->flags |= XFSMNT_UQUOTA; 367 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
357 args->flags &= ~XFSMNT_UQUOTAENF; 368 mp->m_qflags &= ~XFS_UQUOTA_ENFD;
358 } else if (!strcmp(this_char, MNTOPT_PQUOTA) || 369 } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
359 !strcmp(this_char, MNTOPT_PRJQUOTA)) { 370 !strcmp(this_char, MNTOPT_PRJQUOTA)) {
360 args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; 371 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
372 XFS_OQUOTA_ENFD);
361 } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { 373 } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
362 args->flags |= XFSMNT_PQUOTA; 374 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
363 args->flags &= ~XFSMNT_PQUOTAENF; 375 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
364 } else if (!strcmp(this_char, MNTOPT_GQUOTA) || 376 } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
365 !strcmp(this_char, MNTOPT_GRPQUOTA)) { 377 !strcmp(this_char, MNTOPT_GRPQUOTA)) {
366 args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; 378 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
379 XFS_OQUOTA_ENFD);
367 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { 380 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
368 args->flags |= XFSMNT_GQUOTA; 381 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
369 args->flags &= ~XFSMNT_GQUOTAENF; 382 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
370 } else if (!strcmp(this_char, MNTOPT_DMAPI)) { 383 } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
371 args->flags |= XFSMNT_DMAPI; 384 mp->m_flags |= XFS_MOUNT_DMAPI;
372 } else if (!strcmp(this_char, MNTOPT_XDSM)) { 385 } else if (!strcmp(this_char, MNTOPT_XDSM)) {
373 args->flags |= XFSMNT_DMAPI; 386 mp->m_flags |= XFS_MOUNT_DMAPI;
374 } else if (!strcmp(this_char, MNTOPT_DMI)) { 387 } else if (!strcmp(this_char, MNTOPT_DMI)) {
375 args->flags |= XFSMNT_DMAPI; 388 mp->m_flags |= XFS_MOUNT_DMAPI;
376 } else if (!strcmp(this_char, "ihashsize")) { 389 } else if (!strcmp(this_char, "ihashsize")) {
377 cmn_err(CE_WARN, 390 cmn_err(CE_WARN,
378 "XFS: ihashsize no longer used, option is deprecated."); 391 "XFS: ihashsize no longer used, option is deprecated.");
@@ -390,27 +403,29 @@ xfs_parseargs(
390 } 403 }
391 } 404 }
392 405
393 if (args->flags & XFSMNT_NORECOVERY) { 406 /*
394 if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { 407 * no recovery flag requires a read-only mount
395 cmn_err(CE_WARN, 408 */
396 "XFS: no-recovery mounts must be read-only."); 409 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
397 return EINVAL; 410 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
398 } 411 cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only.");
412 return EINVAL;
399 } 413 }
400 414
401 if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { 415 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
402 cmn_err(CE_WARN, 416 cmn_err(CE_WARN,
403 "XFS: sunit and swidth options incompatible with the noalign option"); 417 "XFS: sunit and swidth options incompatible with the noalign option");
404 return EINVAL; 418 return EINVAL;
405 } 419 }
406 420
407 if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { 421 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
422 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
408 cmn_err(CE_WARN, 423 cmn_err(CE_WARN,
409 "XFS: cannot mount with both project and group quota"); 424 "XFS: cannot mount with both project and group quota");
410 return EINVAL; 425 return EINVAL;
411 } 426 }
412 427
413 if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { 428 if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
414 printk("XFS: %s option needs the mount point option as well\n", 429 printk("XFS: %s option needs the mount point option as well\n",
415 MNTOPT_DMAPI); 430 MNTOPT_DMAPI);
416 return EINVAL; 431 return EINVAL;
@@ -438,27 +453,66 @@ xfs_parseargs(
438 * Note that if "ikeep" or "noikeep" mount options are 453 * Note that if "ikeep" or "noikeep" mount options are
439 * supplied, then they are honored. 454 * supplied, then they are honored.
440 */ 455 */
441 if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep) 456 if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
442 args->flags |= XFSMNT_IKEEP; 457 mp->m_flags |= XFS_MOUNT_IKEEP;
443 458
444 if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { 459done:
460 if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
461 /*
462 * At this point the superblock has not been read
463 * in, therefore we do not know the block size.
464 * Before the mount call ends we will convert
465 * these to FSBs.
466 */
445 if (dsunit) { 467 if (dsunit) {
446 args->sunit = dsunit; 468 mp->m_dalign = dsunit;
447 args->flags |= XFSMNT_RETERR; 469 mp->m_flags |= XFS_MOUNT_RETERR;
448 } else {
449 args->sunit = vol_dsunit;
450 } 470 }
451 dswidth ? (args->swidth = dswidth) : 471
452 (args->swidth = vol_dswidth); 472 if (dswidth)
453 } else { 473 mp->m_swidth = dswidth;
454 args->sunit = args->swidth = 0; 474 }
475
476 if (mp->m_logbufs != -1 &&
477 mp->m_logbufs != 0 &&
478 (mp->m_logbufs < XLOG_MIN_ICLOGS ||
479 mp->m_logbufs > XLOG_MAX_ICLOGS)) {
480 cmn_err(CE_WARN,
481 "XFS: invalid logbufs value: %d [not %d-%d]",
482 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
483 return XFS_ERROR(EINVAL);
484 }
485 if (mp->m_logbsize != -1 &&
486 mp->m_logbsize != 0 &&
487 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
488 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
489 !is_power_of_2(mp->m_logbsize))) {
490 cmn_err(CE_WARN,
491 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
492 mp->m_logbsize);
493 return XFS_ERROR(EINVAL);
494 }
495
496 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
497 if (!mp->m_fsname)
498 return ENOMEM;
499 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
500
501 if (iosizelog) {
502 if (iosizelog > XFS_MAX_IO_LOG ||
503 iosizelog < XFS_MIN_IO_LOG) {
504 cmn_err(CE_WARN,
505 "XFS: invalid log iosize: %d [not %d-%d]",
506 iosizelog, XFS_MIN_IO_LOG,
507 XFS_MAX_IO_LOG);
508 return XFS_ERROR(EINVAL);
509 }
510
511 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
512 mp->m_readio_log = iosizelog;
513 mp->m_writeio_log = iosizelog;
455 } 514 }
456 515
457done:
458 if (args->flags & XFSMNT_32BITINODES)
459 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
460 if (args->flags2)
461 args->flags |= XFSMNT_FLAGS2;
462 return 0; 516 return 0;
463} 517}
464 518
@@ -704,8 +758,7 @@ xfs_close_devices(
704 */ 758 */
705STATIC int 759STATIC int
706xfs_open_devices( 760xfs_open_devices(
707 struct xfs_mount *mp, 761 struct xfs_mount *mp)
708 struct xfs_mount_args *args)
709{ 762{
710 struct block_device *ddev = mp->m_super->s_bdev; 763 struct block_device *ddev = mp->m_super->s_bdev;
711 struct block_device *logdev = NULL, *rtdev = NULL; 764 struct block_device *logdev = NULL, *rtdev = NULL;
@@ -714,14 +767,14 @@ xfs_open_devices(
714 /* 767 /*
715 * Open real time and log devices - order is important. 768 * Open real time and log devices - order is important.
716 */ 769 */
717 if (args->logname[0]) { 770 if (mp->m_logname) {
718 error = xfs_blkdev_get(mp, args->logname, &logdev); 771 error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
719 if (error) 772 if (error)
720 goto out; 773 goto out;
721 } 774 }
722 775
723 if (args->rtname[0]) { 776 if (mp->m_rtname) {
724 error = xfs_blkdev_get(mp, args->rtname, &rtdev); 777 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
725 if (error) 778 if (error)
726 goto out_close_logdev; 779 goto out_close_logdev;
727 780
@@ -813,18 +866,18 @@ xfs_setup_devices(
813 */ 866 */
814void 867void
815xfsaild_wakeup( 868xfsaild_wakeup(
816 xfs_mount_t *mp, 869 struct xfs_ail *ailp,
817 xfs_lsn_t threshold_lsn) 870 xfs_lsn_t threshold_lsn)
818{ 871{
819 mp->m_ail.xa_target = threshold_lsn; 872 ailp->xa_target = threshold_lsn;
820 wake_up_process(mp->m_ail.xa_task); 873 wake_up_process(ailp->xa_task);
821} 874}
822 875
823int 876int
824xfsaild( 877xfsaild(
825 void *data) 878 void *data)
826{ 879{
827 xfs_mount_t *mp = (xfs_mount_t *)data; 880 struct xfs_ail *ailp = data;
828 xfs_lsn_t last_pushed_lsn = 0; 881 xfs_lsn_t last_pushed_lsn = 0;
829 long tout = 0; 882 long tout = 0;
830 883
@@ -836,11 +889,11 @@ xfsaild(
836 /* swsusp */ 889 /* swsusp */
837 try_to_freeze(); 890 try_to_freeze();
838 891
839 ASSERT(mp->m_log); 892 ASSERT(ailp->xa_mount->m_log);
840 if (XFS_FORCED_SHUTDOWN(mp)) 893 if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
841 continue; 894 continue;
842 895
843 tout = xfsaild_push(mp, &last_pushed_lsn); 896 tout = xfsaild_push(ailp, &last_pushed_lsn);
844 } 897 }
845 898
846 return 0; 899 return 0;
@@ -848,43 +901,82 @@ xfsaild(
848 901
849int 902int
850xfsaild_start( 903xfsaild_start(
851 xfs_mount_t *mp) 904 struct xfs_ail *ailp)
852{ 905{
853 mp->m_ail.xa_target = 0; 906 ailp->xa_target = 0;
854 mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild"); 907 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
855 if (IS_ERR(mp->m_ail.xa_task)) 908 if (IS_ERR(ailp->xa_task))
856 return -PTR_ERR(mp->m_ail.xa_task); 909 return -PTR_ERR(ailp->xa_task);
857 return 0; 910 return 0;
858} 911}
859 912
860void 913void
861xfsaild_stop( 914xfsaild_stop(
862 xfs_mount_t *mp) 915 struct xfs_ail *ailp)
863{ 916{
864 kthread_stop(mp->m_ail.xa_task); 917 kthread_stop(ailp->xa_task);
865} 918}
866 919
867 920
868 921/* Catch misguided souls that try to use this interface on XFS */
869STATIC struct inode * 922STATIC struct inode *
870xfs_fs_alloc_inode( 923xfs_fs_alloc_inode(
871 struct super_block *sb) 924 struct super_block *sb)
872{ 925{
873 return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); 926 BUG();
927 return NULL;
874} 928}
875 929
930/*
931 * Now that the generic code is guaranteed not to be accessing
932 * the linux inode, we can reclaim the inode.
933 */
876STATIC void 934STATIC void
877xfs_fs_destroy_inode( 935xfs_fs_destroy_inode(
878 struct inode *inode) 936 struct inode *inode)
879{ 937{
880 kmem_zone_free(xfs_vnode_zone, inode); 938 xfs_inode_t *ip = XFS_I(inode);
939
940 XFS_STATS_INC(vn_reclaim);
941 if (xfs_reclaim(ip))
942 panic("%s: cannot reclaim 0x%p\n", __func__, inode);
881} 943}
882 944
945/*
946 * Slab object creation initialisation for the XFS inode.
947 * This covers only the idempotent fields in the XFS inode;
948 * all other fields need to be initialised on allocation
949 * from the slab. This avoids the need to repeatedly initialise
950 * fields in the xfs inode that are left in the initialised state
951 * when freeing the inode.
952 */
883STATIC void 953STATIC void
884xfs_fs_inode_init_once( 954xfs_fs_inode_init_once(
885 void *vnode) 955 void *inode)
886{ 956{
887 inode_init_once((struct inode *)vnode); 957 struct xfs_inode *ip = inode;
958
959 memset(ip, 0, sizeof(struct xfs_inode));
960
961 /* vfs inode */
962 inode_init_once(VFS_I(ip));
963
964 /* xfs inode */
965 atomic_set(&ip->i_iocount, 0);
966 atomic_set(&ip->i_pincount, 0);
967 spin_lock_init(&ip->i_flags_lock);
968 init_waitqueue_head(&ip->i_ipin_wait);
969 /*
970 * Because we want to use a counting completion, complete
971 * the flush completion once to allow a single access to
972 * the flush completion without blocking.
973 */
974 init_completion(&ip->i_flush);
975 complete(&ip->i_flush);
976
977 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
978 "xfsino", ip->i_ino);
979 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
888} 980}
889 981
890/* 982/*
@@ -912,7 +1004,7 @@ xfs_fs_write_inode(
912 * it dirty again so we'll try again later. 1004 * it dirty again so we'll try again later.
913 */ 1005 */
914 if (error) 1006 if (error)
915 mark_inode_dirty_sync(inode); 1007 xfs_mark_inode_dirty_sync(XFS_I(inode));
916 1008
917 return -error; 1009 return -error;
918} 1010}
@@ -923,164 +1015,13 @@ xfs_fs_clear_inode(
923{ 1015{
924 xfs_inode_t *ip = XFS_I(inode); 1016 xfs_inode_t *ip = XFS_I(inode);
925 1017
926 /* 1018 xfs_itrace_entry(ip);
927 * ip can be null when xfs_iget_core calls xfs_idestroy if we 1019 XFS_STATS_INC(vn_rele);
928 * find an inode with di_mode == 0 but without IGET_CREATE set. 1020 XFS_STATS_INC(vn_remove);
929 */ 1021 XFS_STATS_DEC(vn_active);
930 if (ip) {
931 xfs_itrace_entry(ip);
932 XFS_STATS_INC(vn_rele);
933 XFS_STATS_INC(vn_remove);
934 XFS_STATS_INC(vn_reclaim);
935 XFS_STATS_DEC(vn_active);
936
937 xfs_inactive(ip);
938 xfs_iflags_clear(ip, XFS_IMODIFIED);
939 if (xfs_reclaim(ip))
940 panic("%s: cannot reclaim 0x%p\n", __func__, inode);
941 }
942 1022
943 ASSERT(XFS_I(inode) == NULL); 1023 xfs_inactive(ip);
944} 1024 xfs_iflags_clear(ip, XFS_IMODIFIED);
945
946/*
947 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
948 * Doing this has two advantages:
949 * - It saves on stack space, which is tight in certain situations
950 * - It can be used (with care) as a mechanism to avoid deadlocks.
951 * Flushing while allocating in a full filesystem requires both.
952 */
953STATIC void
954xfs_syncd_queue_work(
955 struct xfs_mount *mp,
956 void *data,
957 void (*syncer)(struct xfs_mount *, void *))
958{
959 struct bhv_vfs_sync_work *work;
960
961 work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
962 INIT_LIST_HEAD(&work->w_list);
963 work->w_syncer = syncer;
964 work->w_data = data;
965 work->w_mount = mp;
966 spin_lock(&mp->m_sync_lock);
967 list_add_tail(&work->w_list, &mp->m_sync_list);
968 spin_unlock(&mp->m_sync_lock);
969 wake_up_process(mp->m_sync_task);
970}
971
972/*
973 * Flush delayed allocate data, attempting to free up reserved space
974 * from existing allocations. At this point a new allocation attempt
975 * has failed with ENOSPC and we are in the process of scratching our
976 * heads, looking about for more room...
977 */
978STATIC void
979xfs_flush_inode_work(
980 struct xfs_mount *mp,
981 void *arg)
982{
983 struct inode *inode = arg;
984 filemap_flush(inode->i_mapping);
985 iput(inode);
986}
987
988void
989xfs_flush_inode(
990 xfs_inode_t *ip)
991{
992 struct inode *inode = VFS_I(ip);
993
994 igrab(inode);
995 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
996 delay(msecs_to_jiffies(500));
997}
998
999/*
1000 * This is the "bigger hammer" version of xfs_flush_inode_work...
1001 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
1002 */
1003STATIC void
1004xfs_flush_device_work(
1005 struct xfs_mount *mp,
1006 void *arg)
1007{
1008 struct inode *inode = arg;
1009 sync_blockdev(mp->m_super->s_bdev);
1010 iput(inode);
1011}
1012
1013void
1014xfs_flush_device(
1015 xfs_inode_t *ip)
1016{
1017 struct inode *inode = VFS_I(ip);
1018
1019 igrab(inode);
1020 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
1021 delay(msecs_to_jiffies(500));
1022 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
1023}
1024
1025STATIC void
1026xfs_sync_worker(
1027 struct xfs_mount *mp,
1028 void *unused)
1029{
1030 int error;
1031
1032 if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1033 error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
1034 mp->m_sync_seq++;
1035 wake_up(&mp->m_wait_single_sync_task);
1036}
1037
1038STATIC int
1039xfssyncd(
1040 void *arg)
1041{
1042 struct xfs_mount *mp = arg;
1043 long timeleft;
1044 bhv_vfs_sync_work_t *work, *n;
1045 LIST_HEAD (tmp);
1046
1047 set_freezable();
1048 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
1049 for (;;) {
1050 timeleft = schedule_timeout_interruptible(timeleft);
1051 /* swsusp */
1052 try_to_freeze();
1053 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
1054 break;
1055
1056 spin_lock(&mp->m_sync_lock);
1057 /*
1058 * We can get woken by laptop mode, to do a sync -
1059 * that's the (only!) case where the list would be
1060 * empty with time remaining.
1061 */
1062 if (!timeleft || list_empty(&mp->m_sync_list)) {
1063 if (!timeleft)
1064 timeleft = xfs_syncd_centisecs *
1065 msecs_to_jiffies(10);
1066 INIT_LIST_HEAD(&mp->m_sync_work.w_list);
1067 list_add_tail(&mp->m_sync_work.w_list,
1068 &mp->m_sync_list);
1069 }
1070 list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
1071 list_move(&work->w_list, &tmp);
1072 spin_unlock(&mp->m_sync_lock);
1073
1074 list_for_each_entry_safe(work, n, &tmp, w_list) {
1075 (*work->w_syncer)(mp, work->w_data);
1076 list_del(&work->w_list);
1077 if (work == &mp->m_sync_work)
1078 continue;
1079 kmem_free(work);
1080 }
1081 }
1082
1083 return 0;
1084} 1025}
1085 1026
1086STATIC void 1027STATIC void
@@ -1101,9 +1042,8 @@ xfs_fs_put_super(
1101 int unmount_event_flags = 0; 1042 int unmount_event_flags = 0;
1102 int error; 1043 int error;
1103 1044
1104 kthread_stop(mp->m_sync_task); 1045 xfs_syncd_stop(mp);
1105 1046 xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
1106 xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
1107 1047
1108#ifdef HAVE_DMAPI 1048#ifdef HAVE_DMAPI
1109 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1049 if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1131,16 +1071,6 @@ xfs_fs_put_super(
1131 error = xfs_unmount_flush(mp, 0); 1071 error = xfs_unmount_flush(mp, 0);
1132 WARN_ON(error); 1072 WARN_ON(error);
1133 1073
1134 /*
1135 * If we're forcing a shutdown, typically because of a media error,
1136 * we want to make sure we invalidate dirty pages that belong to
1137 * referenced vnodes as well.
1138 */
1139 if (XFS_FORCED_SHUTDOWN(mp)) {
1140 error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
1141 ASSERT(error != EFSCORRUPTED);
1142 }
1143
1144 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1074 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1145 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, 1075 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
1146 unmount_event_flags); 1076 unmount_event_flags);
@@ -1161,7 +1091,7 @@ xfs_fs_write_super(
1161 struct super_block *sb) 1091 struct super_block *sb)
1162{ 1092{
1163 if (!(sb->s_flags & MS_RDONLY)) 1093 if (!(sb->s_flags & MS_RDONLY))
1164 xfs_sync(XFS_M(sb), SYNC_FSDATA); 1094 xfs_sync_fsdata(XFS_M(sb), 0);
1165 sb->s_dirt = 0; 1095 sb->s_dirt = 0;
1166} 1096}
1167 1097
@@ -1172,7 +1102,6 @@ xfs_fs_sync_super(
1172{ 1102{
1173 struct xfs_mount *mp = XFS_M(sb); 1103 struct xfs_mount *mp = XFS_M(sb);
1174 int error; 1104 int error;
1175 int flags;
1176 1105
1177 /* 1106 /*
1178 * Treat a sync operation like a freeze. This is to work 1107 * Treat a sync operation like a freeze. This is to work
@@ -1186,20 +1115,10 @@ xfs_fs_sync_super(
1186 * dirty the Linux inode until after the transaction I/O 1115 * dirty the Linux inode until after the transaction I/O
1187 * completes. 1116 * completes.
1188 */ 1117 */
1189 if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) { 1118 if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE))
1190 /* 1119 error = xfs_quiesce_data(mp);
1191 * First stage of freeze - no more writers will make progress 1120 else
1192 * now we are here, so we flush delwri and delalloc buffers 1121 error = xfs_sync_fsdata(mp, 0);
1193 * here, then wait for all I/O to complete. Data is frozen at
1194 * that point. Metadata is not frozen, transactions can still
1195 * occur here so don't bother flushing the buftarg (i.e
1196 * SYNC_QUIESCE) because it'll just get dirty again.
1197 */
1198 flags = SYNC_DATA_QUIESCE;
1199 } else
1200 flags = SYNC_FSDATA;
1201
1202 error = xfs_sync(mp, flags);
1203 sb->s_dirt = 0; 1122 sb->s_dirt = 0;
1204 1123
1205 if (unlikely(laptop_mode)) { 1124 if (unlikely(laptop_mode)) {
@@ -1337,9 +1256,8 @@ xfs_fs_remount(
1337 1256
1338 /* rw -> ro */ 1257 /* rw -> ro */
1339 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { 1258 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1340 xfs_filestream_flush(mp); 1259 xfs_quiesce_data(mp);
1341 xfs_sync(mp, SYNC_DATA_QUIESCE); 1260 xfs_quiesce_attr(mp);
1342 xfs_attr_quiesce(mp);
1343 mp->m_flags |= XFS_MOUNT_RDONLY; 1261 mp->m_flags |= XFS_MOUNT_RDONLY;
1344 } 1262 }
1345 1263
@@ -1348,7 +1266,7 @@ xfs_fs_remount(
1348 1266
1349/* 1267/*
1350 * Second stage of a freeze. The data is already frozen so we only 1268 * Second stage of a freeze. The data is already frozen so we only
1351 * need to take care of themetadata. Once that's done write a dummy 1269 * need to take care of the metadata. Once that's done write a dummy
1352 * record to dirty the log in case of a crash while frozen. 1270 * record to dirty the log in case of a crash while frozen.
1353 */ 1271 */
1354STATIC void 1272STATIC void
@@ -1357,7 +1275,7 @@ xfs_fs_lockfs(
1357{ 1275{
1358 struct xfs_mount *mp = XFS_M(sb); 1276 struct xfs_mount *mp = XFS_M(sb);
1359 1277
1360 xfs_attr_quiesce(mp); 1278 xfs_quiesce_attr(mp);
1361 xfs_fs_log_dummy(mp); 1279 xfs_fs_log_dummy(mp);
1362} 1280}
1363 1281
@@ -1422,175 +1340,28 @@ xfs_fs_setxquota(
1422 1340
1423/* 1341/*
1424 * This function fills in xfs_mount_t fields based on mount args. 1342 * This function fills in xfs_mount_t fields based on mount args.
1425 * Note: the superblock has _not_ yet been read in.
1426 */
1427STATIC int
1428xfs_start_flags(
1429 struct xfs_mount_args *ap,
1430 struct xfs_mount *mp)
1431{
1432 int error;
1433
1434 /* Values are in BBs */
1435 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
1436 /*
1437 * At this point the superblock has not been read
1438 * in, therefore we do not know the block size.
1439 * Before the mount call ends we will convert
1440 * these to FSBs.
1441 */
1442 mp->m_dalign = ap->sunit;
1443 mp->m_swidth = ap->swidth;
1444 }
1445
1446 if (ap->logbufs != -1 &&
1447 ap->logbufs != 0 &&
1448 (ap->logbufs < XLOG_MIN_ICLOGS ||
1449 ap->logbufs > XLOG_MAX_ICLOGS)) {
1450 cmn_err(CE_WARN,
1451 "XFS: invalid logbufs value: %d [not %d-%d]",
1452 ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1453 return XFS_ERROR(EINVAL);
1454 }
1455 mp->m_logbufs = ap->logbufs;
1456 if (ap->logbufsize != -1 &&
1457 ap->logbufsize != 0 &&
1458 (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
1459 ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
1460 !is_power_of_2(ap->logbufsize))) {
1461 cmn_err(CE_WARN,
1462 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1463 ap->logbufsize);
1464 return XFS_ERROR(EINVAL);
1465 }
1466
1467 error = ENOMEM;
1468
1469 mp->m_logbsize = ap->logbufsize;
1470 mp->m_fsname_len = strlen(ap->fsname) + 1;
1471
1472 mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
1473 if (!mp->m_fsname)
1474 goto out;
1475
1476 if (ap->rtname[0]) {
1477 mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
1478 if (!mp->m_rtname)
1479 goto out_free_fsname;
1480
1481 }
1482
1483 if (ap->logname[0]) {
1484 mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
1485 if (!mp->m_logname)
1486 goto out_free_rtname;
1487 }
1488
1489 if (ap->flags & XFSMNT_WSYNC)
1490 mp->m_flags |= XFS_MOUNT_WSYNC;
1491#if XFS_BIG_INUMS
1492 if (ap->flags & XFSMNT_INO64) {
1493 mp->m_flags |= XFS_MOUNT_INO64;
1494 mp->m_inoadd = XFS_INO64_OFFSET;
1495 }
1496#endif
1497 if (ap->flags & XFSMNT_RETERR)
1498 mp->m_flags |= XFS_MOUNT_RETERR;
1499 if (ap->flags & XFSMNT_NOALIGN)
1500 mp->m_flags |= XFS_MOUNT_NOALIGN;
1501 if (ap->flags & XFSMNT_SWALLOC)
1502 mp->m_flags |= XFS_MOUNT_SWALLOC;
1503 if (ap->flags & XFSMNT_OSYNCISOSYNC)
1504 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
1505 if (ap->flags & XFSMNT_32BITINODES)
1506 mp->m_flags |= XFS_MOUNT_32BITINODES;
1507
1508 if (ap->flags & XFSMNT_IOSIZE) {
1509 if (ap->iosizelog > XFS_MAX_IO_LOG ||
1510 ap->iosizelog < XFS_MIN_IO_LOG) {
1511 cmn_err(CE_WARN,
1512 "XFS: invalid log iosize: %d [not %d-%d]",
1513 ap->iosizelog, XFS_MIN_IO_LOG,
1514 XFS_MAX_IO_LOG);
1515 return XFS_ERROR(EINVAL);
1516 }
1517
1518 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
1519 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
1520 }
1521
1522 if (ap->flags & XFSMNT_IKEEP)
1523 mp->m_flags |= XFS_MOUNT_IKEEP;
1524 if (ap->flags & XFSMNT_DIRSYNC)
1525 mp->m_flags |= XFS_MOUNT_DIRSYNC;
1526 if (ap->flags & XFSMNT_ATTR2)
1527 mp->m_flags |= XFS_MOUNT_ATTR2;
1528 if (ap->flags & XFSMNT_NOATTR2)
1529 mp->m_flags |= XFS_MOUNT_NOATTR2;
1530
1531 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
1532 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
1533
1534 /*
1535 * no recovery flag requires a read-only mount
1536 */
1537 if (ap->flags & XFSMNT_NORECOVERY) {
1538 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1539 cmn_err(CE_WARN,
1540 "XFS: tried to mount a FS read-write without recovery!");
1541 return XFS_ERROR(EINVAL);
1542 }
1543 mp->m_flags |= XFS_MOUNT_NORECOVERY;
1544 }
1545
1546 if (ap->flags & XFSMNT_NOUUID)
1547 mp->m_flags |= XFS_MOUNT_NOUUID;
1548 if (ap->flags & XFSMNT_BARRIER)
1549 mp->m_flags |= XFS_MOUNT_BARRIER;
1550 else
1551 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1552
1553 if (ap->flags2 & XFSMNT2_FILESTREAMS)
1554 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
1555
1556 if (ap->flags & XFSMNT_DMAPI)
1557 mp->m_flags |= XFS_MOUNT_DMAPI;
1558 return 0;
1559
1560
1561 out_free_rtname:
1562 kfree(mp->m_rtname);
1563 out_free_fsname:
1564 kfree(mp->m_fsname);
1565 out:
1566 return error;
1567}
1568
1569/*
1570 * This function fills in xfs_mount_t fields based on mount args.
1571 * Note: the superblock _has_ now been read in. 1343 * Note: the superblock _has_ now been read in.
1572 */ 1344 */
1573STATIC int 1345STATIC int
1574xfs_finish_flags( 1346xfs_finish_flags(
1575 struct xfs_mount_args *ap,
1576 struct xfs_mount *mp) 1347 struct xfs_mount *mp)
1577{ 1348{
1578 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); 1349 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1579 1350
1580 /* Fail a mount where the logbuf is smaller than the log stripe */ 1351 /* Fail a mount where the logbuf is smaller than the log stripe */
1581 if (xfs_sb_version_haslogv2(&mp->m_sb)) { 1352 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1582 if ((ap->logbufsize <= 0) && 1353 if (mp->m_logbsize <= 0 &&
1583 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { 1354 mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1584 mp->m_logbsize = mp->m_sb.sb_logsunit; 1355 mp->m_logbsize = mp->m_sb.sb_logsunit;
1585 } else if (ap->logbufsize > 0 && 1356 } else if (mp->m_logbsize > 0 &&
1586 ap->logbufsize < mp->m_sb.sb_logsunit) { 1357 mp->m_logbsize < mp->m_sb.sb_logsunit) {
1587 cmn_err(CE_WARN, 1358 cmn_err(CE_WARN,
1588 "XFS: logbuf size must be greater than or equal to log stripe size"); 1359 "XFS: logbuf size must be greater than or equal to log stripe size");
1589 return XFS_ERROR(EINVAL); 1360 return XFS_ERROR(EINVAL);
1590 } 1361 }
1591 } else { 1362 } else {
1592 /* Fail a mount if the logbuf is larger than 32K */ 1363 /* Fail a mount if the logbuf is larger than 32K */
1593 if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) { 1364 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1594 cmn_err(CE_WARN, 1365 cmn_err(CE_WARN,
1595 "XFS: logbuf size for version 1 logs must be 16K or 32K"); 1366 "XFS: logbuf size for version 1 logs must be 16K or 32K");
1596 return XFS_ERROR(EINVAL); 1367 return XFS_ERROR(EINVAL);
@@ -1602,7 +1373,7 @@ xfs_finish_flags(
1602 * told by noattr2 to turn it off 1373 * told by noattr2 to turn it off
1603 */ 1374 */
1604 if (xfs_sb_version_hasattr2(&mp->m_sb) && 1375 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1605 !(ap->flags & XFSMNT_NOATTR2)) 1376 !(mp->m_flags & XFS_MOUNT_NOATTR2))
1606 mp->m_flags |= XFS_MOUNT_ATTR2; 1377 mp->m_flags |= XFS_MOUNT_ATTR2;
1607 1378
1608 /* 1379 /*
@@ -1614,6 +1385,7 @@ xfs_finish_flags(
1614 return XFS_ERROR(EROFS); 1385 return XFS_ERROR(EROFS);
1615 } 1386 }
1616 1387
1388#if 0 /* shared mounts were never supported on Linux */
1617 /* 1389 /*
1618 * check for shared mount. 1390 * check for shared mount.
1619 */ 1391 */
@@ -1636,25 +1408,11 @@ xfs_finish_flags(
1636 /* 1408 /*
1637 * Shared XFS V0 can't deal with DMI. Return EINVAL. 1409 * Shared XFS V0 can't deal with DMI. Return EINVAL.
1638 */ 1410 */
1639 if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI)) 1411 if (mp->m_sb.sb_shared_vn == 0 &&
1412 (mp->m_flags & XFS_MOUNT_DMAPI))
1640 return XFS_ERROR(EINVAL); 1413 return XFS_ERROR(EINVAL);
1641 } 1414 }
1642 1415#endif
1643 if (ap->flags & XFSMNT_UQUOTA) {
1644 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
1645 if (ap->flags & XFSMNT_UQUOTAENF)
1646 mp->m_qflags |= XFS_UQUOTA_ENFD;
1647 }
1648
1649 if (ap->flags & XFSMNT_GQUOTA) {
1650 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
1651 if (ap->flags & XFSMNT_GQUOTAENF)
1652 mp->m_qflags |= XFS_OQUOTA_ENFD;
1653 } else if (ap->flags & XFSMNT_PQUOTA) {
1654 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
1655 if (ap->flags & XFSMNT_PQUOTAENF)
1656 mp->m_qflags |= XFS_OQUOTA_ENFD;
1657 }
1658 1416
1659 return 0; 1417 return 0;
1660} 1418}
@@ -1667,19 +1425,14 @@ xfs_fs_fill_super(
1667{ 1425{
1668 struct inode *root; 1426 struct inode *root;
1669 struct xfs_mount *mp = NULL; 1427 struct xfs_mount *mp = NULL;
1670 struct xfs_mount_args *args;
1671 int flags = 0, error = ENOMEM; 1428 int flags = 0, error = ENOMEM;
1672 1429 char *mtpt = NULL;
1673 args = xfs_args_allocate(sb, silent);
1674 if (!args)
1675 return -ENOMEM;
1676 1430
1677 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 1431 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1678 if (!mp) 1432 if (!mp)
1679 goto out_free_args; 1433 goto out;
1680 1434
1681 spin_lock_init(&mp->m_sb_lock); 1435 spin_lock_init(&mp->m_sb_lock);
1682 mutex_init(&mp->m_ilock);
1683 mutex_init(&mp->m_growlock); 1436 mutex_init(&mp->m_growlock);
1684 atomic_set(&mp->m_active_trans, 0); 1437 atomic_set(&mp->m_active_trans, 0);
1685 INIT_LIST_HEAD(&mp->m_sync_list); 1438 INIT_LIST_HEAD(&mp->m_sync_list);
@@ -1689,12 +1442,9 @@ xfs_fs_fill_super(
1689 mp->m_super = sb; 1442 mp->m_super = sb;
1690 sb->s_fs_info = mp; 1443 sb->s_fs_info = mp;
1691 1444
1692 if (sb->s_flags & MS_RDONLY) 1445 error = xfs_parseargs(mp, (char *)data, &mtpt);
1693 mp->m_flags |= XFS_MOUNT_RDONLY;
1694
1695 error = xfs_parseargs(mp, (char *)data, args, 0);
1696 if (error) 1446 if (error)
1697 goto out_free_mp; 1447 goto out_free_fsname;
1698 1448
1699 sb_min_blocksize(sb, BBSIZE); 1449 sb_min_blocksize(sb, BBSIZE);
1700 sb->s_xattr = xfs_xattr_handlers; 1450 sb->s_xattr = xfs_xattr_handlers;
@@ -1702,33 +1452,28 @@ xfs_fs_fill_super(
1702 sb->s_qcop = &xfs_quotactl_operations; 1452 sb->s_qcop = &xfs_quotactl_operations;
1703 sb->s_op = &xfs_super_operations; 1453 sb->s_op = &xfs_super_operations;
1704 1454
1705 error = xfs_dmops_get(mp, args); 1455 error = xfs_dmops_get(mp);
1706 if (error) 1456 if (error)
1707 goto out_free_mp; 1457 goto out_free_fsname;
1708 error = xfs_qmops_get(mp, args); 1458 error = xfs_qmops_get(mp);
1709 if (error) 1459 if (error)
1710 goto out_put_dmops; 1460 goto out_put_dmops;
1711 1461
1712 if (args->flags & XFSMNT_QUIET) 1462 if (silent)
1713 flags |= XFS_MFSI_QUIET; 1463 flags |= XFS_MFSI_QUIET;
1714 1464
1715 error = xfs_open_devices(mp, args); 1465 error = xfs_open_devices(mp);
1716 if (error) 1466 if (error)
1717 goto out_put_qmops; 1467 goto out_put_qmops;
1718 1468
1719 if (xfs_icsb_init_counters(mp)) 1469 if (xfs_icsb_init_counters(mp))
1720 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 1470 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
1721 1471
1722 /*
1723 * Setup flags based on mount(2) options and then the superblock
1724 */
1725 error = xfs_start_flags(args, mp);
1726 if (error)
1727 goto out_free_fsname;
1728 error = xfs_readsb(mp, flags); 1472 error = xfs_readsb(mp, flags);
1729 if (error) 1473 if (error)
1730 goto out_free_fsname; 1474 goto out_destroy_counters;
1731 error = xfs_finish_flags(args, mp); 1475
1476 error = xfs_finish_flags(mp);
1732 if (error) 1477 if (error)
1733 goto out_free_sb; 1478 goto out_free_sb;
1734 1479
@@ -1747,7 +1492,7 @@ xfs_fs_fill_super(
1747 if (error) 1492 if (error)
1748 goto out_filestream_unmount; 1493 goto out_filestream_unmount;
1749 1494
1750 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname); 1495 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
1751 1496
1752 sb->s_dirt = 1; 1497 sb->s_dirt = 1;
1753 sb->s_magic = XFS_SB_MAGIC; 1498 sb->s_magic = XFS_SB_MAGIC;
@@ -1772,35 +1517,31 @@ xfs_fs_fill_super(
1772 goto fail_vnrele; 1517 goto fail_vnrele;
1773 } 1518 }
1774 1519
1775 mp->m_sync_work.w_syncer = xfs_sync_worker; 1520 error = xfs_syncd_init(mp);
1776 mp->m_sync_work.w_mount = mp; 1521 if (error)
1777 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
1778 if (IS_ERR(mp->m_sync_task)) {
1779 error = -PTR_ERR(mp->m_sync_task);
1780 goto fail_vnrele; 1522 goto fail_vnrele;
1781 }
1782 1523
1783 xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); 1524 kfree(mtpt);
1784 1525
1785 kfree(args); 1526 xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
1786 return 0; 1527 return 0;
1787 1528
1788 out_filestream_unmount: 1529 out_filestream_unmount:
1789 xfs_filestream_unmount(mp); 1530 xfs_filestream_unmount(mp);
1790 out_free_sb: 1531 out_free_sb:
1791 xfs_freesb(mp); 1532 xfs_freesb(mp);
1792 out_free_fsname: 1533 out_destroy_counters:
1793 xfs_free_fsname(mp);
1794 xfs_icsb_destroy_counters(mp); 1534 xfs_icsb_destroy_counters(mp);
1795 xfs_close_devices(mp); 1535 xfs_close_devices(mp);
1796 out_put_qmops: 1536 out_put_qmops:
1797 xfs_qmops_put(mp); 1537 xfs_qmops_put(mp);
1798 out_put_dmops: 1538 out_put_dmops:
1799 xfs_dmops_put(mp); 1539 xfs_dmops_put(mp);
1800 out_free_mp: 1540 out_free_fsname:
1541 xfs_free_fsname(mp);
1542 kfree(mtpt);
1801 kfree(mp); 1543 kfree(mp);
1802 out_free_args: 1544 out:
1803 kfree(args);
1804 return -error; 1545 return -error;
1805 1546
1806 fail_vnrele: 1547 fail_vnrele:
@@ -1882,10 +1623,19 @@ xfs_alloc_trace_bufs(void)
1882 if (!xfs_bmap_trace_buf) 1623 if (!xfs_bmap_trace_buf)
1883 goto out_free_alloc_trace; 1624 goto out_free_alloc_trace;
1884#endif 1625#endif
1885#ifdef XFS_BMBT_TRACE 1626#ifdef XFS_BTREE_TRACE
1627 xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
1628 KM_MAYFAIL);
1629 if (!xfs_allocbt_trace_buf)
1630 goto out_free_bmap_trace;
1631
1632 xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
1633 if (!xfs_inobt_trace_buf)
1634 goto out_free_allocbt_trace;
1635
1886 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL); 1636 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1887 if (!xfs_bmbt_trace_buf) 1637 if (!xfs_bmbt_trace_buf)
1888 goto out_free_bmap_trace; 1638 goto out_free_inobt_trace;
1889#endif 1639#endif
1890#ifdef XFS_ATTR_TRACE 1640#ifdef XFS_ATTR_TRACE
1891 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL); 1641 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
@@ -1907,8 +1657,12 @@ xfs_alloc_trace_bufs(void)
1907 ktrace_free(xfs_attr_trace_buf); 1657 ktrace_free(xfs_attr_trace_buf);
1908 out_free_bmbt_trace: 1658 out_free_bmbt_trace:
1909#endif 1659#endif
1910#ifdef XFS_BMBT_TRACE 1660#ifdef XFS_BTREE_TRACE
1911 ktrace_free(xfs_bmbt_trace_buf); 1661 ktrace_free(xfs_bmbt_trace_buf);
1662 out_free_inobt_trace:
1663 ktrace_free(xfs_inobt_trace_buf);
1664 out_free_allocbt_trace:
1665 ktrace_free(xfs_allocbt_trace_buf);
1912 out_free_bmap_trace: 1666 out_free_bmap_trace:
1913#endif 1667#endif
1914#ifdef XFS_BMAP_TRACE 1668#ifdef XFS_BMAP_TRACE
@@ -1931,8 +1685,10 @@ xfs_free_trace_bufs(void)
1931#ifdef XFS_ATTR_TRACE 1685#ifdef XFS_ATTR_TRACE
1932 ktrace_free(xfs_attr_trace_buf); 1686 ktrace_free(xfs_attr_trace_buf);
1933#endif 1687#endif
1934#ifdef XFS_BMBT_TRACE 1688#ifdef XFS_BTREE_TRACE
1935 ktrace_free(xfs_bmbt_trace_buf); 1689 ktrace_free(xfs_bmbt_trace_buf);
1690 ktrace_free(xfs_inobt_trace_buf);
1691 ktrace_free(xfs_allocbt_trace_buf);
1936#endif 1692#endif
1937#ifdef XFS_BMAP_TRACE 1693#ifdef XFS_BMAP_TRACE
1938 ktrace_free(xfs_bmap_trace_buf); 1694 ktrace_free(xfs_bmap_trace_buf);
@@ -1945,16 +1701,10 @@ xfs_free_trace_bufs(void)
1945STATIC int __init 1701STATIC int __init
1946xfs_init_zones(void) 1702xfs_init_zones(void)
1947{ 1703{
1948 xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
1949 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
1950 KM_ZONE_SPREAD,
1951 xfs_fs_inode_init_once);
1952 if (!xfs_vnode_zone)
1953 goto out;
1954 1704
1955 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); 1705 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
1956 if (!xfs_ioend_zone) 1706 if (!xfs_ioend_zone)
1957 goto out_destroy_vnode_zone; 1707 goto out;
1958 1708
1959 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, 1709 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
1960 xfs_ioend_zone); 1710 xfs_ioend_zone);
@@ -1970,6 +1720,7 @@ xfs_init_zones(void)
1970 "xfs_bmap_free_item"); 1720 "xfs_bmap_free_item");
1971 if (!xfs_bmap_free_item_zone) 1721 if (!xfs_bmap_free_item_zone)
1972 goto out_destroy_log_ticket_zone; 1722 goto out_destroy_log_ticket_zone;
1723
1973 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), 1724 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
1974 "xfs_btree_cur"); 1725 "xfs_btree_cur");
1975 if (!xfs_btree_cur_zone) 1726 if (!xfs_btree_cur_zone)
@@ -2017,8 +1768,8 @@ xfs_init_zones(void)
2017 1768
2018 xfs_inode_zone = 1769 xfs_inode_zone =
2019 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", 1770 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
2020 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | 1771 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
2021 KM_ZONE_SPREAD, NULL); 1772 xfs_fs_inode_init_once);
2022 if (!xfs_inode_zone) 1773 if (!xfs_inode_zone)
2023 goto out_destroy_efi_zone; 1774 goto out_destroy_efi_zone;
2024 1775
@@ -2066,8 +1817,6 @@ xfs_init_zones(void)
2066 mempool_destroy(xfs_ioend_pool); 1817 mempool_destroy(xfs_ioend_pool);
2067 out_destroy_ioend_zone: 1818 out_destroy_ioend_zone:
2068 kmem_zone_destroy(xfs_ioend_zone); 1819 kmem_zone_destroy(xfs_ioend_zone);
2069 out_destroy_vnode_zone:
2070 kmem_zone_destroy(xfs_vnode_zone);
2071 out: 1820 out:
2072 return -ENOMEM; 1821 return -ENOMEM;
2073} 1822}
@@ -2092,7 +1841,6 @@ xfs_destroy_zones(void)
2092 kmem_zone_destroy(xfs_log_ticket_zone); 1841 kmem_zone_destroy(xfs_log_ticket_zone);
2093 mempool_destroy(xfs_ioend_pool); 1842 mempool_destroy(xfs_ioend_pool);
2094 kmem_zone_destroy(xfs_ioend_zone); 1843 kmem_zone_destroy(xfs_ioend_zone);
2095 kmem_zone_destroy(xfs_vnode_zone);
2096 1844
2097} 1845}
2098 1846
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index fe2ef4e6a0f9..56dc48a76fab 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -101,9 +101,6 @@ struct block_device;
101 101
102extern __uint64_t xfs_max_file_offset(unsigned int); 102extern __uint64_t xfs_max_file_offset(unsigned int);
103 103
104extern void xfs_flush_inode(struct xfs_inode *);
105extern void xfs_flush_device(struct xfs_inode *);
106
107extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 104extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
108 105
109extern const struct export_operations xfs_export_operations; 106extern const struct export_operations xfs_export_operations;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
new file mode 100644
index 000000000000..fb5cca3df840
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -0,0 +1,763 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_inode.h"
37#include "xfs_dinode.h"
38#include "xfs_error.h"
39#include "xfs_mru_cache.h"
40#include "xfs_filestream.h"
41#include "xfs_vnodeops.h"
42#include "xfs_utils.h"
43#include "xfs_buf_item.h"
44#include "xfs_inode_item.h"
45#include "xfs_rw.h"
46
47#include <linux/kthread.h>
48#include <linux/freezer.h>
49
50/*
51 * Sync all the inodes in the given AG according to the
52 * direction given by the flags.
53 */
54STATIC int
55xfs_sync_inodes_ag(
56 xfs_mount_t *mp,
57 int ag,
58 int flags)
59{
60 xfs_perag_t *pag = &mp->m_perag[ag];
61 int nr_found;
62 uint32_t first_index = 0;
63 int error = 0;
64 int last_error = 0;
65 int fflag = XFS_B_ASYNC;
66
67 if (flags & SYNC_DELWRI)
68 fflag = XFS_B_DELWRI;
69 if (flags & SYNC_WAIT)
70 fflag = 0; /* synchronous overrides all */
71
72 do {
73 struct inode *inode;
74 xfs_inode_t *ip = NULL;
75 int lock_flags = XFS_ILOCK_SHARED;
76
77 /*
78 * use a gang lookup to find the next inode in the tree
79 * as the tree is sparse and a gang lookup walks to find
80 * the number of objects requested.
81 */
82 read_lock(&pag->pag_ici_lock);
83 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
84 (void**)&ip, first_index, 1);
85
86 if (!nr_found) {
87 read_unlock(&pag->pag_ici_lock);
88 break;
89 }
90
91 /*
92 * Update the index for the next lookup. Catch overflows
93 * into the next AG range which can occur if we have inodes
94 * in the last block of the AG and we are currently
95 * pointing to the last inode.
96 */
97 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
98 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
99 read_unlock(&pag->pag_ici_lock);
100 break;
101 }
102
103 /* nothing to sync during shutdown */
104 if (XFS_FORCED_SHUTDOWN(mp)) {
105 read_unlock(&pag->pag_ici_lock);
106 return 0;
107 }
108
109 /*
110 * If we can't get a reference on the inode, it must be
111 * in reclaim. Leave it for the reclaim code to flush.
112 */
113 inode = VFS_I(ip);
114 if (!igrab(inode)) {
115 read_unlock(&pag->pag_ici_lock);
116 continue;
117 }
118 read_unlock(&pag->pag_ici_lock);
119
120 /* bad inodes are dealt with elsewhere */
121 if (is_bad_inode(inode)) {
122 IRELE(ip);
123 continue;
124 }
125
126 /*
127 * If we have to flush data or wait for I/O completion
128 * we need to hold the iolock.
129 */
130 if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
131 xfs_ilock(ip, XFS_IOLOCK_SHARED);
132 lock_flags |= XFS_IOLOCK_SHARED;
133 error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
134 if (flags & SYNC_IOWAIT)
135 vn_iowait(ip);
136 }
137 xfs_ilock(ip, XFS_ILOCK_SHARED);
138
139 if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
140 if (flags & SYNC_WAIT) {
141 xfs_iflock(ip);
142 if (!xfs_inode_clean(ip))
143 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
144 else
145 xfs_ifunlock(ip);
146 } else if (xfs_iflock_nowait(ip)) {
147 if (!xfs_inode_clean(ip))
148 error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
149 else
150 xfs_ifunlock(ip);
151 }
152 }
153 xfs_iput(ip, lock_flags);
154
155 if (error)
156 last_error = error;
157 /*
158 * bail out if the filesystem is corrupted.
159 */
160 if (error == EFSCORRUPTED)
161 return XFS_ERROR(error);
162
163 } while (nr_found);
164
165 return last_error;
166}
167
168int
169xfs_sync_inodes(
170 xfs_mount_t *mp,
171 int flags)
172{
173 int error;
174 int last_error;
175 int i;
176 int lflags = XFS_LOG_FORCE;
177
178 if (mp->m_flags & XFS_MOUNT_RDONLY)
179 return 0;
180 error = 0;
181 last_error = 0;
182
183 if (flags & SYNC_WAIT)
184 lflags |= XFS_LOG_SYNC;
185
186 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
187 if (!mp->m_perag[i].pag_ici_init)
188 continue;
189 error = xfs_sync_inodes_ag(mp, i, flags);
190 if (error)
191 last_error = error;
192 if (error == EFSCORRUPTED)
193 break;
194 }
195 if (flags & SYNC_DELWRI)
196 xfs_log_force(mp, 0, lflags);
197
198 return XFS_ERROR(last_error);
199}
200
201STATIC int
202xfs_commit_dummy_trans(
203 struct xfs_mount *mp,
204 uint log_flags)
205{
206 struct xfs_inode *ip = mp->m_rootip;
207 struct xfs_trans *tp;
208 int error;
209
210 /*
211 * Put a dummy transaction in the log to tell recovery
212 * that all others are OK.
213 */
214 tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
215 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
216 if (error) {
217 xfs_trans_cancel(tp, 0);
218 return error;
219 }
220
221 xfs_ilock(ip, XFS_ILOCK_EXCL);
222
223 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
224 xfs_trans_ihold(tp, ip);
225 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
226 /* XXX(hch): ignoring the error here.. */
227 error = xfs_trans_commit(tp, 0);
228
229 xfs_iunlock(ip, XFS_ILOCK_EXCL);
230
231 xfs_log_force(mp, 0, log_flags);
232 return 0;
233}
234
235int
236xfs_sync_fsdata(
237 struct xfs_mount *mp,
238 int flags)
239{
240 struct xfs_buf *bp;
241 struct xfs_buf_log_item *bip;
242 int error = 0;
243
244 /*
245 * If this is xfssyncd() then only sync the superblock if we can
246 * lock it without sleeping and it is not pinned.
247 */
248 if (flags & SYNC_BDFLUSH) {
249 ASSERT(!(flags & SYNC_WAIT));
250
251 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
252 if (!bp)
253 goto out;
254
255 bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
256 if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
257 goto out_brelse;
258 } else {
259 bp = xfs_getsb(mp, 0);
260
261 /*
262 * If the buffer is pinned then push on the log so we won't
263 * get stuck waiting in the write for someone, maybe
264 * ourselves, to flush the log.
265 *
266 * Even though we just pushed the log above, we did not have
267 * the superblock buffer locked at that point so it can
268 * become pinned in between there and here.
269 */
270 if (XFS_BUF_ISPINNED(bp))
271 xfs_log_force(mp, 0, XFS_LOG_FORCE);
272 }
273
274
275 if (flags & SYNC_WAIT)
276 XFS_BUF_UNASYNC(bp);
277 else
278 XFS_BUF_ASYNC(bp);
279
280 return xfs_bwrite(mp, bp);
281
282 out_brelse:
283 xfs_buf_relse(bp);
284 out:
285 return error;
286}
287
288/*
289 * When remounting a filesystem read-only or freezing the filesystem, we have
290 * two phases to execute. This first phase is syncing the data before we
291 * quiesce the filesystem, and the second is flushing all the inodes out after
292 * we've waited for all the transactions created by the first phase to
293 * complete. The second phase ensures that the inodes are written to their
294 * location on disk rather than just existing in transactions in the log. This
295 * means after a quiesce there is no log replay required to write the inodes to
296 * disk (this is the main difference between a sync and a quiesce).
297 */
298/*
299 * First stage of freeze - no writers will make progress now we are here,
300 * so we flush delwri and delalloc buffers here, then wait for all I/O to
301 * complete. Data is frozen at that point. Metadata is not frozen,
302 * transactions can still occur here so don't bother flushing the buftarg
303 * because it'll just get dirty again.
304 */
305int
306xfs_quiesce_data(
307 struct xfs_mount *mp)
308{
309 int error;
310
311 /* push non-blocking */
312 xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
313 XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
314 xfs_filestream_flush(mp);
315
316 /* push and block */
317 xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
318 XFS_QM_DQSYNC(mp, SYNC_WAIT);
319
320 /* write superblock and hoover up shutdown errors */
321 error = xfs_sync_fsdata(mp, 0);
322
323 /* flush data-only devices */
324 if (mp->m_rtdev_targp)
325 XFS_bflush(mp->m_rtdev_targp);
326
327 return error;
328}
329
330STATIC void
331xfs_quiesce_fs(
332 struct xfs_mount *mp)
333{
334 int count = 0, pincount;
335
336 xfs_flush_buftarg(mp->m_ddev_targp, 0);
337 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
338
339 /*
340 * This loop must run at least twice. The first instance of the loop
341 * will flush most meta data but that will generate more meta data
342 * (typically directory updates). Which then must be flushed and
343 * logged before we can write the unmount record.
344 */
345 do {
346 xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
347 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
348 if (!pincount) {
349 delay(50);
350 count++;
351 }
352 } while (count < 2);
353}
354
355/*
356 * Second stage of a quiesce. The data is already synced, now we have to take
357 * care of the metadata. New transactions are already blocked, so we need to
358 * wait for any remaining transactions to drain out before proceding.
359 */
360void
361xfs_quiesce_attr(
362 struct xfs_mount *mp)
363{
364 int error = 0;
365
366 /* wait for all modifications to complete */
367 while (atomic_read(&mp->m_active_trans) > 0)
368 delay(100);
369
370 /* flush inodes and push all remaining buffers out to disk */
371 xfs_quiesce_fs(mp);
372
373 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
374
375 /* Push the superblock and write an unmount record */
376 error = xfs_log_sbcount(mp, 1);
377 if (error)
378 xfs_fs_cmn_err(CE_WARN, mp,
379 "xfs_attr_quiesce: failed to log sb changes. "
380 "Frozen image may not be consistent.");
381 xfs_log_unmount_write(mp);
382 xfs_unmountfs_writesb(mp);
383}
384
385/*
386 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
387 * Doing this has two advantages:
388 * - It saves on stack space, which is tight in certain situations
389 * - It can be used (with care) as a mechanism to avoid deadlocks.
390 * Flushing while allocating in a full filesystem requires both.
391 */
392STATIC void
393xfs_syncd_queue_work(
394 struct xfs_mount *mp,
395 void *data,
396 void (*syncer)(struct xfs_mount *, void *))
397{
398 struct bhv_vfs_sync_work *work;
399
400 work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
401 INIT_LIST_HEAD(&work->w_list);
402 work->w_syncer = syncer;
403 work->w_data = data;
404 work->w_mount = mp;
405 spin_lock(&mp->m_sync_lock);
406 list_add_tail(&work->w_list, &mp->m_sync_list);
407 spin_unlock(&mp->m_sync_lock);
408 wake_up_process(mp->m_sync_task);
409}
410
411/*
412 * Flush delayed allocate data, attempting to free up reserved space
413 * from existing allocations. At this point a new allocation attempt
414 * has failed with ENOSPC and we are in the process of scratching our
415 * heads, looking about for more room...
416 */
417STATIC void
418xfs_flush_inode_work(
419 struct xfs_mount *mp,
420 void *arg)
421{
422 struct inode *inode = arg;
423 filemap_flush(inode->i_mapping);
424 iput(inode);
425}
426
427void
428xfs_flush_inode(
429 xfs_inode_t *ip)
430{
431 struct inode *inode = VFS_I(ip);
432
433 igrab(inode);
434 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
435 delay(msecs_to_jiffies(500));
436}
437
438/*
439 * This is the "bigger hammer" version of xfs_flush_inode_work...
440 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
441 */
442STATIC void
443xfs_flush_device_work(
444 struct xfs_mount *mp,
445 void *arg)
446{
447 struct inode *inode = arg;
448 sync_blockdev(mp->m_super->s_bdev);
449 iput(inode);
450}
451
452void
453xfs_flush_device(
454 xfs_inode_t *ip)
455{
456 struct inode *inode = VFS_I(ip);
457
458 igrab(inode);
459 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
460 delay(msecs_to_jiffies(500));
461 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
462}
463
464/*
465 * Every sync period we need to unpin all items, reclaim inodes, sync
466 * quota and write out the superblock. We might need to cover the log
467 * to indicate it is idle.
468 */
469STATIC void
470xfs_sync_worker(
471 struct xfs_mount *mp,
472 void *unused)
473{
474 int error;
475
476 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
477 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
478 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
479 /* dgc: errors ignored here */
480 error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
481 error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
482 if (xfs_log_need_covered(mp))
483 error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
484 }
485 mp->m_sync_seq++;
486 wake_up(&mp->m_wait_single_sync_task);
487}
488
489STATIC int
490xfssyncd(
491 void *arg)
492{
493 struct xfs_mount *mp = arg;
494 long timeleft;
495 bhv_vfs_sync_work_t *work, *n;
496 LIST_HEAD (tmp);
497
498 set_freezable();
499 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
500 for (;;) {
501 timeleft = schedule_timeout_interruptible(timeleft);
502 /* swsusp */
503 try_to_freeze();
504 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
505 break;
506
507 spin_lock(&mp->m_sync_lock);
508 /*
509 * We can get woken by laptop mode, to do a sync -
510 * that's the (only!) case where the list would be
511 * empty with time remaining.
512 */
513 if (!timeleft || list_empty(&mp->m_sync_list)) {
514 if (!timeleft)
515 timeleft = xfs_syncd_centisecs *
516 msecs_to_jiffies(10);
517 INIT_LIST_HEAD(&mp->m_sync_work.w_list);
518 list_add_tail(&mp->m_sync_work.w_list,
519 &mp->m_sync_list);
520 }
521 list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
522 list_move(&work->w_list, &tmp);
523 spin_unlock(&mp->m_sync_lock);
524
525 list_for_each_entry_safe(work, n, &tmp, w_list) {
526 (*work->w_syncer)(mp, work->w_data);
527 list_del(&work->w_list);
528 if (work == &mp->m_sync_work)
529 continue;
530 kmem_free(work);
531 }
532 }
533
534 return 0;
535}
536
537int
538xfs_syncd_init(
539 struct xfs_mount *mp)
540{
541 mp->m_sync_work.w_syncer = xfs_sync_worker;
542 mp->m_sync_work.w_mount = mp;
543 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
544 if (IS_ERR(mp->m_sync_task))
545 return -PTR_ERR(mp->m_sync_task);
546 return 0;
547}
548
549void
550xfs_syncd_stop(
551 struct xfs_mount *mp)
552{
553 kthread_stop(mp->m_sync_task);
554}
555
556int
557xfs_reclaim_inode(
558 xfs_inode_t *ip,
559 int locked,
560 int sync_mode)
561{
562 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
563
564 /* The hash lock here protects a thread in xfs_iget_core from
565 * racing with us on linking the inode back with a vnode.
566 * Once we have the XFS_IRECLAIM flag set it will not touch
567 * us.
568 */
569 write_lock(&pag->pag_ici_lock);
570 spin_lock(&ip->i_flags_lock);
571 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
572 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
573 spin_unlock(&ip->i_flags_lock);
574 write_unlock(&pag->pag_ici_lock);
575 if (locked) {
576 xfs_ifunlock(ip);
577 xfs_iunlock(ip, XFS_ILOCK_EXCL);
578 }
579 return 1;
580 }
581 __xfs_iflags_set(ip, XFS_IRECLAIM);
582 spin_unlock(&ip->i_flags_lock);
583 write_unlock(&pag->pag_ici_lock);
584 xfs_put_perag(ip->i_mount, pag);
585
586 /*
587 * If the inode is still dirty, then flush it out. If the inode
588 * is not in the AIL, then it will be OK to flush it delwri as
589 * long as xfs_iflush() does not keep any references to the inode.
590 * We leave that decision up to xfs_iflush() since it has the
591 * knowledge of whether it's OK to simply do a delwri flush of
592 * the inode or whether we need to wait until the inode is
593 * pulled from the AIL.
594 * We get the flush lock regardless, though, just to make sure
595 * we don't free it while it is being flushed.
596 */
597 if (!locked) {
598 xfs_ilock(ip, XFS_ILOCK_EXCL);
599 xfs_iflock(ip);
600 }
601
602 /*
603 * In the case of a forced shutdown we rely on xfs_iflush() to
604 * wait for the inode to be unpinned before returning an error.
605 */
606 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
607 /* synchronize with xfs_iflush_done */
608 xfs_iflock(ip);
609 xfs_ifunlock(ip);
610 }
611
612 xfs_iunlock(ip, XFS_ILOCK_EXCL);
613 xfs_ireclaim(ip);
614 return 0;
615}
616
617/*
618 * We set the inode flag atomically with the radix tree tag.
619 * Once we get tag lookups on the radix tree, this inode flag
620 * can go away.
621 */
622void
623xfs_inode_set_reclaim_tag(
624 xfs_inode_t *ip)
625{
626 xfs_mount_t *mp = ip->i_mount;
627 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
628
629 read_lock(&pag->pag_ici_lock);
630 spin_lock(&ip->i_flags_lock);
631 radix_tree_tag_set(&pag->pag_ici_root,
632 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
633 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
634 spin_unlock(&ip->i_flags_lock);
635 read_unlock(&pag->pag_ici_lock);
636 xfs_put_perag(mp, pag);
637}
638
639void
640__xfs_inode_clear_reclaim_tag(
641 xfs_mount_t *mp,
642 xfs_perag_t *pag,
643 xfs_inode_t *ip)
644{
645 radix_tree_tag_clear(&pag->pag_ici_root,
646 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
647}
648
649void
650xfs_inode_clear_reclaim_tag(
651 xfs_inode_t *ip)
652{
653 xfs_mount_t *mp = ip->i_mount;
654 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
655
656 read_lock(&pag->pag_ici_lock);
657 spin_lock(&ip->i_flags_lock);
658 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
659 spin_unlock(&ip->i_flags_lock);
660 read_unlock(&pag->pag_ici_lock);
661 xfs_put_perag(mp, pag);
662}
663
664
665STATIC void
666xfs_reclaim_inodes_ag(
667 xfs_mount_t *mp,
668 int ag,
669 int noblock,
670 int mode)
671{
672 xfs_inode_t *ip = NULL;
673 xfs_perag_t *pag = &mp->m_perag[ag];
674 int nr_found;
675 uint32_t first_index;
676 int skipped;
677
678restart:
679 first_index = 0;
680 skipped = 0;
681 do {
682 /*
683 * use a gang lookup to find the next inode in the tree
684 * as the tree is sparse and a gang lookup walks to find
685 * the number of objects requested.
686 */
687 read_lock(&pag->pag_ici_lock);
688 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
689 (void**)&ip, first_index, 1,
690 XFS_ICI_RECLAIM_TAG);
691
692 if (!nr_found) {
693 read_unlock(&pag->pag_ici_lock);
694 break;
695 }
696
697 /*
698 * Update the index for the next lookup. Catch overflows
699 * into the next AG range which can occur if we have inodes
700 * in the last block of the AG and we are currently
701 * pointing to the last inode.
702 */
703 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
704 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
705 read_unlock(&pag->pag_ici_lock);
706 break;
707 }
708
709 ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE|XFS_IRECLAIM)));
710
711 /* ignore if already under reclaim */
712 if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
713 read_unlock(&pag->pag_ici_lock);
714 continue;
715 }
716
717 if (noblock) {
718 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
719 read_unlock(&pag->pag_ici_lock);
720 continue;
721 }
722 if (xfs_ipincount(ip) ||
723 !xfs_iflock_nowait(ip)) {
724 xfs_iunlock(ip, XFS_ILOCK_EXCL);
725 read_unlock(&pag->pag_ici_lock);
726 continue;
727 }
728 }
729 read_unlock(&pag->pag_ici_lock);
730
731 /*
732 * hmmm - this is an inode already in reclaim. Do
733 * we even bother catching it here?
734 */
735 if (xfs_reclaim_inode(ip, noblock, mode))
736 skipped++;
737 } while (nr_found);
738
739 if (skipped) {
740 delay(1);
741 goto restart;
742 }
743 return;
744
745}
746
747int
748xfs_reclaim_inodes(
749 xfs_mount_t *mp,
750 int noblock,
751 int mode)
752{
753 int i;
754
755 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
756 if (!mp->m_perag[i].pag_ici_init)
757 continue;
758 xfs_reclaim_inodes_ag(mp, i, noblock, mode);
759 }
760 return 0;
761}
762
763
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
new file mode 100644
index 000000000000..5f6de1efe1f6
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -0,0 +1,55 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef XFS_SYNC_H
19#define XFS_SYNC_H 1
20
21struct xfs_mount;
22
23typedef struct bhv_vfs_sync_work {
24 struct list_head w_list;
25 struct xfs_mount *w_mount;
26 void *w_data; /* syncer routine argument */
27 void (*w_syncer)(struct xfs_mount *, void *);
28} bhv_vfs_sync_work_t;
29
30#define SYNC_ATTR 0x0001 /* sync attributes */
31#define SYNC_DELWRI 0x0002 /* look at delayed writes */
32#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
33#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
34#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
35
36int xfs_syncd_init(struct xfs_mount *mp);
37void xfs_syncd_stop(struct xfs_mount *mp);
38
39int xfs_sync_inodes(struct xfs_mount *mp, int flags);
40int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
41
42int xfs_quiesce_data(struct xfs_mount *mp);
43void xfs_quiesce_attr(struct xfs_mount *mp);
44
45void xfs_flush_inode(struct xfs_inode *ip);
46void xfs_flush_device(struct xfs_inode *ip);
47
48int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
49int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
50
51void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54 struct xfs_inode *ip);
55#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7dacb5bbde3f..916c0ffb6083 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -56,17 +56,6 @@ xfs_stats_clear_proc_handler(
56 56
57static ctl_table xfs_table[] = { 57static ctl_table xfs_table[] = {
58 { 58 {
59 .ctl_name = XFS_RESTRICT_CHOWN,
60 .procname = "restrict_chown",
61 .data = &xfs_params.restrict_chown.val,
62 .maxlen = sizeof(int),
63 .mode = 0644,
64 .proc_handler = &proc_dointvec_minmax,
65 .strategy = &sysctl_intvec,
66 .extra1 = &xfs_params.restrict_chown.min,
67 .extra2 = &xfs_params.restrict_chown.max
68 },
69 {
70 .ctl_name = XFS_SGID_INHERIT, 59 .ctl_name = XFS_SGID_INHERIT,
71 .procname = "irix_sgid_inherit", 60 .procname = "irix_sgid_inherit",
72 .data = &xfs_params.sgid_inherit.val, 61 .data = &xfs_params.sgid_inherit.val,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 4aadb8056c37..b9937d450f8e 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val {
31} xfs_sysctl_val_t; 31} xfs_sysctl_val_t;
32 32
33typedef struct xfs_param { 33typedef struct xfs_param {
34 xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
35 xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is 34 xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
36 * not a member of parent dir GID. */ 35 * not a member of parent dir GID. */
37 xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ 36 xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
@@ -68,7 +67,7 @@ typedef struct xfs_param {
68enum { 67enum {
69 /* XFS_REFCACHE_SIZE = 1 */ 68 /* XFS_REFCACHE_SIZE = 1 */
70 /* XFS_REFCACHE_PURGE = 2 */ 69 /* XFS_REFCACHE_PURGE = 2 */
71 XFS_RESTRICT_CHOWN = 3, 70 /* XFS_RESTRICT_CHOWN = 3 */
72 XFS_SGID_INHERIT = 4, 71 XFS_SGID_INHERIT = 4,
73 XFS_SYMLINK_MODE = 5, 72 XFS_SYMLINK_MODE = 5,
74 XFS_PANIC_MASK = 6, 73 XFS_PANIC_MASK = 6,
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 7e60c7776b1c..0ab60bc2e761 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -33,37 +33,6 @@ struct xfs_mount_args;
33 33
34typedef struct kstatfs bhv_statvfs_t; 34typedef struct kstatfs bhv_statvfs_t;
35 35
36typedef struct bhv_vfs_sync_work {
37 struct list_head w_list;
38 struct xfs_mount *w_mount;
39 void *w_data; /* syncer routine argument */
40 void (*w_syncer)(struct xfs_mount *, void *);
41} bhv_vfs_sync_work_t;
42
43#define SYNC_ATTR 0x0001 /* sync attributes */
44#define SYNC_CLOSE 0x0002 /* close file system down */
45#define SYNC_DELWRI 0x0004 /* look at delayed writes */
46#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
47#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
48#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
49#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
50#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
51#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
52
53/*
54 * When remounting a filesystem read-only or freezing the filesystem,
55 * we have two phases to execute. This first phase is syncing the data
56 * before we quiesce the fielsystem, and the second is flushing all the
57 * inodes out after we've waited for all the transactions created by
58 * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
59 * to ensure that the inodes are written to their location on disk
60 * rather than just existing in transactions in the log. This means
61 * after a quiesce there is no log replay required to write the inodes
62 * to disk (this is the main difference between a sync and a quiesce).
63 */
64#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
65#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
66
67#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ 36#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
68#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ 37#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
69#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ 38#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index b52528bbbfff..ad18262d651b 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -84,25 +84,12 @@ vn_ioerror(
84 84
85#ifdef XFS_INODE_TRACE 85#ifdef XFS_INODE_TRACE
86 86
87/*
88 * Reference count of Linux inode if present, -1 if the xfs_inode
89 * has no associated Linux inode.
90 */
91static inline int xfs_icount(struct xfs_inode *ip)
92{
93 struct inode *vp = VFS_I(ip);
94
95 if (vp)
96 return vn_count(vp);
97 return -1;
98}
99
100#define KTRACE_ENTER(ip, vk, s, line, ra) \ 87#define KTRACE_ENTER(ip, vk, s, line, ra) \
101 ktrace_enter( (ip)->i_trace, \ 88 ktrace_enter( (ip)->i_trace, \
102/* 0 */ (void *)(__psint_t)(vk), \ 89/* 0 */ (void *)(__psint_t)(vk), \
103/* 1 */ (void *)(s), \ 90/* 1 */ (void *)(s), \
104/* 2 */ (void *)(__psint_t) line, \ 91/* 2 */ (void *)(__psint_t) line, \
105/* 3 */ (void *)(__psint_t)xfs_icount(ip), \ 92/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
106/* 4 */ (void *)(ra), \ 93/* 4 */ (void *)(ra), \
107/* 5 */ NULL, \ 94/* 5 */ NULL, \
108/* 6 */ (void *)(__psint_t)current_cpu(), \ 95/* 6 */ (void *)(__psint_t)current_cpu(), \
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 683ce16210ff..bf89e41c3b8d 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -80,11 +80,6 @@ do { \
80 iput(VFS_I(ip)); \ 80 iput(VFS_I(ip)); \
81} while (0) 81} while (0)
82 82
83static inline struct inode *vn_grab(struct inode *vp)
84{
85 return igrab(vp);
86}
87
88/* 83/*
89 * Dealing with bad inodes 84 * Dealing with bad inodes
90 */ 85 */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index f2705f2fd43c..591ca6602bfb 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
101 if (brandnewdquot) { 101 if (brandnewdquot) {
102 dqp->dq_flnext = dqp->dq_flprev = dqp; 102 dqp->dq_flnext = dqp->dq_flprev = dqp;
103 mutex_init(&dqp->q_qlock); 103 mutex_init(&dqp->q_qlock);
104 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); 104 init_waitqueue_head(&dqp->q_pinwait);
105 105
106 /* 106 /*
107 * Because we want to use a counting completion, complete 107 * Because we want to use a counting completion, complete
@@ -131,7 +131,7 @@ xfs_qm_dqinit(
131 dqp->q_res_bcount = 0; 131 dqp->q_res_bcount = 0;
132 dqp->q_res_icount = 0; 132 dqp->q_res_icount = 0;
133 dqp->q_res_rtbcount = 0; 133 dqp->q_res_rtbcount = 0;
134 dqp->q_pincount = 0; 134 atomic_set(&dqp->q_pincount, 0);
135 dqp->q_hash = NULL; 135 dqp->q_hash = NULL;
136 ASSERT(dqp->dq_flnext == dqp->dq_flprev); 136 ASSERT(dqp->dq_flnext == dqp->dq_flprev);
137 137
@@ -1221,16 +1221,14 @@ xfs_qm_dqflush(
1221 xfs_dqtrace_entry(dqp, "DQFLUSH"); 1221 xfs_dqtrace_entry(dqp, "DQFLUSH");
1222 1222
1223 /* 1223 /*
1224 * If not dirty, nada. 1224 * If not dirty, or it's pinned and we are not supposed to
1225 * block, nada.
1225 */ 1226 */
1226 if (!XFS_DQ_IS_DIRTY(dqp)) { 1227 if (!XFS_DQ_IS_DIRTY(dqp) ||
1228 (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) {
1227 xfs_dqfunlock(dqp); 1229 xfs_dqfunlock(dqp);
1228 return (0); 1230 return 0;
1229 } 1231 }
1230
1231 /*
1232 * Cant flush a pinned dquot. Wait for it.
1233 */
1234 xfs_qm_dqunpin_wait(dqp); 1232 xfs_qm_dqunpin_wait(dqp);
1235 1233
1236 /* 1234 /*
@@ -1274,10 +1272,8 @@ xfs_qm_dqflush(
1274 dqp->dq_flags &= ~(XFS_DQ_DIRTY); 1272 dqp->dq_flags &= ~(XFS_DQ_DIRTY);
1275 mp = dqp->q_mount; 1273 mp = dqp->q_mount;
1276 1274
1277 /* lsn is 64 bits */ 1275 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
1278 spin_lock(&mp->m_ail_lock); 1276 &dqp->q_logitem.qli_item.li_lsn);
1279 dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
1280 spin_unlock(&mp->m_ail_lock);
1281 1277
1282 /* 1278 /*
1283 * Attach an iodone routine so that we can remove this dquot from the 1279 * Attach an iodone routine so that we can remove this dquot from the
@@ -1323,8 +1319,10 @@ xfs_qm_dqflush_done(
1323 xfs_dq_logitem_t *qip) 1319 xfs_dq_logitem_t *qip)
1324{ 1320{
1325 xfs_dquot_t *dqp; 1321 xfs_dquot_t *dqp;
1322 struct xfs_ail *ailp;
1326 1323
1327 dqp = qip->qli_dquot; 1324 dqp = qip->qli_dquot;
1325 ailp = qip->qli_item.li_ailp;
1328 1326
1329 /* 1327 /*
1330 * We only want to pull the item from the AIL if its 1328 * We only want to pull the item from the AIL if its
@@ -1337,15 +1335,12 @@ xfs_qm_dqflush_done(
1337 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && 1335 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
1338 qip->qli_item.li_lsn == qip->qli_flush_lsn) { 1336 qip->qli_item.li_lsn == qip->qli_flush_lsn) {
1339 1337
1340 spin_lock(&dqp->q_mount->m_ail_lock); 1338 /* xfs_trans_ail_delete() drops the AIL lock. */
1341 /* 1339 spin_lock(&ailp->xa_lock);
1342 * xfs_trans_delete_ail() drops the AIL lock.
1343 */
1344 if (qip->qli_item.li_lsn == qip->qli_flush_lsn) 1340 if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
1345 xfs_trans_delete_ail(dqp->q_mount, 1341 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
1346 (xfs_log_item_t*)qip);
1347 else 1342 else
1348 spin_unlock(&dqp->q_mount->m_ail_lock); 1343 spin_unlock(&ailp->xa_lock);
1349 } 1344 }
1350 1345
1351 /* 1346 /*
@@ -1375,7 +1370,7 @@ xfs_dqunlock(
1375 mutex_unlock(&(dqp->q_qlock)); 1370 mutex_unlock(&(dqp->q_qlock));
1376 if (dqp->q_logitem.qli_dquot == dqp) { 1371 if (dqp->q_logitem.qli_dquot == dqp) {
1377 /* Once was dqp->q_mount, but might just have been cleared */ 1372 /* Once was dqp->q_mount, but might just have been cleared */
1378 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp, 1373 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
1379 (xfs_log_item_t*)&(dqp->q_logitem)); 1374 (xfs_log_item_t*)&(dqp->q_logitem));
1380 } 1375 }
1381} 1376}
@@ -1489,7 +1484,7 @@ xfs_qm_dqpurge(
1489 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1484 "xfs_qm_dqpurge: dquot %p flush failed", dqp);
1490 xfs_dqflock(dqp); 1485 xfs_dqflock(dqp);
1491 } 1486 }
1492 ASSERT(dqp->q_pincount == 0); 1487 ASSERT(atomic_read(&dqp->q_pincount) == 0);
1493 ASSERT(XFS_FORCED_SHUTDOWN(mp) || 1488 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1494 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); 1489 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1495 1490
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 8958d0faf8d3..7e455337e2ba 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -83,8 +83,8 @@ typedef struct xfs_dquot {
83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ 83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
84 mutex_t q_qlock; /* quota lock */ 84 mutex_t q_qlock; /* quota lock */
85 struct completion q_flush; /* flush completion queue */ 85 struct completion q_flush; /* flush completion queue */
86 uint q_pincount; /* pin count for this dquot */ 86 atomic_t q_pincount; /* dquot pin count */
87 sv_t q_pinwait; /* sync var for pinning */ 87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
88#ifdef XFS_DQUOT_TRACE 88#ifdef XFS_DQUOT_TRACE
89 struct ktrace *q_trace; /* trace header structure */ 89 struct ktrace *q_trace; /* trace header structure */
90#endif 90#endif
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index f028644caa5e..1728f6a7c4f5 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -88,25 +88,22 @@ xfs_qm_dquot_logitem_format(
88 88
89/* 89/*
90 * Increment the pin count of the given dquot. 90 * Increment the pin count of the given dquot.
91 * This value is protected by pinlock spinlock in the xQM structure.
92 */ 91 */
93STATIC void 92STATIC void
94xfs_qm_dquot_logitem_pin( 93xfs_qm_dquot_logitem_pin(
95 xfs_dq_logitem_t *logitem) 94 xfs_dq_logitem_t *logitem)
96{ 95{
97 xfs_dquot_t *dqp; 96 xfs_dquot_t *dqp = logitem->qli_dquot;
98 97
99 dqp = logitem->qli_dquot;
100 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 98 ASSERT(XFS_DQ_IS_LOCKED(dqp));
101 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 99 atomic_inc(&dqp->q_pincount);
102 dqp->q_pincount++;
103 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
104} 100}
105 101
106/* 102/*
107 * Decrement the pin count of the given dquot, and wake up 103 * Decrement the pin count of the given dquot, and wake up
108 * anyone in xfs_dqwait_unpin() if the count goes to 0. The 104 * anyone in xfs_dqwait_unpin() if the count goes to 0. The
109 * dquot must have been previously pinned with a call to xfs_dqpin(). 105 * dquot must have been previously pinned with a call to
106 * xfs_qm_dquot_logitem_pin().
110 */ 107 */
111/* ARGSUSED */ 108/* ARGSUSED */
112STATIC void 109STATIC void
@@ -114,16 +111,11 @@ xfs_qm_dquot_logitem_unpin(
114 xfs_dq_logitem_t *logitem, 111 xfs_dq_logitem_t *logitem,
115 int stale) 112 int stale)
116{ 113{
117 xfs_dquot_t *dqp; 114 xfs_dquot_t *dqp = logitem->qli_dquot;
118 115
119 dqp = logitem->qli_dquot; 116 ASSERT(atomic_read(&dqp->q_pincount) > 0);
120 ASSERT(dqp->q_pincount > 0); 117 if (atomic_dec_and_test(&dqp->q_pincount))
121 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 118 wake_up(&dqp->q_pinwait);
122 dqp->q_pincount--;
123 if (dqp->q_pincount == 0) {
124 sv_broadcast(&dqp->q_pinwait);
125 }
126 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
127} 119}
128 120
129/* ARGSUSED */ 121/* ARGSUSED */
@@ -193,21 +185,14 @@ xfs_qm_dqunpin_wait(
193 xfs_dquot_t *dqp) 185 xfs_dquot_t *dqp)
194{ 186{
195 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 187 ASSERT(XFS_DQ_IS_LOCKED(dqp));
196 if (dqp->q_pincount == 0) { 188 if (atomic_read(&dqp->q_pincount) == 0)
197 return; 189 return;
198 }
199 190
200 /* 191 /*
201 * Give the log a push so we don't wait here too long. 192 * Give the log a push so we don't wait here too long.
202 */ 193 */
203 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); 194 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
204 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 195 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
205 if (dqp->q_pincount == 0) {
206 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
207 return;
208 }
209 sv_wait(&(dqp->q_pinwait), PINOD,
210 &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
211} 196}
212 197
213/* 198/*
@@ -310,7 +295,7 @@ xfs_qm_dquot_logitem_trylock(
310 uint retval; 295 uint retval;
311 296
312 dqp = qip->qli_dquot; 297 dqp = qip->qli_dquot;
313 if (dqp->q_pincount > 0) 298 if (atomic_read(&dqp->q_pincount) > 0)
314 return (XFS_ITEM_PINNED); 299 return (XFS_ITEM_PINNED);
315 300
316 if (! xfs_qm_dqlock_nowait(dqp)) 301 if (! xfs_qm_dqlock_nowait(dqp))
@@ -568,14 +553,16 @@ xfs_qm_qoffend_logitem_committed(
568 xfs_lsn_t lsn) 553 xfs_lsn_t lsn)
569{ 554{
570 xfs_qoff_logitem_t *qfs; 555 xfs_qoff_logitem_t *qfs;
556 struct xfs_ail *ailp;
571 557
572 qfs = qfe->qql_start_lip; 558 qfs = qfe->qql_start_lip;
573 spin_lock(&qfs->qql_item.li_mountp->m_ail_lock); 559 ailp = qfs->qql_item.li_ailp;
560 spin_lock(&ailp->xa_lock);
574 /* 561 /*
575 * Delete the qoff-start logitem from the AIL. 562 * Delete the qoff-start logitem from the AIL.
576 * xfs_trans_delete_ail() drops the AIL lock. 563 * xfs_trans_ail_delete() drops the AIL lock.
577 */ 564 */
578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); 565 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
579 kmem_free(qfs); 566 kmem_free(qfs);
580 kmem_free(qfe); 567 kmem_free(qfe);
581 return (xfs_lsn_t)-1; 568 return (xfs_lsn_t)-1;
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index df0ffef9775a..5b198d15e76b 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -20,7 +20,6 @@
20#include "xfs_bit.h" 20#include "xfs_bit.h"
21#include "xfs_log.h" 21#include "xfs_log.h"
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_clnt.h"
24#include "xfs_trans.h" 23#include "xfs_trans.h"
25#include "xfs_sb.h" 24#include "xfs_sb.h"
26#include "xfs_ag.h" 25#include "xfs_ag.h"
@@ -987,14 +986,10 @@ xfs_qm_dqdetach(
987} 986}
988 987
989/* 988/*
990 * This is called by VFS_SYNC and flags arg determines the caller, 989 * This is called to sync quotas. We can be told to use non-blocking
991 * and its motives, as done in xfs_sync. 990 * semantics by either the SYNC_BDFLUSH flag or the absence of the
992 * 991 * SYNC_WAIT flag.
993 * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
994 * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
995 * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
996 */ 992 */
997
998int 993int
999xfs_qm_sync( 994xfs_qm_sync(
1000 xfs_mount_t *mp, 995 xfs_mount_t *mp,
@@ -1137,7 +1132,6 @@ xfs_qm_init_quotainfo(
1137 return error; 1132 return error;
1138 } 1133 }
1139 1134
1140 spin_lock_init(&qinf->qi_pinlock);
1141 xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); 1135 xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
1142 qinf->qi_dqreclaims = 0; 1136 qinf->qi_dqreclaims = 0;
1143 1137
@@ -1234,7 +1228,6 @@ xfs_qm_destroy_quotainfo(
1234 */ 1228 */
1235 xfs_qm_rele_quotafs_ref(mp); 1229 xfs_qm_rele_quotafs_ref(mp);
1236 1230
1237 spinlock_destroy(&qi->qi_pinlock);
1238 xfs_qm_list_destroy(&qi->qi_dqlist); 1231 xfs_qm_list_destroy(&qi->qi_dqlist);
1239 1232
1240 if (qi->qi_uquotaip) { 1233 if (qi->qi_uquotaip) {
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 44f25349e478..4f2de9771728 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -106,7 +106,6 @@ typedef struct xfs_qm {
106typedef struct xfs_quotainfo { 106typedef struct xfs_quotainfo {
107 xfs_inode_t *qi_uquotaip; /* user quota inode */ 107 xfs_inode_t *qi_uquotaip; /* user quota inode */
108 xfs_inode_t *qi_gquotaip; /* group quota inode */ 108 xfs_inode_t *qi_gquotaip; /* group quota inode */
109 spinlock_t qi_pinlock; /* dquot pinning lock */
110 xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ 109 xfs_dqlist_t qi_dqlist; /* all dquots in filesys */
111 int qi_dqreclaims; /* a change here indicates 110 int qi_dqreclaims; /* a change here indicates
112 a removal in the dqlist */ 111 a removal in the dqlist */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index eea2e60b456b..9556df9f7dab 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -20,7 +20,6 @@
20#include "xfs_bit.h" 20#include "xfs_bit.h"
21#include "xfs_log.h" 21#include "xfs_log.h"
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_clnt.h"
24#include "xfs_trans.h" 23#include "xfs_trans.h"
25#include "xfs_sb.h" 24#include "xfs_sb.h"
26#include "xfs_ag.h" 25#include "xfs_ag.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 1a3b803dfa55..9ff28e6c5b8b 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -127,7 +127,7 @@ xfs_qm_quotactl(
127 break; 127 break;
128 128
129 case Q_XQUOTASYNC: 129 case Q_XQUOTASYNC:
130 return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL)); 130 return xfs_sync_inodes(mp, SYNC_DELWRI);
131 131
132 default: 132 default:
133 break; 133 break;
@@ -1022,101 +1022,92 @@ xfs_qm_export_flags(
1022 1022
1023 1023
1024/* 1024/*
1025 * Go thru all the inodes in the file system, releasing their dquots. 1025 * Release all the dquots on the inodes in an AG.
1026 * Note that the mount structure gets modified to indicate that quotas are off
1027 * AFTER this, in the case of quotaoff. This also gets called from
1028 * xfs_rootumount.
1029 */ 1026 */
1030void 1027STATIC void
1031xfs_qm_dqrele_all_inodes( 1028xfs_qm_dqrele_inodes_ag(
1032 struct xfs_mount *mp, 1029 xfs_mount_t *mp,
1033 uint flags) 1030 int ag,
1031 uint flags)
1034{ 1032{
1035 xfs_inode_t *ip, *topino; 1033 xfs_inode_t *ip = NULL;
1036 uint ireclaims; 1034 xfs_perag_t *pag = &mp->m_perag[ag];
1037 struct inode *vp; 1035 int first_index = 0;
1038 boolean_t vnode_refd; 1036 int nr_found;
1039
1040 ASSERT(mp->m_quotainfo);
1041 1037
1042 XFS_MOUNT_ILOCK(mp);
1043again:
1044 ip = mp->m_inodes;
1045 if (ip == NULL) {
1046 XFS_MOUNT_IUNLOCK(mp);
1047 return;
1048 }
1049 do { 1038 do {
1050 /* Skip markers inserted by xfs_sync */ 1039 boolean_t inode_refed;
1051 if (ip->i_mount == NULL) { 1040 struct inode *inode;
1052 ip = ip->i_mnext; 1041
1053 continue; 1042 /*
1054 } 1043 * use a gang lookup to find the next inode in the tree
1055 /* Root inode, rbmip and rsumip have associated blocks */ 1044 * as the tree is sparse and a gang lookup walks to find
1056 if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { 1045 * the number of objects requested.
1057 ASSERT(ip->i_udquot == NULL); 1046 */
1058 ASSERT(ip->i_gdquot == NULL); 1047 read_lock(&pag->pag_ici_lock);
1059 ip = ip->i_mnext; 1048 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
1060 continue; 1049 (void**)&ip, first_index, 1);
1050
1051 if (!nr_found) {
1052 read_unlock(&pag->pag_ici_lock);
1053 break;
1061 } 1054 }
1062 vp = VFS_I(ip); 1055
1063 if (!vp) { 1056 /* update the index for the next lookup */
1057 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
1058
1059 /* skip quota inodes and those in reclaim */
1060 inode = VFS_I(ip);
1061 if (!inode || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
1064 ASSERT(ip->i_udquot == NULL); 1062 ASSERT(ip->i_udquot == NULL);
1065 ASSERT(ip->i_gdquot == NULL); 1063 ASSERT(ip->i_gdquot == NULL);
1066 ip = ip->i_mnext; 1064 read_unlock(&pag->pag_ici_lock);
1067 continue; 1065 continue;
1068 } 1066 }
1069 vnode_refd = B_FALSE;
1070 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { 1067 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
1071 ireclaims = mp->m_ireclaims; 1068 inode = igrab(inode);
1072 topino = mp->m_inodes; 1069 read_unlock(&pag->pag_ici_lock);
1073 vp = vn_grab(vp); 1070 if (!inode)
1074 if (!vp) 1071 continue;
1075 goto again; 1072 inode_refed = B_TRUE;
1076
1077 XFS_MOUNT_IUNLOCK(mp);
1078 /* XXX restart limit ? */
1079 xfs_ilock(ip, XFS_ILOCK_EXCL); 1073 xfs_ilock(ip, XFS_ILOCK_EXCL);
1080 vnode_refd = B_TRUE;
1081 } else { 1074 } else {
1082 ireclaims = mp->m_ireclaims; 1075 read_unlock(&pag->pag_ici_lock);
1083 topino = mp->m_inodes;
1084 XFS_MOUNT_IUNLOCK(mp);
1085 } 1076 }
1086
1087 /*
1088 * We don't keep the mountlock across the dqrele() call,
1089 * since it can take a while..
1090 */
1091 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { 1077 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
1092 xfs_qm_dqrele(ip->i_udquot); 1078 xfs_qm_dqrele(ip->i_udquot);
1093 ip->i_udquot = NULL; 1079 ip->i_udquot = NULL;
1094 } 1080 }
1095 if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { 1081 if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
1082 ip->i_gdquot) {
1096 xfs_qm_dqrele(ip->i_gdquot); 1083 xfs_qm_dqrele(ip->i_gdquot);
1097 ip->i_gdquot = NULL; 1084 ip->i_gdquot = NULL;
1098 } 1085 }
1099 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1086 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1100 /* 1087 if (inode_refed)
1101 * Wait until we've dropped the ilock and mountlock to
1102 * do the vn_rele. Or be condemned to an eternity in the
1103 * inactive code in hell.
1104 */
1105 if (vnode_refd)
1106 IRELE(ip); 1088 IRELE(ip);
1107 XFS_MOUNT_ILOCK(mp); 1089 } while (nr_found);
1108 /* 1090}
1109 * If an inode was inserted or removed, we gotta 1091
1110 * start over again. 1092/*
1111 */ 1093 * Go thru all the inodes in the file system, releasing their dquots.
1112 if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { 1094 * Note that the mount structure gets modified to indicate that quotas are off
1113 /* XXX use a sentinel */ 1095 * AFTER this, in the case of quotaoff. This also gets called from
1114 goto again; 1096 * xfs_rootumount.
1115 } 1097 */
1116 ip = ip->i_mnext; 1098void
1117 } while (ip != mp->m_inodes); 1099xfs_qm_dqrele_all_inodes(
1100 struct xfs_mount *mp,
1101 uint flags)
1102{
1103 int i;
1118 1104
1119 XFS_MOUNT_IUNLOCK(mp); 1105 ASSERT(mp->m_quotainfo);
1106 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
1107 if (!mp->m_perag[i].pag_ici_init)
1108 continue;
1109 xfs_qm_dqrele_inodes_ag(mp, i, flags);
1110 }
1120} 1111}
1121 1112
1122/*------------------------------------------------------------------------*/ 1113/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index c27abef7b84f..636104254cfd 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -84,5 +84,5 @@ assfail(char *expr, char *file, int line)
84void 84void
85xfs_hex_dump(void *p, int length) 85xfs_hex_dump(void *p, int length)
86{ 86{
87 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); 87 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
88} 88}
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 540e4c989825..17254b529c54 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -30,7 +30,7 @@
30#define XFS_ATTR_TRACE 1 30#define XFS_ATTR_TRACE 1
31#define XFS_BLI_TRACE 1 31#define XFS_BLI_TRACE 1
32#define XFS_BMAP_TRACE 1 32#define XFS_BMAP_TRACE 1
33#define XFS_BMBT_TRACE 1 33#define XFS_BTREE_TRACE 1
34#define XFS_DIR2_TRACE 1 34#define XFS_DIR2_TRACE 1
35#define XFS_DQUOT_TRACE 1 35#define XFS_DQUOT_TRACE 1
36#define XFS_ILOCK_TRACE 1 36#define XFS_ILOCK_TRACE 1
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b2f639a1416f..a8cdd73999a4 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -366,7 +366,7 @@ xfs_acl_allow_set(
366 return ENOTDIR; 366 return ENOTDIR;
367 if (vp->i_sb->s_flags & MS_RDONLY) 367 if (vp->i_sb->s_flags & MS_RDONLY)
368 return EROFS; 368 return EROFS;
369 if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) 369 if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
370 return EPERM; 370 return EPERM;
371 return 0; 371 return 0;
372} 372}
@@ -413,13 +413,13 @@ xfs_acl_access(
413 switch (fap->acl_entry[i].ae_tag) { 413 switch (fap->acl_entry[i].ae_tag) {
414 case ACL_USER_OBJ: 414 case ACL_USER_OBJ:
415 seen_userobj = 1; 415 seen_userobj = 1;
416 if (fuid != current->fsuid) 416 if (fuid != current_fsuid())
417 continue; 417 continue;
418 matched.ae_tag = ACL_USER_OBJ; 418 matched.ae_tag = ACL_USER_OBJ;
419 matched.ae_perm = allows; 419 matched.ae_perm = allows;
420 break; 420 break;
421 case ACL_USER: 421 case ACL_USER:
422 if (fap->acl_entry[i].ae_id != current->fsuid) 422 if (fap->acl_entry[i].ae_id != current_fsuid())
423 continue; 423 continue;
424 matched.ae_tag = ACL_USER; 424 matched.ae_tag = ACL_USER;
425 matched.ae_perm = allows; 425 matched.ae_perm = allows;
@@ -758,7 +758,7 @@ xfs_acl_setmode(
758 if (gap && nomask) 758 if (gap && nomask)
759 iattr.ia_mode |= gap->ae_perm << 3; 759 iattr.ia_mode |= gap->ae_perm << 3;
760 760
761 return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred); 761 return xfs_setattr(XFS_I(vp), &iattr, 0);
762} 762}
763 763
764/* 764/*
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 61b292a9fb41..2bfd86329141 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -192,17 +192,23 @@ typedef struct xfs_perag
192 xfs_agino_t pagi_freecount; /* number of free inodes */ 192 xfs_agino_t pagi_freecount; /* number of free inodes */
193 xfs_agino_t pagi_count; /* number of allocated inodes */ 193 xfs_agino_t pagi_count; /* number of allocated inodes */
194 int pagb_count; /* pagb slots in use */ 194 int pagb_count; /* pagb slots in use */
195 xfs_perag_busy_t *pagb_list; /* unstable blocks */
195#ifdef __KERNEL__ 196#ifdef __KERNEL__
196 spinlock_t pagb_lock; /* lock for pagb_list */ 197 spinlock_t pagb_lock; /* lock for pagb_list */
197#endif 198
198 xfs_perag_busy_t *pagb_list; /* unstable blocks */
199 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 199 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
200 200
201 int pag_ici_init; /* incore inode cache initialised */ 201 int pag_ici_init; /* incore inode cache initialised */
202 rwlock_t pag_ici_lock; /* incore inode lock */ 202 rwlock_t pag_ici_lock; /* incore inode lock */
203 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 203 struct radix_tree_root pag_ici_root; /* incore inode cache root */
204#endif
204} xfs_perag_t; 205} xfs_perag_t;
205 206
207/*
208 * tags for inode radix tree
209 */
210#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
211
206#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) 212#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
207#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ 213#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
208 (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) 214 (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 1956f83489f1..c47ce9075728 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -90,6 +90,92 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
90 */ 90 */
91 91
92/* 92/*
93 * Lookup the record equal to [bno, len] in the btree given by cur.
94 */
95STATIC int /* error */
96xfs_alloc_lookup_eq(
97 struct xfs_btree_cur *cur, /* btree cursor */
98 xfs_agblock_t bno, /* starting block of extent */
99 xfs_extlen_t len, /* length of extent */
100 int *stat) /* success/failure */
101{
102 cur->bc_rec.a.ar_startblock = bno;
103 cur->bc_rec.a.ar_blockcount = len;
104 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
105}
106
107/*
108 * Lookup the first record greater than or equal to [bno, len]
109 * in the btree given by cur.
110 */
111STATIC int /* error */
112xfs_alloc_lookup_ge(
113 struct xfs_btree_cur *cur, /* btree cursor */
114 xfs_agblock_t bno, /* starting block of extent */
115 xfs_extlen_t len, /* length of extent */
116 int *stat) /* success/failure */
117{
118 cur->bc_rec.a.ar_startblock = bno;
119 cur->bc_rec.a.ar_blockcount = len;
120 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
121}
122
123/*
124 * Lookup the first record less than or equal to [bno, len]
125 * in the btree given by cur.
126 */
127STATIC int /* error */
128xfs_alloc_lookup_le(
129 struct xfs_btree_cur *cur, /* btree cursor */
130 xfs_agblock_t bno, /* starting block of extent */
131 xfs_extlen_t len, /* length of extent */
132 int *stat) /* success/failure */
133{
134 cur->bc_rec.a.ar_startblock = bno;
135 cur->bc_rec.a.ar_blockcount = len;
136 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
137}
138
139/*
140 * Update the record referred to by cur to the value given
141 * by [bno, len].
142 * This either works (return 0) or gets an EFSCORRUPTED error.
143 */
144STATIC int /* error */
145xfs_alloc_update(
146 struct xfs_btree_cur *cur, /* btree cursor */
147 xfs_agblock_t bno, /* starting block of extent */
148 xfs_extlen_t len) /* length of extent */
149{
150 union xfs_btree_rec rec;
151
152 rec.alloc.ar_startblock = cpu_to_be32(bno);
153 rec.alloc.ar_blockcount = cpu_to_be32(len);
154 return xfs_btree_update(cur, &rec);
155}
156
157/*
158 * Get the data from the pointed-to record.
159 */
160STATIC int /* error */
161xfs_alloc_get_rec(
162 struct xfs_btree_cur *cur, /* btree cursor */
163 xfs_agblock_t *bno, /* output: starting block of extent */
164 xfs_extlen_t *len, /* output: length of extent */
165 int *stat) /* output: success/failure */
166{
167 union xfs_btree_rec *rec;
168 int error;
169
170 error = xfs_btree_get_rec(cur, &rec, stat);
171 if (!error && *stat == 1) {
172 *bno = be32_to_cpu(rec->alloc.ar_startblock);
173 *len = be32_to_cpu(rec->alloc.ar_blockcount);
174 }
175 return error;
176}
177
178/*
93 * Compute aligned version of the found extent. 179 * Compute aligned version of the found extent.
94 * Takes alignment and min length into account. 180 * Takes alignment and min length into account.
95 */ 181 */
@@ -294,21 +380,20 @@ xfs_alloc_fixup_trees(
294 return error; 380 return error;
295 XFS_WANT_CORRUPTED_RETURN(i == 1); 381 XFS_WANT_CORRUPTED_RETURN(i == 1);
296 } 382 }
383
297#ifdef DEBUG 384#ifdef DEBUG
298 { 385 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
299 xfs_alloc_block_t *bnoblock; 386 struct xfs_btree_block *bnoblock;
300 xfs_alloc_block_t *cntblock; 387 struct xfs_btree_block *cntblock;
301 388
302 if (bno_cur->bc_nlevels == 1 && 389 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
303 cnt_cur->bc_nlevels == 1) { 390 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
304 bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]); 391
305 cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]); 392 XFS_WANT_CORRUPTED_RETURN(
306 XFS_WANT_CORRUPTED_RETURN( 393 bnoblock->bb_numrecs == cntblock->bb_numrecs);
307 be16_to_cpu(bnoblock->bb_numrecs) ==
308 be16_to_cpu(cntblock->bb_numrecs));
309 }
310 } 394 }
311#endif 395#endif
396
312 /* 397 /*
313 * Deal with all four cases: the allocated record is contained 398 * Deal with all four cases: the allocated record is contained
314 * within the freespace record, so we can have new freespace 399 * within the freespace record, so we can have new freespace
@@ -333,7 +418,7 @@ xfs_alloc_fixup_trees(
333 /* 418 /*
334 * Delete the entry from the by-size btree. 419 * Delete the entry from the by-size btree.
335 */ 420 */
336 if ((error = xfs_alloc_delete(cnt_cur, &i))) 421 if ((error = xfs_btree_delete(cnt_cur, &i)))
337 return error; 422 return error;
338 XFS_WANT_CORRUPTED_RETURN(i == 1); 423 XFS_WANT_CORRUPTED_RETURN(i == 1);
339 /* 424 /*
@@ -343,7 +428,7 @@ xfs_alloc_fixup_trees(
343 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 428 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
344 return error; 429 return error;
345 XFS_WANT_CORRUPTED_RETURN(i == 0); 430 XFS_WANT_CORRUPTED_RETURN(i == 0);
346 if ((error = xfs_alloc_insert(cnt_cur, &i))) 431 if ((error = xfs_btree_insert(cnt_cur, &i)))
347 return error; 432 return error;
348 XFS_WANT_CORRUPTED_RETURN(i == 1); 433 XFS_WANT_CORRUPTED_RETURN(i == 1);
349 } 434 }
@@ -351,7 +436,7 @@ xfs_alloc_fixup_trees(
351 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 436 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
352 return error; 437 return error;
353 XFS_WANT_CORRUPTED_RETURN(i == 0); 438 XFS_WANT_CORRUPTED_RETURN(i == 0);
354 if ((error = xfs_alloc_insert(cnt_cur, &i))) 439 if ((error = xfs_btree_insert(cnt_cur, &i)))
355 return error; 440 return error;
356 XFS_WANT_CORRUPTED_RETURN(i == 1); 441 XFS_WANT_CORRUPTED_RETURN(i == 1);
357 } 442 }
@@ -362,7 +447,7 @@ xfs_alloc_fixup_trees(
362 /* 447 /*
363 * No remaining freespace, just delete the by-block tree entry. 448 * No remaining freespace, just delete the by-block tree entry.
364 */ 449 */
365 if ((error = xfs_alloc_delete(bno_cur, &i))) 450 if ((error = xfs_btree_delete(bno_cur, &i)))
366 return error; 451 return error;
367 XFS_WANT_CORRUPTED_RETURN(i == 1); 452 XFS_WANT_CORRUPTED_RETURN(i == 1);
368 } else { 453 } else {
@@ -379,7 +464,7 @@ xfs_alloc_fixup_trees(
379 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 464 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
380 return error; 465 return error;
381 XFS_WANT_CORRUPTED_RETURN(i == 0); 466 XFS_WANT_CORRUPTED_RETURN(i == 0);
382 if ((error = xfs_alloc_insert(bno_cur, &i))) 467 if ((error = xfs_btree_insert(bno_cur, &i)))
383 return error; 468 return error;
384 XFS_WANT_CORRUPTED_RETURN(i == 1); 469 XFS_WANT_CORRUPTED_RETURN(i == 1);
385 } 470 }
@@ -640,8 +725,8 @@ xfs_alloc_ag_vextent_exact(
640 /* 725 /*
641 * Allocate/initialize a cursor for the by-number freespace btree. 726 * Allocate/initialize a cursor for the by-number freespace btree.
642 */ 727 */
643 bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 728 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
644 args->agno, XFS_BTNUM_BNO, NULL, 0); 729 args->agno, XFS_BTNUM_BNO);
645 /* 730 /*
646 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 731 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
647 * Look for the closest free block <= bno, it must contain bno 732 * Look for the closest free block <= bno, it must contain bno
@@ -696,8 +781,8 @@ xfs_alloc_ag_vextent_exact(
696 * We are allocating agbno for rlen [agbno .. end] 781 * We are allocating agbno for rlen [agbno .. end]
697 * Allocate/initialize a cursor for the by-size btree. 782 * Allocate/initialize a cursor for the by-size btree.
698 */ 783 */
699 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 784 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
700 args->agno, XFS_BTNUM_CNT, NULL, 0); 785 args->agno, XFS_BTNUM_CNT);
701 ASSERT(args->agbno + args->len <= 786 ASSERT(args->agbno + args->len <=
702 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 787 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
703 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 788 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
@@ -759,8 +844,8 @@ xfs_alloc_ag_vextent_near(
759 /* 844 /*
760 * Get a cursor for the by-size btree. 845 * Get a cursor for the by-size btree.
761 */ 846 */
762 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 847 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
763 args->agno, XFS_BTNUM_CNT, NULL, 0); 848 args->agno, XFS_BTNUM_CNT);
764 ltlen = 0; 849 ltlen = 0;
765 bno_cur_lt = bno_cur_gt = NULL; 850 bno_cur_lt = bno_cur_gt = NULL;
766 /* 851 /*
@@ -818,7 +903,7 @@ xfs_alloc_ag_vextent_near(
818 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 903 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
819 if (ltlen >= args->minlen) 904 if (ltlen >= args->minlen)
820 break; 905 break;
821 if ((error = xfs_alloc_increment(cnt_cur, 0, &i))) 906 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
822 goto error0; 907 goto error0;
823 } while (i); 908 } while (i);
824 ASSERT(ltlen >= args->minlen); 909 ASSERT(ltlen >= args->minlen);
@@ -828,7 +913,7 @@ xfs_alloc_ag_vextent_near(
828 i = cnt_cur->bc_ptrs[0]; 913 i = cnt_cur->bc_ptrs[0];
829 for (j = 1, blen = 0, bdiff = 0; 914 for (j = 1, blen = 0, bdiff = 0;
830 !error && j && (blen < args->maxlen || bdiff > 0); 915 !error && j && (blen < args->maxlen || bdiff > 0);
831 error = xfs_alloc_increment(cnt_cur, 0, &j)) { 916 error = xfs_btree_increment(cnt_cur, 0, &j)) {
832 /* 917 /*
833 * For each entry, decide if it's better than 918 * For each entry, decide if it's better than
834 * the previous best entry. 919 * the previous best entry.
@@ -886,8 +971,8 @@ xfs_alloc_ag_vextent_near(
886 /* 971 /*
887 * Set up a cursor for the by-bno tree. 972 * Set up a cursor for the by-bno tree.
888 */ 973 */
889 bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, 974 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
890 args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0); 975 args->agbp, args->agno, XFS_BTNUM_BNO);
891 /* 976 /*
892 * Fix up the btree entries. 977 * Fix up the btree entries.
893 */ 978 */
@@ -914,8 +999,8 @@ xfs_alloc_ag_vextent_near(
914 /* 999 /*
915 * Allocate and initialize the cursor for the leftward search. 1000 * Allocate and initialize the cursor for the leftward search.
916 */ 1001 */
917 bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 1002 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
918 args->agno, XFS_BTNUM_BNO, NULL, 0); 1003 args->agno, XFS_BTNUM_BNO);
919 /* 1004 /*
920 * Lookup <= bno to find the leftward search's starting point. 1005 * Lookup <= bno to find the leftward search's starting point.
921 */ 1006 */
@@ -938,7 +1023,7 @@ xfs_alloc_ag_vextent_near(
938 * Increment the cursor, so we will point at the entry just right 1023 * Increment the cursor, so we will point at the entry just right
939 * of the leftward entry if any, or to the leftmost entry. 1024 * of the leftward entry if any, or to the leftmost entry.
940 */ 1025 */
941 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) 1026 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
942 goto error0; 1027 goto error0;
943 if (!i) { 1028 if (!i) {
944 /* 1029 /*
@@ -961,7 +1046,7 @@ xfs_alloc_ag_vextent_near(
961 args->minlen, &ltbnoa, &ltlena); 1046 args->minlen, &ltbnoa, &ltlena);
962 if (ltlena >= args->minlen) 1047 if (ltlena >= args->minlen)
963 break; 1048 break;
964 if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) 1049 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
965 goto error0; 1050 goto error0;
966 if (!i) { 1051 if (!i) {
967 xfs_btree_del_cursor(bno_cur_lt, 1052 xfs_btree_del_cursor(bno_cur_lt,
@@ -977,7 +1062,7 @@ xfs_alloc_ag_vextent_near(
977 args->minlen, &gtbnoa, &gtlena); 1062 args->minlen, &gtbnoa, &gtlena);
978 if (gtlena >= args->minlen) 1063 if (gtlena >= args->minlen)
979 break; 1064 break;
980 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) 1065 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
981 goto error0; 1066 goto error0;
982 if (!i) { 1067 if (!i) {
983 xfs_btree_del_cursor(bno_cur_gt, 1068 xfs_btree_del_cursor(bno_cur_gt,
@@ -1066,7 +1151,7 @@ xfs_alloc_ag_vextent_near(
1066 /* 1151 /*
1067 * Fell off the right end. 1152 * Fell off the right end.
1068 */ 1153 */
1069 if ((error = xfs_alloc_increment( 1154 if ((error = xfs_btree_increment(
1070 bno_cur_gt, 0, &i))) 1155 bno_cur_gt, 0, &i)))
1071 goto error0; 1156 goto error0;
1072 if (!i) { 1157 if (!i) {
@@ -1162,7 +1247,7 @@ xfs_alloc_ag_vextent_near(
1162 /* 1247 /*
1163 * Fell off the left end. 1248 * Fell off the left end.
1164 */ 1249 */
1165 if ((error = xfs_alloc_decrement( 1250 if ((error = xfs_btree_decrement(
1166 bno_cur_lt, 0, &i))) 1251 bno_cur_lt, 0, &i)))
1167 goto error0; 1252 goto error0;
1168 if (!i) { 1253 if (!i) {
@@ -1267,8 +1352,8 @@ xfs_alloc_ag_vextent_size(
1267 /* 1352 /*
1268 * Allocate and initialize a cursor for the by-size btree. 1353 * Allocate and initialize a cursor for the by-size btree.
1269 */ 1354 */
1270 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 1355 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1271 args->agno, XFS_BTNUM_CNT, NULL, 0); 1356 args->agno, XFS_BTNUM_CNT);
1272 bno_cur = NULL; 1357 bno_cur = NULL;
1273 /* 1358 /*
1274 * Look for an entry >= maxlen+alignment-1 blocks. 1359 * Look for an entry >= maxlen+alignment-1 blocks.
@@ -1321,7 +1406,7 @@ xfs_alloc_ag_vextent_size(
1321 bestflen = flen; 1406 bestflen = flen;
1322 bestfbno = fbno; 1407 bestfbno = fbno;
1323 for (;;) { 1408 for (;;) {
1324 if ((error = xfs_alloc_decrement(cnt_cur, 0, &i))) 1409 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1325 goto error0; 1410 goto error0;
1326 if (i == 0) 1411 if (i == 0)
1327 break; 1412 break;
@@ -1372,8 +1457,8 @@ xfs_alloc_ag_vextent_size(
1372 /* 1457 /*
1373 * Allocate and initialize a cursor for the by-block tree. 1458 * Allocate and initialize a cursor for the by-block tree.
1374 */ 1459 */
1375 bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 1460 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1376 args->agno, XFS_BTNUM_BNO, NULL, 0); 1461 args->agno, XFS_BTNUM_BNO);
1377 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 1462 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1378 rbno, rlen, XFSA_FIXUP_CNT_OK))) 1463 rbno, rlen, XFSA_FIXUP_CNT_OK)))
1379 goto error0; 1464 goto error0;
@@ -1416,7 +1501,7 @@ xfs_alloc_ag_vextent_small(
1416 xfs_extlen_t flen; 1501 xfs_extlen_t flen;
1417 int i; 1502 int i;
1418 1503
1419 if ((error = xfs_alloc_decrement(ccur, 0, &i))) 1504 if ((error = xfs_btree_decrement(ccur, 0, &i)))
1420 goto error0; 1505 goto error0;
1421 if (i) { 1506 if (i) {
1422 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1507 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
@@ -1515,8 +1600,7 @@ xfs_free_ag_extent(
1515 /* 1600 /*
1516 * Allocate and initialize a cursor for the by-block btree. 1601 * Allocate and initialize a cursor for the by-block btree.
1517 */ 1602 */
1518 bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL, 1603 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1519 0);
1520 cnt_cur = NULL; 1604 cnt_cur = NULL;
1521 /* 1605 /*
1522 * Look for a neighboring block on the left (lower block numbers) 1606 * Look for a neighboring block on the left (lower block numbers)
@@ -1549,7 +1633,7 @@ xfs_free_ag_extent(
1549 * Look for a neighboring block on the right (higher block numbers) 1633 * Look for a neighboring block on the right (higher block numbers)
1550 * that is contiguous with this space. 1634 * that is contiguous with this space.
1551 */ 1635 */
1552 if ((error = xfs_alloc_increment(bno_cur, 0, &haveright))) 1636 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1553 goto error0; 1637 goto error0;
1554 if (haveright) { 1638 if (haveright) {
1555 /* 1639 /*
@@ -1575,8 +1659,7 @@ xfs_free_ag_extent(
1575 /* 1659 /*
1576 * Now allocate and initialize a cursor for the by-size tree. 1660 * Now allocate and initialize a cursor for the by-size tree.
1577 */ 1661 */
1578 cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL, 1662 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1579 0);
1580 /* 1663 /*
1581 * Have both left and right contiguous neighbors. 1664 * Have both left and right contiguous neighbors.
1582 * Merge all three into a single free block. 1665 * Merge all three into a single free block.
@@ -1588,7 +1671,7 @@ xfs_free_ag_extent(
1588 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1671 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1589 goto error0; 1672 goto error0;
1590 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1673 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1591 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1674 if ((error = xfs_btree_delete(cnt_cur, &i)))
1592 goto error0; 1675 goto error0;
1593 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1676 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1594 /* 1677 /*
@@ -1597,19 +1680,19 @@ xfs_free_ag_extent(
1597 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1680 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1598 goto error0; 1681 goto error0;
1599 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1682 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1600 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1683 if ((error = xfs_btree_delete(cnt_cur, &i)))
1601 goto error0; 1684 goto error0;
1602 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1685 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1603 /* 1686 /*
1604 * Delete the old by-block entry for the right block. 1687 * Delete the old by-block entry for the right block.
1605 */ 1688 */
1606 if ((error = xfs_alloc_delete(bno_cur, &i))) 1689 if ((error = xfs_btree_delete(bno_cur, &i)))
1607 goto error0; 1690 goto error0;
1608 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1691 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1609 /* 1692 /*
1610 * Move the by-block cursor back to the left neighbor. 1693 * Move the by-block cursor back to the left neighbor.
1611 */ 1694 */
1612 if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) 1695 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1613 goto error0; 1696 goto error0;
1614 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1697 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1615#ifdef DEBUG 1698#ifdef DEBUG
@@ -1648,14 +1731,14 @@ xfs_free_ag_extent(
1648 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1731 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1649 goto error0; 1732 goto error0;
1650 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1733 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1651 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1734 if ((error = xfs_btree_delete(cnt_cur, &i)))
1652 goto error0; 1735 goto error0;
1653 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1736 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1654 /* 1737 /*
1655 * Back up the by-block cursor to the left neighbor, and 1738 * Back up the by-block cursor to the left neighbor, and
1656 * update its length. 1739 * update its length.
1657 */ 1740 */
1658 if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) 1741 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1659 goto error0; 1742 goto error0;
1660 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1743 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1661 nbno = ltbno; 1744 nbno = ltbno;
@@ -1674,7 +1757,7 @@ xfs_free_ag_extent(
1674 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1757 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1675 goto error0; 1758 goto error0;
1676 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1759 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1677 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1760 if ((error = xfs_btree_delete(cnt_cur, &i)))
1678 goto error0; 1761 goto error0;
1679 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1762 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1680 /* 1763 /*
@@ -1693,7 +1776,7 @@ xfs_free_ag_extent(
1693 else { 1776 else {
1694 nbno = bno; 1777 nbno = bno;
1695 nlen = len; 1778 nlen = len;
1696 if ((error = xfs_alloc_insert(bno_cur, &i))) 1779 if ((error = xfs_btree_insert(bno_cur, &i)))
1697 goto error0; 1780 goto error0;
1698 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1781 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1699 } 1782 }
@@ -1705,7 +1788,7 @@ xfs_free_ag_extent(
1705 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 1788 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1706 goto error0; 1789 goto error0;
1707 XFS_WANT_CORRUPTED_GOTO(i == 0, error0); 1790 XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1708 if ((error = xfs_alloc_insert(cnt_cur, &i))) 1791 if ((error = xfs_btree_insert(cnt_cur, &i)))
1709 goto error0; 1792 goto error0;
1710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1793 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1711 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1794 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -2188,6 +2271,9 @@ xfs_alloc_read_agf(
2188 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2271 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2189 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2272 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2190 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); 2273 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
2274 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
2275 agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
2276 be32_to_cpu(agf->agf_length);
2191 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, 2277 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2192 XFS_RANDOM_ALLOC_READ_AGF))) { 2278 XFS_RANDOM_ALLOC_READ_AGF))) {
2193 XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", 2279 XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
@@ -2213,6 +2299,7 @@ xfs_alloc_read_agf(
2213#ifdef DEBUG 2299#ifdef DEBUG
2214 else if (!XFS_FORCED_SHUTDOWN(mp)) { 2300 else if (!XFS_FORCED_SHUTDOWN(mp)) {
2215 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); 2301 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2302 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2216 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); 2303 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2217 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); 2304 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2218 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == 2305 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5aec15d0651e..588172796f7b 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -121,6 +121,19 @@ extern ktrace_t *xfs_alloc_trace_buf;
121#define XFS_ALLOC_KTRACE_BUSYSEARCH 6 121#define XFS_ALLOC_KTRACE_BUSYSEARCH 6
122#endif 122#endif
123 123
124void
125xfs_alloc_mark_busy(xfs_trans_t *tp,
126 xfs_agnumber_t agno,
127 xfs_agblock_t bno,
128 xfs_extlen_t len);
129
130void
131xfs_alloc_clear_busy(xfs_trans_t *tp,
132 xfs_agnumber_t ag,
133 int idx);
134
135#endif /* __KERNEL__ */
136
124/* 137/*
125 * Compute and fill in value of m_ag_maxlevels. 138 * Compute and fill in value of m_ag_maxlevels.
126 */ 139 */
@@ -196,18 +209,4 @@ xfs_free_extent(
196 xfs_fsblock_t bno, /* starting block number of extent */ 209 xfs_fsblock_t bno, /* starting block number of extent */
197 xfs_extlen_t len); /* length of extent */ 210 xfs_extlen_t len); /* length of extent */
198 211
199void
200xfs_alloc_mark_busy(xfs_trans_t *tp,
201 xfs_agnumber_t agno,
202 xfs_agblock_t bno,
203 xfs_extlen_t len);
204
205void
206xfs_alloc_clear_busy(xfs_trans_t *tp,
207 xfs_agnumber_t ag,
208 int idx);
209
210
211#endif /* __KERNEL__ */
212
213#endif /* __XFS_ALLOC_H__ */ 212#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3ce2645508ae..733cb75a8c5d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -35,2177 +35,464 @@
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 40#include "xfs_alloc.h"
40#include "xfs_error.h" 41#include "xfs_error.h"
41 42
42/*
43 * Prototypes for internal functions.
44 */
45 43
46STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); 44STATIC struct xfs_btree_cur *
47STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); 45xfs_allocbt_dup_cursor(
48STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); 46 struct xfs_btree_cur *cur)
49STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); 47{
50STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *); 48 return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
51STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); 49 cur->bc_private.a.agbp, cur->bc_private.a.agno,
52STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *); 50 cur->bc_btnum);
53STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, 51}
54 xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
55STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
56 52
57/* 53STATIC void
58 * Internal functions. 54xfs_allocbt_set_root(
59 */ 55 struct xfs_btree_cur *cur,
56 union xfs_btree_ptr *ptr,
57 int inc)
58{
59 struct xfs_buf *agbp = cur->bc_private.a.agbp;
60 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
61 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
62 int btnum = cur->bc_btnum;
60 63
61/* 64 ASSERT(ptr->s != 0);
62 * Single level of the xfs_alloc_delete record deletion routine. 65
63 * Delete record pointed to by cur/level. 66 agf->agf_roots[btnum] = ptr->s;
64 * Remove the record from its block then rebalance the tree. 67 be32_add_cpu(&agf->agf_levels[btnum], inc);
65 * Return 0 for error, 1 for done, 2 to go on to the next level. 68 cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
66 */ 69
67STATIC int /* error */ 70 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
68xfs_alloc_delrec( 71}
69 xfs_btree_cur_t *cur, /* btree cursor */ 72
70 int level, /* level removing record from */ 73STATIC int
71 int *stat) /* fail/done/go-on */ 74xfs_allocbt_alloc_block(
75 struct xfs_btree_cur *cur,
76 union xfs_btree_ptr *start,
77 union xfs_btree_ptr *new,
78 int length,
79 int *stat)
72{ 80{
73 xfs_agf_t *agf; /* allocation group freelist header */ 81 int error;
74 xfs_alloc_block_t *block; /* btree block record/key lives in */ 82 xfs_agblock_t bno;
75 xfs_agblock_t bno; /* btree block number */
76 xfs_buf_t *bp; /* buffer for block */
77 int error; /* error return value */
78 int i; /* loop index */
79 xfs_alloc_key_t key; /* kp points here if block is level 0 */
80 xfs_agblock_t lbno; /* left block's block number */
81 xfs_buf_t *lbp; /* left block's buffer pointer */
82 xfs_alloc_block_t *left; /* left btree block */
83 xfs_alloc_key_t *lkp=NULL; /* left block key pointer */
84 xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */
85 int lrecs=0; /* number of records in left block */
86 xfs_alloc_rec_t *lrp; /* left block record pointer */
87 xfs_mount_t *mp; /* mount structure */
88 int ptr; /* index in btree block for this rec */
89 xfs_agblock_t rbno; /* right block's block number */
90 xfs_buf_t *rbp; /* right block's buffer pointer */
91 xfs_alloc_block_t *right; /* right btree block */
92 xfs_alloc_key_t *rkp; /* right block key pointer */
93 xfs_alloc_ptr_t *rpp; /* right block address pointer */
94 int rrecs=0; /* number of records in right block */
95 int numrecs;
96 xfs_alloc_rec_t *rrp; /* right block record pointer */
97 xfs_btree_cur_t *tcur; /* temporary btree cursor */
98 83
99 /* 84 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
100 * Get the index of the entry being deleted, check for nothing there. 85
101 */ 86 /* Allocate the new block from the freelist. If we can't, give up. */
102 ptr = cur->bc_ptrs[level]; 87 error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
103 if (ptr == 0) { 88 &bno, 1);
104 *stat = 0; 89 if (error) {
105 return 0; 90 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
106 }
107 /*
108 * Get the buffer & block containing the record or key/ptr.
109 */
110 bp = cur->bc_bufs[level];
111 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
112#ifdef DEBUG
113 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
114 return error; 91 return error;
115#endif 92 }
116 /* 93
117 * Fail if we're off the end of the block. 94 if (bno == NULLAGBLOCK) {
118 */ 95 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
119 numrecs = be16_to_cpu(block->bb_numrecs);
120 if (ptr > numrecs) {
121 *stat = 0; 96 *stat = 0;
122 return 0; 97 return 0;
123 } 98 }
124 XFS_STATS_INC(xs_abt_delrec);
125 /*
126 * It's a nonleaf. Excise the key and ptr being deleted, by
127 * sliding the entries past them down one.
128 * Log the changed areas of the block.
129 */
130 if (level > 0) {
131 lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
132 lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
133#ifdef DEBUG
134 for (i = ptr; i < numrecs; i++) {
135 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
136 return error;
137 }
138#endif
139 if (ptr < numrecs) {
140 memmove(&lkp[ptr - 1], &lkp[ptr],
141 (numrecs - ptr) * sizeof(*lkp));
142 memmove(&lpp[ptr - 1], &lpp[ptr],
143 (numrecs - ptr) * sizeof(*lpp));
144 xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1);
145 xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1);
146 }
147 }
148 /*
149 * It's a leaf. Excise the record being deleted, by sliding the
150 * entries past it down one. Log the changed areas of the block.
151 */
152 else {
153 lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
154 if (ptr < numrecs) {
155 memmove(&lrp[ptr - 1], &lrp[ptr],
156 (numrecs - ptr) * sizeof(*lrp));
157 xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1);
158 }
159 /*
160 * If it's the first record in the block, we'll need a key
161 * structure to pass up to the next level (updkey).
162 */
163 if (ptr == 1) {
164 key.ar_startblock = lrp->ar_startblock;
165 key.ar_blockcount = lrp->ar_blockcount;
166 lkp = &key;
167 }
168 }
169 /*
170 * Decrement and log the number of entries in the block.
171 */
172 numrecs--;
173 block->bb_numrecs = cpu_to_be16(numrecs);
174 xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
175 /*
176 * See if the longest free extent in the allocation group was
177 * changed by this operation. True if it's the by-size btree, and
178 * this is the leaf level, and there is no right sibling block,
179 * and this was the last record.
180 */
181 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
182 mp = cur->bc_mp;
183 99
184 if (level == 0 && 100 xfs_trans_agbtree_delta(cur->bc_tp, 1);
185 cur->bc_btnum == XFS_BTNUM_CNT && 101 new->s = cpu_to_be32(bno);
186 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
187 ptr > numrecs) {
188 ASSERT(ptr == numrecs + 1);
189 /*
190 * There are still records in the block. Grab the size
191 * from the last one.
192 */
193 if (numrecs) {
194 rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur);
195 agf->agf_longest = rrp->ar_blockcount;
196 }
197 /*
198 * No free extents left.
199 */
200 else
201 agf->agf_longest = 0;
202 mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
203 be32_to_cpu(agf->agf_longest);
204 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
205 XFS_AGF_LONGEST);
206 }
207 /*
208 * Is this the root level? If so, we're almost done.
209 */
210 if (level == cur->bc_nlevels - 1) {
211 /*
212 * If this is the root level,
213 * and there's only one entry left,
214 * and it's NOT the leaf level,
215 * then we can get rid of this level.
216 */
217 if (numrecs == 1 && level > 0) {
218 /*
219 * lpp is still set to the first pointer in the block.
220 * Make it the new root of the btree.
221 */
222 bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
223 agf->agf_roots[cur->bc_btnum] = *lpp;
224 be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1);
225 mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
226 /*
227 * Put this buffer/block on the ag's freelist.
228 */
229 error = xfs_alloc_put_freelist(cur->bc_tp,
230 cur->bc_private.a.agbp, NULL, bno, 1);
231 if (error)
232 return error;
233 /*
234 * Since blocks move to the free list without the
235 * coordination used in xfs_bmap_finish, we can't allow
236 * block to be available for reallocation and
237 * non-transaction writing (user data) until we know
238 * that the transaction that moved it to the free list
239 * is permanently on disk. We track the blocks by
240 * declaring these blocks as "busy"; the busy list is
241 * maintained on a per-ag basis and each transaction
242 * records which entries should be removed when the
243 * iclog commits to disk. If a busy block is
244 * allocated, the iclog is pushed up to the LSN
245 * that freed the block.
246 */
247 xfs_alloc_mark_busy(cur->bc_tp,
248 be32_to_cpu(agf->agf_seqno), bno, 1);
249 102
250 xfs_trans_agbtree_delta(cur->bc_tp, -1); 103 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
251 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, 104 *stat = 1;
252 XFS_AGF_ROOTS | XFS_AGF_LEVELS); 105 return 0;
253 /* 106}
254 * Update the cursor so there's one fewer level.
255 */
256 xfs_btree_setbuf(cur, level, NULL);
257 cur->bc_nlevels--;
258 } else if (level > 0 &&
259 (error = xfs_alloc_decrement(cur, level, &i)))
260 return error;
261 *stat = 1;
262 return 0;
263 }
264 /*
265 * If we deleted the leftmost entry in the block, update the
266 * key values above us in the tree.
267 */
268 if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
269 return error;
270 /*
271 * If the number of records remaining in the block is at least
272 * the minimum, we're done.
273 */
274 if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
275 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
276 return error;
277 *stat = 1;
278 return 0;
279 }
280 /*
281 * Otherwise, we have to move some records around to keep the
282 * tree balanced. Look at the left and right sibling blocks to
283 * see if we can re-balance by moving only one record.
284 */
285 rbno = be32_to_cpu(block->bb_rightsib);
286 lbno = be32_to_cpu(block->bb_leftsib);
287 bno = NULLAGBLOCK;
288 ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
289 /*
290 * Duplicate the cursor so our btree manipulations here won't
291 * disrupt the next level up.
292 */
293 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
294 return error;
295 /*
296 * If there's a right sibling, see if it's ok to shift an entry
297 * out of it.
298 */
299 if (rbno != NULLAGBLOCK) {
300 /*
301 * Move the temp cursor to the last entry in the next block.
302 * Actually any entry but the first would suffice.
303 */
304 i = xfs_btree_lastrec(tcur, level);
305 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
306 if ((error = xfs_alloc_increment(tcur, level, &i)))
307 goto error0;
308 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
309 i = xfs_btree_lastrec(tcur, level);
310 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
311 /*
312 * Grab a pointer to the block.
313 */
314 rbp = tcur->bc_bufs[level];
315 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
316#ifdef DEBUG
317 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
318 goto error0;
319#endif
320 /*
321 * Grab the current block number, for future use.
322 */
323 bno = be32_to_cpu(right->bb_leftsib);
324 /*
325 * If right block is full enough so that removing one entry
326 * won't make it too empty, and left-shifting an entry out
327 * of right to us works, we're done.
328 */
329 if (be16_to_cpu(right->bb_numrecs) - 1 >=
330 XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
331 if ((error = xfs_alloc_lshift(tcur, level, &i)))
332 goto error0;
333 if (i) {
334 ASSERT(be16_to_cpu(block->bb_numrecs) >=
335 XFS_ALLOC_BLOCK_MINRECS(level, cur));
336 xfs_btree_del_cursor(tcur,
337 XFS_BTREE_NOERROR);
338 if (level > 0 &&
339 (error = xfs_alloc_decrement(cur, level,
340 &i)))
341 return error;
342 *stat = 1;
343 return 0;
344 }
345 }
346 /*
347 * Otherwise, grab the number of records in right for
348 * future reference, and fix up the temp cursor to point
349 * to our block again (last record).
350 */
351 rrecs = be16_to_cpu(right->bb_numrecs);
352 if (lbno != NULLAGBLOCK) {
353 i = xfs_btree_firstrec(tcur, level);
354 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
355 if ((error = xfs_alloc_decrement(tcur, level, &i)))
356 goto error0;
357 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
358 }
359 }
360 /*
361 * If there's a left sibling, see if it's ok to shift an entry
362 * out of it.
363 */
364 if (lbno != NULLAGBLOCK) {
365 /*
366 * Move the temp cursor to the first entry in the
367 * previous block.
368 */
369 i = xfs_btree_firstrec(tcur, level);
370 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
371 if ((error = xfs_alloc_decrement(tcur, level, &i)))
372 goto error0;
373 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
374 xfs_btree_firstrec(tcur, level);
375 /*
376 * Grab a pointer to the block.
377 */
378 lbp = tcur->bc_bufs[level];
379 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
380#ifdef DEBUG
381 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
382 goto error0;
383#endif
384 /*
385 * Grab the current block number, for future use.
386 */
387 bno = be32_to_cpu(left->bb_rightsib);
388 /*
389 * If left block is full enough so that removing one entry
390 * won't make it too empty, and right-shifting an entry out
391 * of left to us works, we're done.
392 */
393 if (be16_to_cpu(left->bb_numrecs) - 1 >=
394 XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
395 if ((error = xfs_alloc_rshift(tcur, level, &i)))
396 goto error0;
397 if (i) {
398 ASSERT(be16_to_cpu(block->bb_numrecs) >=
399 XFS_ALLOC_BLOCK_MINRECS(level, cur));
400 xfs_btree_del_cursor(tcur,
401 XFS_BTREE_NOERROR);
402 if (level == 0)
403 cur->bc_ptrs[0]++;
404 *stat = 1;
405 return 0;
406 }
407 }
408 /*
409 * Otherwise, grab the number of records in right for
410 * future reference.
411 */
412 lrecs = be16_to_cpu(left->bb_numrecs);
413 }
414 /*
415 * Delete the temp cursor, we're done with it.
416 */
417 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
418 /*
419 * If here, we need to do a join to keep the tree balanced.
420 */
421 ASSERT(bno != NULLAGBLOCK);
422 /*
423 * See if we can join with the left neighbor block.
424 */
425 if (lbno != NULLAGBLOCK &&
426 lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
427 /*
428 * Set "right" to be the starting block,
429 * "left" to be the left neighbor.
430 */
431 rbno = bno;
432 right = block;
433 rrecs = be16_to_cpu(right->bb_numrecs);
434 rbp = bp;
435 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
436 cur->bc_private.a.agno, lbno, 0, &lbp,
437 XFS_ALLOC_BTREE_REF)))
438 return error;
439 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
440 lrecs = be16_to_cpu(left->bb_numrecs);
441 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
442 return error;
443 }
444 /*
445 * If that won't work, see if we can join with the right neighbor block.
446 */
447 else if (rbno != NULLAGBLOCK &&
448 rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
449 /*
450 * Set "left" to be the starting block,
451 * "right" to be the right neighbor.
452 */
453 lbno = bno;
454 left = block;
455 lrecs = be16_to_cpu(left->bb_numrecs);
456 lbp = bp;
457 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
458 cur->bc_private.a.agno, rbno, 0, &rbp,
459 XFS_ALLOC_BTREE_REF)))
460 return error;
461 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
462 rrecs = be16_to_cpu(right->bb_numrecs);
463 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
464 return error;
465 }
466 /*
467 * Otherwise, we can't fix the imbalance.
468 * Just return. This is probably a logic error, but it's not fatal.
469 */
470 else {
471 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
472 return error;
473 *stat = 1;
474 return 0;
475 }
476 /*
477 * We're now going to join "left" and "right" by moving all the stuff
478 * in "right" to "left" and deleting "right".
479 */
480 if (level > 0) {
481 /*
482 * It's a non-leaf. Move keys and pointers.
483 */
484 lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur);
485 lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur);
486 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
487 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
488#ifdef DEBUG
489 for (i = 0; i < rrecs; i++) {
490 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
491 return error;
492 }
493#endif
494 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
495 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
496 xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
497 xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
498 } else {
499 /*
500 * It's a leaf. Move records.
501 */
502 lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur);
503 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
504 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
505 xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
506 }
507 /*
508 * If we joined with the left neighbor, set the buffer in the
509 * cursor to the left block, and fix up the index.
510 */
511 if (bp != lbp) {
512 xfs_btree_setbuf(cur, level, lbp);
513 cur->bc_ptrs[level] += lrecs;
514 }
515 /*
516 * If we joined with the right neighbor and there's a level above
517 * us, increment the cursor at that level.
518 */
519 else if (level + 1 < cur->bc_nlevels &&
520 (error = xfs_alloc_increment(cur, level + 1, &i)))
521 return error;
522 /*
523 * Fix up the number of records in the surviving block.
524 */
525 lrecs += rrecs;
526 left->bb_numrecs = cpu_to_be16(lrecs);
527 /*
528 * Fix up the right block pointer in the surviving block, and log it.
529 */
530 left->bb_rightsib = right->bb_rightsib;
531 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
532 /*
533 * If there is a right sibling now, make it point to the
534 * remaining block.
535 */
536 if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
537 xfs_alloc_block_t *rrblock;
538 xfs_buf_t *rrbp;
539 107
540 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 108STATIC int
541 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, 109xfs_allocbt_free_block(
542 &rrbp, XFS_ALLOC_BTREE_REF))) 110 struct xfs_btree_cur *cur,
543 return error; 111 struct xfs_buf *bp)
544 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); 112{
545 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) 113 struct xfs_buf *agbp = cur->bc_private.a.agbp;
546 return error; 114 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
547 rrblock->bb_leftsib = cpu_to_be32(lbno); 115 xfs_agblock_t bno;
548 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); 116 int error;
549 } 117
550 /* 118 bno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(bp));
551 * Free the deleting block by putting it on the freelist. 119 error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
552 */
553 error = xfs_alloc_put_freelist(cur->bc_tp,
554 cur->bc_private.a.agbp, NULL, rbno, 1);
555 if (error) 120 if (error)
556 return error; 121 return error;
122
557 /* 123 /*
558 * Since blocks move to the free list without the coordination 124 * Since blocks move to the free list without the coordination used in
559 * used in xfs_bmap_finish, we can't allow block to be available 125 * xfs_bmap_finish, we can't allow block to be available for
560 * for reallocation and non-transaction writing (user data) 126 * reallocation and non-transaction writing (user data) until we know
561 * until we know that the transaction that moved it to the free 127 * that the transaction that moved it to the free list is permanently
562 * list is permanently on disk. We track the blocks by declaring 128 * on disk. We track the blocks by declaring these blocks as "busy";
563 * these blocks as "busy"; the busy list is maintained on a 129 * the busy list is maintained on a per-ag basis and each transaction
564 * per-ag basis and each transaction records which entries 130 * records which entries should be removed when the iclog commits to
565 * should be removed when the iclog commits to disk. If a 131 * disk. If a busy block is allocated, the iclog is pushed up to the
566 * busy block is allocated, the iclog is pushed up to the
567 * LSN that freed the block. 132 * LSN that freed the block.
568 */ 133 */
569 xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); 134 xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
570 xfs_trans_agbtree_delta(cur->bc_tp, -1); 135 xfs_trans_agbtree_delta(cur->bc_tp, -1);
571
572 /*
573 * Adjust the current level's cursor so that we're left referring
574 * to the right node, after we're done.
575 * If this leaves the ptr value 0 our caller will fix it up.
576 */
577 if (level > 0)
578 cur->bc_ptrs[level]--;
579 /*
580 * Return value means the next level up has something to do.
581 */
582 *stat = 2;
583 return 0; 136 return 0;
584
585error0:
586 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
587 return error;
588} 137}
589 138
590/* 139/*
591 * Insert one record/level. Return information to the caller 140 * Update the longest extent in the AGF
592 * allowing the next level up to proceed if necessary.
593 */ 141 */
594STATIC int /* error */ 142STATIC void
595xfs_alloc_insrec( 143xfs_allocbt_update_lastrec(
596 xfs_btree_cur_t *cur, /* btree cursor */ 144 struct xfs_btree_cur *cur,
597 int level, /* level to insert record at */ 145 struct xfs_btree_block *block,
598 xfs_agblock_t *bnop, /* i/o: block number inserted */ 146 union xfs_btree_rec *rec,
599 xfs_alloc_rec_t *recp, /* i/o: record data inserted */ 147 int ptr,
600 xfs_btree_cur_t **curp, /* output: new cursor replacing cur */ 148 int reason)
601 int *stat) /* output: success/failure */
602{ 149{
603 xfs_agf_t *agf; /* allocation group freelist header */ 150 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
604 xfs_alloc_block_t *block; /* btree block record/key lives in */ 151 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
605 xfs_buf_t *bp; /* buffer for block */ 152 __be32 len;
606 int error; /* error return value */
607 int i; /* loop index */
608 xfs_alloc_key_t key; /* key value being inserted */
609 xfs_alloc_key_t *kp; /* pointer to btree keys */
610 xfs_agblock_t nbno; /* block number of allocated block */
611 xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
612 xfs_alloc_key_t nkey; /* new key value, from split */
613 xfs_alloc_rec_t nrec; /* new record value, for caller */
614 int numrecs; 153 int numrecs;
615 int optr; /* old ptr value */
616 xfs_alloc_ptr_t *pp; /* pointer to btree addresses */
617 int ptr; /* index in btree block for this rec */
618 xfs_alloc_rec_t *rp; /* pointer to btree records */
619 154
620 ASSERT(be32_to_cpu(recp->ar_blockcount) > 0); 155 ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
156
157 switch (reason) {
158 case LASTREC_UPDATE:
159 /*
160 * If this is the last leaf block and it's the last record,
161 * then update the size of the longest extent in the AG.
162 */
163 if (ptr != xfs_btree_get_numrecs(block))
164 return;
165 len = rec->alloc.ar_blockcount;
166 break;
167 case LASTREC_INSREC:
168 if (be32_to_cpu(rec->alloc.ar_blockcount) <=
169 be32_to_cpu(agf->agf_longest))
170 return;
171 len = rec->alloc.ar_blockcount;
172 break;
173 case LASTREC_DELREC:
174 numrecs = xfs_btree_get_numrecs(block);
175 if (ptr <= numrecs)
176 return;
177 ASSERT(ptr == numrecs + 1);
621 178
622 /* 179 if (numrecs) {
623 * GCC doesn't understand the (arguably complex) control flow in 180 xfs_alloc_rec_t *rrp;
624 * this function and complains about uninitialized structure fields
625 * without this.
626 */
627 memset(&nrec, 0, sizeof(nrec));
628 181
629 /* 182 rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
630 * If we made it to the root level, allocate a new root block 183 len = rrp->ar_blockcount;
631 * and we're done.
632 */
633 if (level >= cur->bc_nlevels) {
634 XFS_STATS_INC(xs_abt_insrec);
635 if ((error = xfs_alloc_newroot(cur, &i)))
636 return error;
637 *bnop = NULLAGBLOCK;
638 *stat = i;
639 return 0;
640 }
641 /*
642 * Make a key out of the record data to be inserted, and save it.
643 */
644 key.ar_startblock = recp->ar_startblock;
645 key.ar_blockcount = recp->ar_blockcount;
646 optr = ptr = cur->bc_ptrs[level];
647 /*
648 * If we're off the left edge, return failure.
649 */
650 if (ptr == 0) {
651 *stat = 0;
652 return 0;
653 }
654 XFS_STATS_INC(xs_abt_insrec);
655 /*
656 * Get pointers to the btree buffer and block.
657 */
658 bp = cur->bc_bufs[level];
659 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
660 numrecs = be16_to_cpu(block->bb_numrecs);
661#ifdef DEBUG
662 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
663 return error;
664 /*
665 * Check that the new entry is being inserted in the right place.
666 */
667 if (ptr <= numrecs) {
668 if (level == 0) {
669 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
670 xfs_btree_check_rec(cur->bc_btnum, recp, rp);
671 } else { 184 } else {
672 kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); 185 len = 0;
673 xfs_btree_check_key(cur->bc_btnum, &key, kp);
674 }
675 }
676#endif
677 nbno = NULLAGBLOCK;
678 ncur = NULL;
679 /*
680 * If the block is full, we can't insert the new entry until we
681 * make the block un-full.
682 */
683 if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
684 /*
685 * First, try shifting an entry to the right neighbor.
686 */
687 if ((error = xfs_alloc_rshift(cur, level, &i)))
688 return error;
689 if (i) {
690 /* nothing */
691 }
692 /*
693 * Next, try shifting an entry to the left neighbor.
694 */
695 else {
696 if ((error = xfs_alloc_lshift(cur, level, &i)))
697 return error;
698 if (i)
699 optr = ptr = cur->bc_ptrs[level];
700 else {
701 /*
702 * Next, try splitting the current block in
703 * half. If this works we have to re-set our
704 * variables because we could be in a
705 * different block now.
706 */
707 if ((error = xfs_alloc_split(cur, level, &nbno,
708 &nkey, &ncur, &i)))
709 return error;
710 if (i) {
711 bp = cur->bc_bufs[level];
712 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
713#ifdef DEBUG
714 if ((error =
715 xfs_btree_check_sblock(cur,
716 block, level, bp)))
717 return error;
718#endif
719 ptr = cur->bc_ptrs[level];
720 nrec.ar_startblock = nkey.ar_startblock;
721 nrec.ar_blockcount = nkey.ar_blockcount;
722 }
723 /*
724 * Otherwise the insert fails.
725 */
726 else {
727 *stat = 0;
728 return 0;
729 }
730 }
731 }
732 }
733 /*
734 * At this point we know there's room for our new entry in the block
735 * we're pointing at.
736 */
737 numrecs = be16_to_cpu(block->bb_numrecs);
738 if (level > 0) {
739 /*
740 * It's a non-leaf entry. Make a hole for the new data
741 * in the key and ptr regions of the block.
742 */
743 kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
744 pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
745#ifdef DEBUG
746 for (i = numrecs; i >= ptr; i--) {
747 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
748 return error;
749 } 186 }
750#endif
751 memmove(&kp[ptr], &kp[ptr - 1],
752 (numrecs - ptr + 1) * sizeof(*kp));
753 memmove(&pp[ptr], &pp[ptr - 1],
754 (numrecs - ptr + 1) * sizeof(*pp));
755#ifdef DEBUG
756 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
757 return error;
758#endif
759 /*
760 * Now stuff the new data in, bump numrecs and log the new data.
761 */
762 kp[ptr - 1] = key;
763 pp[ptr - 1] = cpu_to_be32(*bnop);
764 numrecs++;
765 block->bb_numrecs = cpu_to_be16(numrecs);
766 xfs_alloc_log_keys(cur, bp, ptr, numrecs);
767 xfs_alloc_log_ptrs(cur, bp, ptr, numrecs);
768#ifdef DEBUG
769 if (ptr < numrecs)
770 xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
771 kp + ptr);
772#endif
773 } else {
774 /*
775 * It's a leaf entry. Make a hole for the new record.
776 */
777 rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
778 memmove(&rp[ptr], &rp[ptr - 1],
779 (numrecs - ptr + 1) * sizeof(*rp));
780 /*
781 * Now stuff the new record in, bump numrecs
782 * and log the new data.
783 */
784 rp[ptr - 1] = *recp;
785 numrecs++;
786 block->bb_numrecs = cpu_to_be16(numrecs);
787 xfs_alloc_log_recs(cur, bp, ptr, numrecs);
788#ifdef DEBUG
789 if (ptr < numrecs)
790 xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
791 rp + ptr);
792#endif
793 }
794 /*
795 * Log the new number of records in the btree header.
796 */
797 xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
798 /*
799 * If we inserted at the start of a block, update the parents' keys.
800 */
801 if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
802 return error;
803 /*
804 * Look to see if the longest extent in the allocation group
805 * needs to be updated.
806 */
807 187
808 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); 188 break;
809 if (level == 0 && 189 default:
810 cur->bc_btnum == XFS_BTNUM_CNT && 190 ASSERT(0);
811 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && 191 return;
812 be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
813 /*
814 * If this is a leaf in the by-size btree and there
815 * is no right sibling block and this block is bigger
816 * than the previous longest block, update it.
817 */
818 agf->agf_longest = recp->ar_blockcount;
819 cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
820 = be32_to_cpu(recp->ar_blockcount);
821 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
822 XFS_AGF_LONGEST);
823 } 192 }
824 /* 193
825 * Return the new block number, if any. 194 agf->agf_longest = len;
826 * If there is one, give back a record value and a cursor too. 195 cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
827 */ 196 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
828 *bnop = nbno;
829 if (nbno != NULLAGBLOCK) {
830 *recp = nrec;
831 *curp = ncur;
832 }
833 *stat = 1;
834 return 0;
835} 197}
836 198
837/* 199STATIC int
838 * Log header fields from a btree block. 200xfs_allocbt_get_minrecs(
839 */ 201 struct xfs_btree_cur *cur,
840STATIC void 202 int level)
841xfs_alloc_log_block(
842 xfs_trans_t *tp, /* transaction pointer */
843 xfs_buf_t *bp, /* buffer containing btree block */
844 int fields) /* mask of fields: XFS_BB_... */
845{ 203{
846 int first; /* first byte offset logged */ 204 return cur->bc_mp->m_alloc_mnr[level != 0];
847 int last; /* last byte offset logged */ 205}
848 static const short offsets[] = { /* table of offsets */
849 offsetof(xfs_alloc_block_t, bb_magic),
850 offsetof(xfs_alloc_block_t, bb_level),
851 offsetof(xfs_alloc_block_t, bb_numrecs),
852 offsetof(xfs_alloc_block_t, bb_leftsib),
853 offsetof(xfs_alloc_block_t, bb_rightsib),
854 sizeof(xfs_alloc_block_t)
855 };
856 206
857 xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); 207STATIC int
858 xfs_trans_log_buf(tp, bp, first, last); 208xfs_allocbt_get_maxrecs(
209 struct xfs_btree_cur *cur,
210 int level)
211{
212 return cur->bc_mp->m_alloc_mxr[level != 0];
859} 213}
860 214
861/*
862 * Log keys from a btree block (nonleaf).
863 */
864STATIC void 215STATIC void
865xfs_alloc_log_keys( 216xfs_allocbt_init_key_from_rec(
866 xfs_btree_cur_t *cur, /* btree cursor */ 217 union xfs_btree_key *key,
867 xfs_buf_t *bp, /* buffer containing btree block */ 218 union xfs_btree_rec *rec)
868 int kfirst, /* index of first key to log */
869 int klast) /* index of last key to log */
870{ 219{
871 xfs_alloc_block_t *block; /* btree block to log from */ 220 ASSERT(rec->alloc.ar_startblock != 0);
872 int first; /* first byte offset logged */
873 xfs_alloc_key_t *kp; /* key pointer in btree block */
874 int last; /* last byte offset logged */
875 221
876 block = XFS_BUF_TO_ALLOC_BLOCK(bp); 222 key->alloc.ar_startblock = rec->alloc.ar_startblock;
877 kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); 223 key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
878 first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
879 last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
880 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
881} 224}
882 225
883/*
884 * Log block pointer fields from a btree block (nonleaf).
885 */
886STATIC void 226STATIC void
887xfs_alloc_log_ptrs( 227xfs_allocbt_init_rec_from_key(
888 xfs_btree_cur_t *cur, /* btree cursor */ 228 union xfs_btree_key *key,
889 xfs_buf_t *bp, /* buffer containing btree block */ 229 union xfs_btree_rec *rec)
890 int pfirst, /* index of first pointer to log */
891 int plast) /* index of last pointer to log */
892{ 230{
893 xfs_alloc_block_t *block; /* btree block to log from */ 231 ASSERT(key->alloc.ar_startblock != 0);
894 int first; /* first byte offset logged */
895 int last; /* last byte offset logged */
896 xfs_alloc_ptr_t *pp; /* block-pointer pointer in btree blk */
897 232
898 block = XFS_BUF_TO_ALLOC_BLOCK(bp); 233 rec->alloc.ar_startblock = key->alloc.ar_startblock;
899 pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); 234 rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
900 first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
901 last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
902 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
903} 235}
904 236
905/*
906 * Log records from a btree block (leaf).
907 */
908STATIC void 237STATIC void
909xfs_alloc_log_recs( 238xfs_allocbt_init_rec_from_cur(
910 xfs_btree_cur_t *cur, /* btree cursor */ 239 struct xfs_btree_cur *cur,
911 xfs_buf_t *bp, /* buffer containing btree block */ 240 union xfs_btree_rec *rec)
912 int rfirst, /* index of first record to log */
913 int rlast) /* index of last record to log */
914{ 241{
915 xfs_alloc_block_t *block; /* btree block to log from */ 242 ASSERT(cur->bc_rec.a.ar_startblock != 0);
916 int first; /* first byte offset logged */
917 int last; /* last byte offset logged */
918 xfs_alloc_rec_t *rp; /* record pointer for btree block */
919
920 243
921 block = XFS_BUF_TO_ALLOC_BLOCK(bp); 244 rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
922 rp = XFS_ALLOC_REC_ADDR(block, 1, cur); 245 rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
923#ifdef DEBUG
924 {
925 xfs_agf_t *agf;
926 xfs_alloc_rec_t *p;
927
928 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
929 for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
930 ASSERT(be32_to_cpu(p->ar_startblock) +
931 be32_to_cpu(p->ar_blockcount) <=
932 be32_to_cpu(agf->agf_length));
933 }
934#endif
935 first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
936 last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
937 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
938} 246}
939 247
940/* 248STATIC void
941 * Lookup the record. The cursor is made to point to it, based on dir. 249xfs_allocbt_init_ptr_from_cur(
942 * Return 0 if can't find any such record, 1 for success. 250 struct xfs_btree_cur *cur,
943 */ 251 union xfs_btree_ptr *ptr)
944STATIC int /* error */
945xfs_alloc_lookup(
946 xfs_btree_cur_t *cur, /* btree cursor */
947 xfs_lookup_t dir, /* <=, ==, or >= */
948 int *stat) /* success/failure */
949{ 252{
950 xfs_agblock_t agbno; /* a.g. relative btree block number */ 253 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
951 xfs_agnumber_t agno; /* allocation group number */
952 xfs_alloc_block_t *block=NULL; /* current btree block */
953 int diff; /* difference for the current key */
954 int error; /* error return value */
955 int keyno=0; /* current key number */
956 int level; /* level in the btree */
957 xfs_mount_t *mp; /* file system mount point */
958
959 XFS_STATS_INC(xs_abt_lookup);
960 /*
961 * Get the allocation group header, and the root block number.
962 */
963 mp = cur->bc_mp;
964
965 {
966 xfs_agf_t *agf; /* a.g. freespace header */
967
968 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
969 agno = be32_to_cpu(agf->agf_seqno);
970 agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
971 }
972 /*
973 * Iterate over each level in the btree, starting at the root.
974 * For each level above the leaves, find the key we need, based
975 * on the lookup record, then follow the corresponding block
976 * pointer down to the next level.
977 */
978 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
979 xfs_buf_t *bp; /* buffer pointer for btree block */
980 xfs_daddr_t d; /* disk address of btree block */
981
982 /*
983 * Get the disk address we're looking for.
984 */
985 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
986 /*
987 * If the old buffer at this level is for a different block,
988 * throw it away, otherwise just use it.
989 */
990 bp = cur->bc_bufs[level];
991 if (bp && XFS_BUF_ADDR(bp) != d)
992 bp = NULL;
993 if (!bp) {
994 /*
995 * Need to get a new buffer. Read it, then
996 * set it in the cursor, releasing the old one.
997 */
998 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
999 agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
1000 return error;
1001 xfs_btree_setbuf(cur, level, bp);
1002 /*
1003 * Point to the btree block, now that we have the buffer
1004 */
1005 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1006 if ((error = xfs_btree_check_sblock(cur, block, level,
1007 bp)))
1008 return error;
1009 } else
1010 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1011 /*
1012 * If we already had a key match at a higher level, we know
1013 * we need to use the first entry in this block.
1014 */
1015 if (diff == 0)
1016 keyno = 1;
1017 /*
1018 * Otherwise we need to search this block. Do a binary search.
1019 */
1020 else {
1021 int high; /* high entry number */
1022 xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */
1023 xfs_alloc_rec_t *krbase=NULL;/* base of records in block */
1024 int low; /* low entry number */
1025
1026 /*
1027 * Get a pointer to keys or records.
1028 */
1029 if (level > 0)
1030 kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
1031 else
1032 krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
1033 /*
1034 * Set low and high entry numbers, 1-based.
1035 */
1036 low = 1;
1037 if (!(high = be16_to_cpu(block->bb_numrecs))) {
1038 /*
1039 * If the block is empty, the tree must
1040 * be an empty leaf.
1041 */
1042 ASSERT(level == 0 && cur->bc_nlevels == 1);
1043 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1044 *stat = 0;
1045 return 0;
1046 }
1047 /*
1048 * Binary search the block.
1049 */
1050 while (low <= high) {
1051 xfs_extlen_t blockcount; /* key value */
1052 xfs_agblock_t startblock; /* key value */
1053
1054 XFS_STATS_INC(xs_abt_compare);
1055 /*
1056 * keyno is average of low and high.
1057 */
1058 keyno = (low + high) >> 1;
1059 /*
1060 * Get startblock & blockcount.
1061 */
1062 if (level > 0) {
1063 xfs_alloc_key_t *kkp;
1064
1065 kkp = kkbase + keyno - 1;
1066 startblock = be32_to_cpu(kkp->ar_startblock);
1067 blockcount = be32_to_cpu(kkp->ar_blockcount);
1068 } else {
1069 xfs_alloc_rec_t *krp;
1070 254
1071 krp = krbase + keyno - 1; 255 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
1072 startblock = be32_to_cpu(krp->ar_startblock); 256 ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
1073 blockcount = be32_to_cpu(krp->ar_blockcount);
1074 }
1075 /*
1076 * Compute difference to get next direction.
1077 */
1078 if (cur->bc_btnum == XFS_BTNUM_BNO)
1079 diff = (int)startblock -
1080 (int)cur->bc_rec.a.ar_startblock;
1081 else if (!(diff = (int)blockcount -
1082 (int)cur->bc_rec.a.ar_blockcount))
1083 diff = (int)startblock -
1084 (int)cur->bc_rec.a.ar_startblock;
1085 /*
1086 * Less than, move right.
1087 */
1088 if (diff < 0)
1089 low = keyno + 1;
1090 /*
1091 * Greater than, move left.
1092 */
1093 else if (diff > 0)
1094 high = keyno - 1;
1095 /*
1096 * Equal, we're done.
1097 */
1098 else
1099 break;
1100 }
1101 }
1102 /*
1103 * If there are more levels, set up for the next level
1104 * by getting the block number and filling in the cursor.
1105 */
1106 if (level > 0) {
1107 /*
1108 * If we moved left, need the previous key number,
1109 * unless there isn't one.
1110 */
1111 if (diff > 0 && --keyno < 1)
1112 keyno = 1;
1113 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
1114#ifdef DEBUG
1115 if ((error = xfs_btree_check_sptr(cur, agbno, level)))
1116 return error;
1117#endif
1118 cur->bc_ptrs[level] = keyno;
1119 }
1120 }
1121 /*
1122 * Done with the search.
1123 * See if we need to adjust the results.
1124 */
1125 if (dir != XFS_LOOKUP_LE && diff < 0) {
1126 keyno++;
1127 /*
1128 * If ge search and we went off the end of the block, but it's
1129 * not the last block, we're in the wrong block.
1130 */
1131 if (dir == XFS_LOOKUP_GE &&
1132 keyno > be16_to_cpu(block->bb_numrecs) &&
1133 be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1134 int i;
1135 257
1136 cur->bc_ptrs[0] = keyno; 258 ptr->s = agf->agf_roots[cur->bc_btnum];
1137 if ((error = xfs_alloc_increment(cur, 0, &i)))
1138 return error;
1139 XFS_WANT_CORRUPTED_RETURN(i == 1);
1140 *stat = 1;
1141 return 0;
1142 }
1143 }
1144 else if (dir == XFS_LOOKUP_LE && diff > 0)
1145 keyno--;
1146 cur->bc_ptrs[0] = keyno;
1147 /*
1148 * Return if we succeeded or not.
1149 */
1150 if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1151 *stat = 0;
1152 else
1153 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1154 return 0;
1155} 259}
1156 260
1157/* 261STATIC __int64_t
1158 * Move 1 record left from cur/level if possible. 262xfs_allocbt_key_diff(
1159 * Update cur to reflect the new path. 263 struct xfs_btree_cur *cur,
1160 */ 264 union xfs_btree_key *key)
1161STATIC int /* error */
1162xfs_alloc_lshift(
1163 xfs_btree_cur_t *cur, /* btree cursor */
1164 int level, /* level to shift record on */
1165 int *stat) /* success/failure */
1166{ 265{
1167 int error; /* error return value */ 266 xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
1168#ifdef DEBUG 267 xfs_alloc_key_t *kp = &key->alloc;
1169 int i; /* loop index */ 268 __int64_t diff;
1170#endif
1171 xfs_alloc_key_t key; /* key value for leaf level upward */
1172 xfs_buf_t *lbp; /* buffer for left neighbor block */
1173 xfs_alloc_block_t *left; /* left neighbor btree block */
1174 int nrec; /* new number of left block entries */
1175 xfs_buf_t *rbp; /* buffer for right (current) block */
1176 xfs_alloc_block_t *right; /* right (current) btree block */
1177 xfs_alloc_key_t *rkp=NULL; /* key pointer for right block */
1178 xfs_alloc_ptr_t *rpp=NULL; /* address pointer for right block */
1179 xfs_alloc_rec_t *rrp=NULL; /* record pointer for right block */
1180 269
1181 /* 270 if (cur->bc_btnum == XFS_BTNUM_BNO) {
1182 * Set up variables for this block as "right". 271 return (__int64_t)be32_to_cpu(kp->ar_startblock) -
1183 */ 272 rec->ar_startblock;
1184 rbp = cur->bc_bufs[level];
1185 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1186#ifdef DEBUG
1187 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1188 return error;
1189#endif
1190 /*
1191 * If we've got no left sibling then we can't shift an entry left.
1192 */
1193 if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1194 *stat = 0;
1195 return 0;
1196 }
1197 /*
1198 * If the cursor entry is the one that would be moved, don't
1199 * do it... it's too complicated.
1200 */
1201 if (cur->bc_ptrs[level] <= 1) {
1202 *stat = 0;
1203 return 0;
1204 }
1205 /*
1206 * Set up the left neighbor as "left".
1207 */
1208 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1209 cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1210 0, &lbp, XFS_ALLOC_BTREE_REF)))
1211 return error;
1212 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1213 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1214 return error;
1215 /*
1216 * If it's full, it can't take another entry.
1217 */
1218 if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1219 *stat = 0;
1220 return 0;
1221 } 273 }
1222 nrec = be16_to_cpu(left->bb_numrecs) + 1;
1223 /*
1224 * If non-leaf, copy a key and a ptr to the left block.
1225 */
1226 if (level > 0) {
1227 xfs_alloc_key_t *lkp; /* key pointer for left block */
1228 xfs_alloc_ptr_t *lpp; /* address pointer for left block */
1229 274
1230 lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur); 275 diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
1231 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); 276 if (diff)
1232 *lkp = *rkp; 277 return diff;
1233 xfs_alloc_log_keys(cur, lbp, nrec, nrec);
1234 lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
1235 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1236#ifdef DEBUG
1237 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1238 return error;
1239#endif
1240 *lpp = *rpp;
1241 xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
1242 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1243 }
1244 /*
1245 * If leaf, copy a record to the left block.
1246 */
1247 else {
1248 xfs_alloc_rec_t *lrp; /* record pointer for left block */
1249 278
1250 lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur); 279 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
1251 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1252 *lrp = *rrp;
1253 xfs_alloc_log_recs(cur, lbp, nrec, nrec);
1254 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1255 }
1256 /*
1257 * Bump and log left's numrecs, decrement and log right's numrecs.
1258 */
1259 be16_add_cpu(&left->bb_numrecs, 1);
1260 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1261 be16_add_cpu(&right->bb_numrecs, -1);
1262 xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1263 /*
1264 * Slide the contents of right down one entry.
1265 */
1266 if (level > 0) {
1267#ifdef DEBUG
1268 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1269 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1270 level)))
1271 return error;
1272 }
1273#endif
1274 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1275 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1276 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1277 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1278 } else {
1279 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1280 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1281 key.ar_startblock = rrp->ar_startblock;
1282 key.ar_blockcount = rrp->ar_blockcount;
1283 rkp = &key;
1284 }
1285 /*
1286 * Update the parent key values of right.
1287 */
1288 if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
1289 return error;
1290 /*
1291 * Slide the cursor value left one.
1292 */
1293 cur->bc_ptrs[level]--;
1294 *stat = 1;
1295 return 0;
1296} 280}
1297 281
1298/* 282STATIC int
1299 * Allocate a new root block, fill it in. 283xfs_allocbt_kill_root(
1300 */ 284 struct xfs_btree_cur *cur,
1301STATIC int /* error */ 285 struct xfs_buf *bp,
1302xfs_alloc_newroot( 286 int level,
1303 xfs_btree_cur_t *cur, /* btree cursor */ 287 union xfs_btree_ptr *newroot)
1304 int *stat) /* success/failure */
1305{ 288{
1306 int error; /* error return value */ 289 int error;
1307 xfs_agblock_t lbno; /* left block number */
1308 xfs_buf_t *lbp; /* left btree buffer */
1309 xfs_alloc_block_t *left; /* left btree block */
1310 xfs_mount_t *mp; /* mount structure */
1311 xfs_agblock_t nbno; /* new block number */
1312 xfs_buf_t *nbp; /* new (root) buffer */
1313 xfs_alloc_block_t *new; /* new (root) btree block */
1314 int nptr; /* new value for key index, 1 or 2 */
1315 xfs_agblock_t rbno; /* right block number */
1316 xfs_buf_t *rbp; /* right btree buffer */
1317 xfs_alloc_block_t *right; /* right btree block */
1318
1319 mp = cur->bc_mp;
1320 290
1321 ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp)); 291 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1322 /* 292 XFS_BTREE_STATS_INC(cur, killroot);
1323 * Get a buffer from the freelist blocks, for the new root.
1324 */
1325 error = xfs_alloc_get_freelist(cur->bc_tp,
1326 cur->bc_private.a.agbp, &nbno, 1);
1327 if (error)
1328 return error;
1329 /*
1330 * None available, we fail.
1331 */
1332 if (nbno == NULLAGBLOCK) {
1333 *stat = 0;
1334 return 0;
1335 }
1336 xfs_trans_agbtree_delta(cur->bc_tp, 1);
1337 nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
1338 0);
1339 new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
1340 /*
1341 * Set the root data in the a.g. freespace structure.
1342 */
1343 {
1344 xfs_agf_t *agf; /* a.g. freespace header */
1345 xfs_agnumber_t seqno;
1346 293
1347 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
1348 agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
1349 be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1);
1350 seqno = be32_to_cpu(agf->agf_seqno);
1351 mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
1352 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
1353 XFS_AGF_ROOTS | XFS_AGF_LEVELS);
1354 }
1355 /* 294 /*
1356 * At the previous root level there are now two blocks: the old 295 * Update the root pointer, decreasing the level by 1 and then
1357 * root, and the new block generated when it was split. 296 * free the old root.
1358 * We don't know which one the cursor is pointing at, so we
1359 * set up variables "left" and "right" for each case.
1360 */ 297 */
1361 lbp = cur->bc_bufs[cur->bc_nlevels - 1]; 298 xfs_allocbt_set_root(cur, newroot, -1);
1362 left = XFS_BUF_TO_ALLOC_BLOCK(lbp); 299 error = xfs_allocbt_free_block(cur, bp);
1363#ifdef DEBUG 300 if (error) {
1364 if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp))) 301 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1365 return error; 302 return error;
1366#endif
1367 if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
1368 /*
1369 * Our block is left, pick up the right block.
1370 */
1371 lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
1372 rbno = be32_to_cpu(left->bb_rightsib);
1373 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1374 cur->bc_private.a.agno, rbno, 0, &rbp,
1375 XFS_ALLOC_BTREE_REF)))
1376 return error;
1377 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1378 if ((error = xfs_btree_check_sblock(cur, right,
1379 cur->bc_nlevels - 1, rbp)))
1380 return error;
1381 nptr = 1;
1382 } else {
1383 /*
1384 * Our block is right, pick up the left block.
1385 */
1386 rbp = lbp;
1387 right = left;
1388 rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
1389 lbno = be32_to_cpu(right->bb_leftsib);
1390 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1391 cur->bc_private.a.agno, lbno, 0, &lbp,
1392 XFS_ALLOC_BTREE_REF)))
1393 return error;
1394 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1395 if ((error = xfs_btree_check_sblock(cur, left,
1396 cur->bc_nlevels - 1, lbp)))
1397 return error;
1398 nptr = 2;
1399 } 303 }
1400 /*
1401 * Fill in the new block's btree header and log it.
1402 */
1403 new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1404 new->bb_level = cpu_to_be16(cur->bc_nlevels);
1405 new->bb_numrecs = cpu_to_be16(2);
1406 new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1407 new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1408 xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
1409 ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1410 /*
1411 * Fill in the key data in the new root.
1412 */
1413 {
1414 xfs_alloc_key_t *kp; /* btree key pointer */
1415 304
1416 kp = XFS_ALLOC_KEY_ADDR(new, 1, cur); 305 XFS_BTREE_STATS_INC(cur, free);
1417 if (be16_to_cpu(left->bb_level) > 0) {
1418 kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur);
1419 kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);
1420 } else {
1421 xfs_alloc_rec_t *rp; /* btree record pointer */
1422 306
1423 rp = XFS_ALLOC_REC_ADDR(left, 1, cur); 307 xfs_btree_setbuf(cur, level, NULL);
1424 kp[0].ar_startblock = rp->ar_startblock; 308 cur->bc_nlevels--;
1425 kp[0].ar_blockcount = rp->ar_blockcount;
1426 rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1427 kp[1].ar_startblock = rp->ar_startblock;
1428 kp[1].ar_blockcount = rp->ar_blockcount;
1429 }
1430 }
1431 xfs_alloc_log_keys(cur, nbp, 1, 2);
1432 /*
1433 * Fill in the pointer data in the new root.
1434 */
1435 {
1436 xfs_alloc_ptr_t *pp; /* btree address pointer */
1437 309
1438 pp = XFS_ALLOC_PTR_ADDR(new, 1, cur); 310 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1439 pp[0] = cpu_to_be32(lbno);
1440 pp[1] = cpu_to_be32(rbno);
1441 }
1442 xfs_alloc_log_ptrs(cur, nbp, 1, 2);
1443 /*
1444 * Fix up the cursor.
1445 */
1446 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1447 cur->bc_ptrs[cur->bc_nlevels] = nptr;
1448 cur->bc_nlevels++;
1449 *stat = 1;
1450 return 0; 311 return 0;
1451} 312}
1452 313
1453/*
1454 * Move 1 record right from cur/level if possible.
1455 * Update cur to reflect the new path.
1456 */
1457STATIC int /* error */
1458xfs_alloc_rshift(
1459 xfs_btree_cur_t *cur, /* btree cursor */
1460 int level, /* level to shift record on */
1461 int *stat) /* success/failure */
1462{
1463 int error; /* error return value */
1464 int i; /* loop index */
1465 xfs_alloc_key_t key; /* key value for leaf level upward */
1466 xfs_buf_t *lbp; /* buffer for left (current) block */
1467 xfs_alloc_block_t *left; /* left (current) btree block */
1468 xfs_buf_t *rbp; /* buffer for right neighbor block */
1469 xfs_alloc_block_t *right; /* right neighbor btree block */
1470 xfs_alloc_key_t *rkp; /* key pointer for right block */
1471 xfs_btree_cur_t *tcur; /* temporary cursor */
1472
1473 /*
1474 * Set up variables for this block as "left".
1475 */
1476 lbp = cur->bc_bufs[level];
1477 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1478#ifdef DEBUG
1479 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1480 return error;
1481#endif
1482 /*
1483 * If we've got no right sibling then we can't shift an entry right.
1484 */
1485 if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1486 *stat = 0;
1487 return 0;
1488 }
1489 /*
1490 * If the cursor entry is the one that would be moved, don't
1491 * do it... it's too complicated.
1492 */
1493 if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1494 *stat = 0;
1495 return 0;
1496 }
1497 /*
1498 * Set up the right neighbor as "right".
1499 */
1500 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1501 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1502 0, &rbp, XFS_ALLOC_BTREE_REF)))
1503 return error;
1504 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1505 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1506 return error;
1507 /*
1508 * If it's full, it can't take another entry.
1509 */
1510 if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1511 *stat = 0;
1512 return 0;
1513 }
1514 /*
1515 * Make a hole at the start of the right neighbor block, then
1516 * copy the last left block entry to the hole.
1517 */
1518 if (level > 0) {
1519 xfs_alloc_key_t *lkp; /* key pointer for left block */
1520 xfs_alloc_ptr_t *lpp; /* address pointer for left block */
1521 xfs_alloc_ptr_t *rpp; /* address pointer for right block */
1522
1523 lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1524 lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1525 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1526 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1527#ifdef DEBUG 314#ifdef DEBUG
1528 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { 315STATIC int
1529 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) 316xfs_allocbt_keys_inorder(
1530 return error; 317 struct xfs_btree_cur *cur,
1531 } 318 union xfs_btree_key *k1,
1532#endif 319 union xfs_btree_key *k2)
1533 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); 320{
1534 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); 321 if (cur->bc_btnum == XFS_BTNUM_BNO) {
1535#ifdef DEBUG 322 return be32_to_cpu(k1->alloc.ar_startblock) <
1536 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) 323 be32_to_cpu(k2->alloc.ar_startblock);
1537 return error;
1538#endif
1539 *rkp = *lkp;
1540 *rpp = *lpp;
1541 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1542 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1543 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1544 } else { 324 } else {
1545 xfs_alloc_rec_t *lrp; /* record pointer for left block */ 325 return be32_to_cpu(k1->alloc.ar_blockcount) <
1546 xfs_alloc_rec_t *rrp; /* record pointer for right block */ 326 be32_to_cpu(k2->alloc.ar_blockcount) ||
1547 327 (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
1548 lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); 328 be32_to_cpu(k1->alloc.ar_startblock) <
1549 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); 329 be32_to_cpu(k2->alloc.ar_startblock));
1550 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1551 *rrp = *lrp;
1552 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1553 key.ar_startblock = rrp->ar_startblock;
1554 key.ar_blockcount = rrp->ar_blockcount;
1555 rkp = &key;
1556 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1557 } 330 }
1558 /*
1559 * Decrement and log left's numrecs, bump and log right's numrecs.
1560 */
1561 be16_add_cpu(&left->bb_numrecs, -1);
1562 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1563 be16_add_cpu(&right->bb_numrecs, 1);
1564 xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1565 /*
1566 * Using a temporary cursor, update the parent key values of the
1567 * block on the right.
1568 */
1569 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1570 return error;
1571 i = xfs_btree_lastrec(tcur, level);
1572 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1573 if ((error = xfs_alloc_increment(tcur, level, &i)) ||
1574 (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
1575 goto error0;
1576 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1577 *stat = 1;
1578 return 0;
1579error0:
1580 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1581 return error;
1582} 331}
1583 332
1584/* 333STATIC int
1585 * Split cur/level block in half. 334xfs_allocbt_recs_inorder(
1586 * Return new block number and its first record (to be inserted into parent). 335 struct xfs_btree_cur *cur,
1587 */ 336 union xfs_btree_rec *r1,
1588STATIC int /* error */ 337 union xfs_btree_rec *r2)
1589xfs_alloc_split(
1590 xfs_btree_cur_t *cur, /* btree cursor */
1591 int level, /* level to split */
1592 xfs_agblock_t *bnop, /* output: block number allocated */
1593 xfs_alloc_key_t *keyp, /* output: first key of new block */
1594 xfs_btree_cur_t **curp, /* output: new cursor */
1595 int *stat) /* success/failure */
1596{ 338{
1597 int error; /* error return value */ 339 if (cur->bc_btnum == XFS_BTNUM_BNO) {
1598 int i; /* loop index/record number */ 340 return be32_to_cpu(r1->alloc.ar_startblock) +
1599 xfs_agblock_t lbno; /* left (current) block number */ 341 be32_to_cpu(r1->alloc.ar_blockcount) <=
1600 xfs_buf_t *lbp; /* buffer for left block */ 342 be32_to_cpu(r2->alloc.ar_startblock);
1601 xfs_alloc_block_t *left; /* left (current) btree block */ 343 } else {
1602 xfs_agblock_t rbno; /* right (new) block number */ 344 return be32_to_cpu(r1->alloc.ar_blockcount) <
1603 xfs_buf_t *rbp; /* buffer for right block */ 345 be32_to_cpu(r2->alloc.ar_blockcount) ||
1604 xfs_alloc_block_t *right; /* right (new) btree block */ 346 (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
1605 347 be32_to_cpu(r1->alloc.ar_startblock) <
1606 /* 348 be32_to_cpu(r2->alloc.ar_startblock));
1607 * Allocate the new block from the freelist.
1608 * If we can't do it, we're toast. Give up.
1609 */
1610 error = xfs_alloc_get_freelist(cur->bc_tp,
1611 cur->bc_private.a.agbp, &rbno, 1);
1612 if (error)
1613 return error;
1614 if (rbno == NULLAGBLOCK) {
1615 *stat = 0;
1616 return 0;
1617 }
1618 xfs_trans_agbtree_delta(cur->bc_tp, 1);
1619 rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
1620 rbno, 0);
1621 /*
1622 * Set up the new block as "right".
1623 */
1624 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1625 /*
1626 * "Left" is the current (according to the cursor) block.
1627 */
1628 lbp = cur->bc_bufs[level];
1629 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1630#ifdef DEBUG
1631 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1632 return error;
1633#endif
1634 /*
1635 * Fill in the btree header for the new block.
1636 */
1637 right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1638 right->bb_level = left->bb_level;
1639 right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1640 /*
1641 * Make sure that if there's an odd number of entries now, that
1642 * each new block will have the same number of entries.
1643 */
1644 if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1645 cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1646 be16_add_cpu(&right->bb_numrecs, 1);
1647 i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1648 /*
1649 * For non-leaf blocks, copy keys and addresses over to the new block.
1650 */
1651 if (level > 0) {
1652 xfs_alloc_key_t *lkp; /* left btree key pointer */
1653 xfs_alloc_ptr_t *lpp; /* left btree address pointer */
1654 xfs_alloc_key_t *rkp; /* right btree key pointer */
1655 xfs_alloc_ptr_t *rpp; /* right btree address pointer */
1656
1657 lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
1658 lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
1659 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1660 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1661#ifdef DEBUG
1662 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1663 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1664 return error;
1665 }
1666#endif
1667 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1668 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1669 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1670 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1671 *keyp = *rkp;
1672 } 349 }
1673 /* 350}
1674 * For leaf blocks, copy records over to the new block. 351#endif /* DEBUG */
1675 */
1676 else {
1677 xfs_alloc_rec_t *lrp; /* left btree record pointer */
1678 xfs_alloc_rec_t *rrp; /* right btree record pointer */
1679 352
1680 lrp = XFS_ALLOC_REC_ADDR(left, i, cur); 353#ifdef XFS_BTREE_TRACE
1681 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); 354ktrace_t *xfs_allocbt_trace_buf;
1682 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1683 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1684 keyp->ar_startblock = rrp->ar_startblock;
1685 keyp->ar_blockcount = rrp->ar_blockcount;
1686 }
1687 /*
1688 * Find the left block number by looking in the buffer.
1689 * Adjust numrecs, sibling pointers.
1690 */
1691 lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
1692 be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1693 right->bb_rightsib = left->bb_rightsib;
1694 left->bb_rightsib = cpu_to_be32(rbno);
1695 right->bb_leftsib = cpu_to_be32(lbno);
1696 xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
1697 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1698 /*
1699 * If there's a block to the new block's right, make that block
1700 * point back to right instead of to left.
1701 */
1702 if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1703 xfs_alloc_block_t *rrblock; /* rr btree block */
1704 xfs_buf_t *rrbp; /* buffer for rrblock */
1705 355
1706 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 356STATIC void
1707 cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0, 357xfs_allocbt_trace_enter(
1708 &rrbp, XFS_ALLOC_BTREE_REF))) 358 struct xfs_btree_cur *cur,
1709 return error; 359 const char *func,
1710 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); 360 char *s,
1711 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) 361 int type,
1712 return error; 362 int line,
1713 rrblock->bb_leftsib = cpu_to_be32(rbno); 363 __psunsigned_t a0,
1714 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); 364 __psunsigned_t a1,
1715 } 365 __psunsigned_t a2,
1716 /* 366 __psunsigned_t a3,
1717 * If the cursor is really in the right block, move it there. 367 __psunsigned_t a4,
1718 * If it's just pointing past the last entry in left, then we'll 368 __psunsigned_t a5,
1719 * insert there, so don't change anything in that case. 369 __psunsigned_t a6,
1720 */ 370 __psunsigned_t a7,
1721 if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) { 371 __psunsigned_t a8,
1722 xfs_btree_setbuf(cur, level, rbp); 372 __psunsigned_t a9,
1723 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs); 373 __psunsigned_t a10)
1724 } 374{
1725 /* 375 ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
1726 * If there are more levels, we'll need another cursor which refers to 376 (void *)func, (void *)s, NULL, (void *)cur,
1727 * the right block, no matter where this cursor was. 377 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
1728 */ 378 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
1729 if (level + 1 < cur->bc_nlevels) { 379 (void *)a8, (void *)a9, (void *)a10);
1730 if ((error = xfs_btree_dup_cursor(cur, curp)))
1731 return error;
1732 (*curp)->bc_ptrs[level + 1]++;
1733 }
1734 *bnop = rbno;
1735 *stat = 1;
1736 return 0;
1737} 380}
1738 381
1739/* 382STATIC void
1740 * Update keys at all levels from here to the root along the cursor's path. 383xfs_allocbt_trace_cursor(
1741 */ 384 struct xfs_btree_cur *cur,
1742STATIC int /* error */ 385 __uint32_t *s0,
1743xfs_alloc_updkey( 386 __uint64_t *l0,
1744 xfs_btree_cur_t *cur, /* btree cursor */ 387 __uint64_t *l1)
1745 xfs_alloc_key_t *keyp, /* new key value to update to */
1746 int level) /* starting level for update */
1747{ 388{
1748 int ptr; /* index of key in block */ 389 *s0 = cur->bc_private.a.agno;
1749 390 *l0 = cur->bc_rec.a.ar_startblock;
1750 /* 391 *l1 = cur->bc_rec.a.ar_blockcount;
1751 * Go up the tree from this level toward the root.
1752 * At each level, update the key value to the value input.
1753 * Stop when we reach a level where the cursor isn't pointing
1754 * at the first entry in the block.
1755 */
1756 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1757 xfs_alloc_block_t *block; /* btree block */
1758 xfs_buf_t *bp; /* buffer for block */
1759#ifdef DEBUG
1760 int error; /* error return value */
1761#endif
1762 xfs_alloc_key_t *kp; /* ptr to btree block keys */
1763
1764 bp = cur->bc_bufs[level];
1765 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1766#ifdef DEBUG
1767 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1768 return error;
1769#endif
1770 ptr = cur->bc_ptrs[level];
1771 kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
1772 *kp = *keyp;
1773 xfs_alloc_log_keys(cur, bp, ptr, ptr);
1774 }
1775 return 0;
1776} 392}
1777 393
1778/* 394STATIC void
1779 * Externally visible routines. 395xfs_allocbt_trace_key(
1780 */ 396 struct xfs_btree_cur *cur,
1781 397 union xfs_btree_key *key,
1782/* 398 __uint64_t *l0,
1783 * Decrement cursor by one record at the level. 399 __uint64_t *l1)
1784 * For nonzero levels the leaf-ward information is untouched.
1785 */
1786int /* error */
1787xfs_alloc_decrement(
1788 xfs_btree_cur_t *cur, /* btree cursor */
1789 int level, /* level in btree, 0 is leaf */
1790 int *stat) /* success/failure */
1791{ 400{
1792 xfs_alloc_block_t *block; /* btree block */ 401 *l0 = be32_to_cpu(key->alloc.ar_startblock);
1793 int error; /* error return value */ 402 *l1 = be32_to_cpu(key->alloc.ar_blockcount);
1794 int lev; /* btree level */
1795
1796 ASSERT(level < cur->bc_nlevels);
1797 /*
1798 * Read-ahead to the left at this level.
1799 */
1800 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1801 /*
1802 * Decrement the ptr at this level. If we're still in the block
1803 * then we're done.
1804 */
1805 if (--cur->bc_ptrs[level] > 0) {
1806 *stat = 1;
1807 return 0;
1808 }
1809 /*
1810 * Get a pointer to the btree block.
1811 */
1812 block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
1813#ifdef DEBUG
1814 if ((error = xfs_btree_check_sblock(cur, block, level,
1815 cur->bc_bufs[level])))
1816 return error;
1817#endif
1818 /*
1819 * If we just went off the left edge of the tree, return failure.
1820 */
1821 if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1822 *stat = 0;
1823 return 0;
1824 }
1825 /*
1826 * March up the tree decrementing pointers.
1827 * Stop when we don't go off the left edge of a block.
1828 */
1829 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1830 if (--cur->bc_ptrs[lev] > 0)
1831 break;
1832 /*
1833 * Read-ahead the left block, we're going to read it
1834 * in the next loop.
1835 */
1836 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1837 }
1838 /*
1839 * If we went off the root then we are seriously confused.
1840 */
1841 ASSERT(lev < cur->bc_nlevels);
1842 /*
1843 * Now walk back down the tree, fixing up the cursor's buffer
1844 * pointers and key numbers.
1845 */
1846 for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1847 xfs_agblock_t agbno; /* block number of btree block */
1848 xfs_buf_t *bp; /* buffer pointer for block */
1849
1850 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1851 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1852 cur->bc_private.a.agno, agbno, 0, &bp,
1853 XFS_ALLOC_BTREE_REF)))
1854 return error;
1855 lev--;
1856 xfs_btree_setbuf(cur, lev, bp);
1857 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1858 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1859 return error;
1860 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1861 }
1862 *stat = 1;
1863 return 0;
1864} 403}
1865 404
1866/* 405STATIC void
1867 * Delete the record pointed to by cur. 406xfs_allocbt_trace_record(
1868 * The cursor refers to the place where the record was (could be inserted) 407 struct xfs_btree_cur *cur,
1869 * when the operation returns. 408 union xfs_btree_rec *rec,
1870 */ 409 __uint64_t *l0,
1871int /* error */ 410 __uint64_t *l1,
1872xfs_alloc_delete( 411 __uint64_t *l2)
1873 xfs_btree_cur_t *cur, /* btree cursor */
1874 int *stat) /* success/failure */
1875{ 412{
1876 int error; /* error return value */ 413 *l0 = be32_to_cpu(rec->alloc.ar_startblock);
1877 int i; /* result code */ 414 *l1 = be32_to_cpu(rec->alloc.ar_blockcount);
1878 int level; /* btree level */ 415 *l2 = 0;
1879
1880 /*
1881 * Go up the tree, starting at leaf level.
1882 * If 2 is returned then a join was done; go to the next level.
1883 * Otherwise we are done.
1884 */
1885 for (level = 0, i = 2; i == 2; level++) {
1886 if ((error = xfs_alloc_delrec(cur, level, &i)))
1887 return error;
1888 }
1889 if (i == 0) {
1890 for (level = 1; level < cur->bc_nlevels; level++) {
1891 if (cur->bc_ptrs[level] == 0) {
1892 if ((error = xfs_alloc_decrement(cur, level, &i)))
1893 return error;
1894 break;
1895 }
1896 }
1897 }
1898 *stat = i;
1899 return 0;
1900} 416}
417#endif /* XFS_BTREE_TRACE */
418
419static const struct xfs_btree_ops xfs_allocbt_ops = {
420 .rec_len = sizeof(xfs_alloc_rec_t),
421 .key_len = sizeof(xfs_alloc_key_t),
422
423 .dup_cursor = xfs_allocbt_dup_cursor,
424 .set_root = xfs_allocbt_set_root,
425 .kill_root = xfs_allocbt_kill_root,
426 .alloc_block = xfs_allocbt_alloc_block,
427 .free_block = xfs_allocbt_free_block,
428 .update_lastrec = xfs_allocbt_update_lastrec,
429 .get_minrecs = xfs_allocbt_get_minrecs,
430 .get_maxrecs = xfs_allocbt_get_maxrecs,
431 .init_key_from_rec = xfs_allocbt_init_key_from_rec,
432 .init_rec_from_key = xfs_allocbt_init_rec_from_key,
433 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
434 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
435 .key_diff = xfs_allocbt_key_diff,
1901 436
1902/*
1903 * Get the data from the pointed-to record.
1904 */
1905int /* error */
1906xfs_alloc_get_rec(
1907 xfs_btree_cur_t *cur, /* btree cursor */
1908 xfs_agblock_t *bno, /* output: starting block of extent */
1909 xfs_extlen_t *len, /* output: length of extent */
1910 int *stat) /* output: success/failure */
1911{
1912 xfs_alloc_block_t *block; /* btree block */
1913#ifdef DEBUG 437#ifdef DEBUG
1914 int error; /* error return value */ 438 .keys_inorder = xfs_allocbt_keys_inorder,
439 .recs_inorder = xfs_allocbt_recs_inorder,
1915#endif 440#endif
1916 int ptr; /* record number */
1917 441
1918 ptr = cur->bc_ptrs[0]; 442#ifdef XFS_BTREE_TRACE
1919 block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]); 443 .trace_enter = xfs_allocbt_trace_enter,
1920#ifdef DEBUG 444 .trace_cursor = xfs_allocbt_trace_cursor,
1921 if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))) 445 .trace_key = xfs_allocbt_trace_key,
1922 return error; 446 .trace_record = xfs_allocbt_trace_record,
1923#endif 447#endif
1924 /* 448};
1925 * Off the right end or left end, return failure.
1926 */
1927 if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1928 *stat = 0;
1929 return 0;
1930 }
1931 /*
1932 * Point to the record and extract its data.
1933 */
1934 {
1935 xfs_alloc_rec_t *rec; /* record data */
1936
1937 rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
1938 *bno = be32_to_cpu(rec->ar_startblock);
1939 *len = be32_to_cpu(rec->ar_blockcount);
1940 }
1941 *stat = 1;
1942 return 0;
1943}
1944 449
1945/* 450/*
1946 * Increment cursor by one record at the level. 451 * Allocate a new allocation btree cursor.
1947 * For nonzero levels the leaf-ward information is untouched.
1948 */ 452 */
1949int /* error */ 453struct xfs_btree_cur * /* new alloc btree cursor */
1950xfs_alloc_increment( 454xfs_allocbt_init_cursor(
1951 xfs_btree_cur_t *cur, /* btree cursor */ 455 struct xfs_mount *mp, /* file system mount point */
1952 int level, /* level in btree, 0 is leaf */ 456 struct xfs_trans *tp, /* transaction pointer */
1953 int *stat) /* success/failure */ 457 struct xfs_buf *agbp, /* buffer for agf structure */
458 xfs_agnumber_t agno, /* allocation group number */
459 xfs_btnum_t btnum) /* btree identifier */
1954{ 460{
1955 xfs_alloc_block_t *block; /* btree block */ 461 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
1956 xfs_buf_t *bp; /* tree block buffer */ 462 struct xfs_btree_cur *cur;
1957 int error; /* error return value */
1958 int lev; /* btree level */
1959
1960 ASSERT(level < cur->bc_nlevels);
1961 /*
1962 * Read-ahead to the right at this level.
1963 */
1964 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1965 /*
1966 * Get a pointer to the btree block.
1967 */
1968 bp = cur->bc_bufs[level];
1969 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1970#ifdef DEBUG
1971 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1972 return error;
1973#endif
1974 /*
1975 * Increment the ptr at this level. If we're still in the block
1976 * then we're done.
1977 */
1978 if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1979 *stat = 1;
1980 return 0;
1981 }
1982 /*
1983 * If we just went off the right edge of the tree, return failure.
1984 */
1985 if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1986 *stat = 0;
1987 return 0;
1988 }
1989 /*
1990 * March up the tree incrementing pointers.
1991 * Stop when we don't go off the right edge of a block.
1992 */
1993 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1994 bp = cur->bc_bufs[lev];
1995 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1996#ifdef DEBUG
1997 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1998 return error;
1999#endif
2000 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
2001 break;
2002 /*
2003 * Read-ahead the right block, we're going to read it
2004 * in the next loop.
2005 */
2006 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
2007 }
2008 /*
2009 * If we went off the root then we are seriously confused.
2010 */
2011 ASSERT(lev < cur->bc_nlevels);
2012 /*
2013 * Now walk back down the tree, fixing up the cursor's buffer
2014 * pointers and key numbers.
2015 */
2016 for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2017 lev > level; ) {
2018 xfs_agblock_t agbno; /* block number of btree block */
2019 463
2020 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); 464 ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
2021 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
2022 cur->bc_private.a.agno, agbno, 0, &bp,
2023 XFS_ALLOC_BTREE_REF)))
2024 return error;
2025 lev--;
2026 xfs_btree_setbuf(cur, lev, bp);
2027 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2028 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
2029 return error;
2030 cur->bc_ptrs[lev] = 1;
2031 }
2032 *stat = 1;
2033 return 0;
2034}
2035 465
2036/* 466 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
2037 * Insert the current record at the point referenced by cur.
2038 * The cursor may be inconsistent on return if splits have been done.
2039 */
2040int /* error */
2041xfs_alloc_insert(
2042 xfs_btree_cur_t *cur, /* btree cursor */
2043 int *stat) /* success/failure */
2044{
2045 int error; /* error return value */
2046 int i; /* result value, 0 for failure */
2047 int level; /* current level number in btree */
2048 xfs_agblock_t nbno; /* new block number (split result) */
2049 xfs_btree_cur_t *ncur; /* new cursor (split result) */
2050 xfs_alloc_rec_t nrec; /* record being inserted this level */
2051 xfs_btree_cur_t *pcur; /* previous level's cursor */
2052 467
2053 level = 0; 468 cur->bc_tp = tp;
2054 nbno = NULLAGBLOCK; 469 cur->bc_mp = mp;
2055 nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); 470 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
2056 nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); 471 cur->bc_btnum = btnum;
2057 ncur = NULL; 472 cur->bc_blocklog = mp->m_sb.sb_blocklog;
2058 pcur = cur;
2059 /*
2060 * Loop going up the tree, starting at the leaf level.
2061 * Stop when we don't get a split block, that must mean that
2062 * the insert is finished with this level.
2063 */
2064 do {
2065 /*
2066 * Insert nrec/nbno into this level of the tree.
2067 * Note if we fail, nbno will be null.
2068 */
2069 if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
2070 &i))) {
2071 if (pcur != cur)
2072 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2073 return error;
2074 }
2075 /*
2076 * See if the cursor we just used is trash.
2077 * Can't trash the caller's cursor, but otherwise we should
2078 * if ncur is a new cursor or we're about to be done.
2079 */
2080 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
2081 cur->bc_nlevels = pcur->bc_nlevels;
2082 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2083 }
2084 /*
2085 * If we got a new cursor, switch to it.
2086 */
2087 if (ncur) {
2088 pcur = ncur;
2089 ncur = NULL;
2090 }
2091 } while (nbno != NULLAGBLOCK);
2092 *stat = i;
2093 return 0;
2094}
2095 473
2096/* 474 cur->bc_ops = &xfs_allocbt_ops;
2097 * Lookup the record equal to [bno, len] in the btree given by cur. 475 if (btnum == XFS_BTNUM_CNT)
2098 */ 476 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
2099int /* error */
2100xfs_alloc_lookup_eq(
2101 xfs_btree_cur_t *cur, /* btree cursor */
2102 xfs_agblock_t bno, /* starting block of extent */
2103 xfs_extlen_t len, /* length of extent */
2104 int *stat) /* success/failure */
2105{
2106 cur->bc_rec.a.ar_startblock = bno;
2107 cur->bc_rec.a.ar_blockcount = len;
2108 return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
2109}
2110 477
2111/* 478 cur->bc_private.a.agbp = agbp;
2112 * Lookup the first record greater than or equal to [bno, len] 479 cur->bc_private.a.agno = agno;
2113 * in the btree given by cur.
2114 */
2115int /* error */
2116xfs_alloc_lookup_ge(
2117 xfs_btree_cur_t *cur, /* btree cursor */
2118 xfs_agblock_t bno, /* starting block of extent */
2119 xfs_extlen_t len, /* length of extent */
2120 int *stat) /* success/failure */
2121{
2122 cur->bc_rec.a.ar_startblock = bno;
2123 cur->bc_rec.a.ar_blockcount = len;
2124 return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
2125}
2126 480
2127/* 481 return cur;
2128 * Lookup the first record less than or equal to [bno, len]
2129 * in the btree given by cur.
2130 */
2131int /* error */
2132xfs_alloc_lookup_le(
2133 xfs_btree_cur_t *cur, /* btree cursor */
2134 xfs_agblock_t bno, /* starting block of extent */
2135 xfs_extlen_t len, /* length of extent */
2136 int *stat) /* success/failure */
2137{
2138 cur->bc_rec.a.ar_startblock = bno;
2139 cur->bc_rec.a.ar_blockcount = len;
2140 return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
2141} 482}
2142 483
2143/* 484/*
2144 * Update the record referred to by cur, to the value given by [bno, len]. 485 * Calculate number of records in an alloc btree block.
2145 * This either works (return 0) or gets an EFSCORRUPTED error.
2146 */ 486 */
2147int /* error */ 487int
2148xfs_alloc_update( 488xfs_allocbt_maxrecs(
2149 xfs_btree_cur_t *cur, /* btree cursor */ 489 struct xfs_mount *mp,
2150 xfs_agblock_t bno, /* starting block of extent */ 490 int blocklen,
2151 xfs_extlen_t len) /* length of extent */ 491 int leaf)
2152{ 492{
2153 xfs_alloc_block_t *block; /* btree block to update */ 493 blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
2154 int error; /* error return value */
2155 int ptr; /* current record number (updating) */
2156 494
2157 ASSERT(len > 0); 495 if (leaf)
2158 /* 496 return blocklen / sizeof(xfs_alloc_rec_t);
2159 * Pick up the a.g. freelist struct and the current block. 497 return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
2160 */
2161 block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
2162#ifdef DEBUG
2163 if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
2164 return error;
2165#endif
2166 /*
2167 * Get the address of the rec to be updated.
2168 */
2169 ptr = cur->bc_ptrs[0];
2170 {
2171 xfs_alloc_rec_t *rp; /* pointer to updated record */
2172
2173 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
2174 /*
2175 * Fill in the new contents and log them.
2176 */
2177 rp->ar_startblock = cpu_to_be32(bno);
2178 rp->ar_blockcount = cpu_to_be32(len);
2179 xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
2180 }
2181 /*
2182 * If it's the by-size btree and it's the last leaf block and
2183 * it's the last record... then update the size of the longest
2184 * extent in the a.g., which we cache in the a.g. freelist header.
2185 */
2186 if (cur->bc_btnum == XFS_BTNUM_CNT &&
2187 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
2188 ptr == be16_to_cpu(block->bb_numrecs)) {
2189 xfs_agf_t *agf; /* a.g. freespace header */
2190 xfs_agnumber_t seqno;
2191
2192 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
2193 seqno = be32_to_cpu(agf->agf_seqno);
2194 cur->bc_mp->m_perag[seqno].pagf_longest = len;
2195 agf->agf_longest = cpu_to_be32(len);
2196 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
2197 XFS_AGF_LONGEST);
2198 }
2199 /*
2200 * Updating first record in leaf. Pass new key value up to our parent.
2201 */
2202 if (ptr == 1) {
2203 xfs_alloc_key_t key; /* key containing [bno, len] */
2204
2205 key.ar_startblock = cpu_to_be32(bno);
2206 key.ar_blockcount = cpu_to_be32(len);
2207 if ((error = xfs_alloc_updkey(cur, &key, 1)))
2208 return error;
2209 }
2210 return 0;
2211} 498}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 5bd1a2c8bd07..a6caa0022c9b 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -24,7 +24,6 @@
24 24
25struct xfs_buf; 25struct xfs_buf;
26struct xfs_btree_cur; 26struct xfs_btree_cur;
27struct xfs_btree_sblock;
28struct xfs_mount; 27struct xfs_mount;
29 28
30/* 29/*
@@ -50,16 +49,6 @@ typedef struct xfs_alloc_rec_incore {
50 49
51/* btree pointer type */ 50/* btree pointer type */
52typedef __be32 xfs_alloc_ptr_t; 51typedef __be32 xfs_alloc_ptr_t;
53/* btree block header type */
54typedef struct xfs_btree_sblock xfs_alloc_block_t;
55
56#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp))
57
58/*
59 * Real block structures have a size equal to the disk block size.
60 */
61#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
62#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
63 52
64/* 53/*
65 * Minimum and maximum blocksize and sectorsize. 54 * Minimum and maximum blocksize and sectorsize.
@@ -83,73 +72,39 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
83#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) 72#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
84 73
85/* 74/*
86 * Record, key, and pointer address macros for btree blocks. 75 * Btree block header size depends on a superblock flag.
87 */ 76 *
88#define XFS_ALLOC_REC_ADDR(bb,i,cur) \ 77 * (not quite yet, but soon)
89 XFS_BTREE_REC_ADDR(xfs_alloc, bb, i)
90
91#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \
92 XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i)
93
94#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \
95 XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
96
97/*
98 * Decrement cursor by one record at the level.
99 * For nonzero levels the leaf-ward information is untouched.
100 */
101extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat);
102
103/*
104 * Delete the record pointed to by cur.
105 * The cursor refers to the place where the record was (could be inserted)
106 * when the operation returns.
107 */
108extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat);
109
110/*
111 * Get the data from the pointed-to record.
112 */
113extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno,
114 xfs_extlen_t *len, int *stat);
115
116/*
117 * Increment cursor by one record at the level.
118 * For nonzero levels the leaf-ward information is untouched.
119 */
120extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat);
121
122/*
123 * Insert the current record at the point referenced by cur.
124 * The cursor may be inconsistent on return if splits have been done.
125 */
126extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat);
127
128/*
129 * Lookup the record equal to [bno, len] in the btree given by cur.
130 */
131extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
132 xfs_extlen_t len, int *stat);
133
134/*
135 * Lookup the first record greater than or equal to [bno, len]
136 * in the btree given by cur.
137 */
138extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
139 xfs_extlen_t len, int *stat);
140
141/*
142 * Lookup the first record less than or equal to [bno, len]
143 * in the btree given by cur.
144 */ 78 */
145extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, 79#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
146 xfs_extlen_t len, int *stat);
147 80
148/* 81/*
149 * Update the record referred to by cur, to the value given by [bno, len]. 82 * Record, key, and pointer address macros for btree blocks.
150 * This either works (return 0) or gets an EFSCORRUPTED error. 83 *
151 */ 84 * (note that some of these may appear unused, but they are used in userspace)
152extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno, 85 */
153 xfs_extlen_t len); 86#define XFS_ALLOC_REC_ADDR(mp, block, index) \
87 ((xfs_alloc_rec_t *) \
88 ((char *)(block) + \
89 XFS_ALLOC_BLOCK_LEN(mp) + \
90 (((index) - 1) * sizeof(xfs_alloc_rec_t))))
91
/*
 * Address of key number 'index' (counted from 1) in an alloc btree block.
 * Keys are packed back to back starting XFS_ALLOC_BLOCK_LEN(mp) bytes
 * into the block.
 */
#define XFS_ALLOC_KEY_ADDR(mp, block, index)				\
	((xfs_alloc_key_t *)((char *)(block) +				\
			     XFS_ALLOC_BLOCK_LEN(mp) +			\
			     (((index) - 1) * sizeof(xfs_alloc_key_t))))
97
/*
 * Address of pointer number 'index' (counted from 1) in an alloc btree
 * block.  The pointer array begins after the XFS_ALLOC_BLOCK_LEN(mp)-byte
 * header and after room for 'maxrecs' keys.
 */
#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs)			\
	((xfs_alloc_ptr_t *)((char *)(block) +				\
			     XFS_ALLOC_BLOCK_LEN(mp) +			\
			     ((maxrecs) * sizeof(xfs_alloc_key_t)) +	\
			     (((index) - 1) * sizeof(xfs_alloc_ptr_t))))
104
105extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
106 struct xfs_trans *, struct xfs_buf *,
107 xfs_agnumber_t, xfs_btnum_t);
/*
 * Calculate how many entries fit in an alloc btree block of 'blocklen'
 * bytes once the block header (XFS_ALLOC_BLOCK_LEN) has been subtracted.
 * A nonzero 'leaf' counts records; zero counts key/pointer pairs.
 */
extern int xfs_allocbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
154 109
155#endif /* __XFS_ALLOC_BTREE_H__ */ 110#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 0b3b5efe848c..53d5e70d1360 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -41,21 +41,36 @@
41#endif 41#endif
42 42
43#ifdef XFS_NATIVE_HOST 43#ifdef XFS_NATIVE_HOST
44#define cpu_to_be16(val) ((__be16)(val)) 44#define cpu_to_be16(val) ((__force __be16)(__u16)(val))
45#define cpu_to_be32(val) ((__be32)(val)) 45#define cpu_to_be32(val) ((__force __be32)(__u32)(val))
46#define cpu_to_be64(val) ((__be64)(val)) 46#define cpu_to_be64(val) ((__force __be64)(__u64)(val))
47#define be16_to_cpu(val) ((__uint16_t)(val)) 47#define be16_to_cpu(val) ((__force __u16)(__be16)(val))
48#define be32_to_cpu(val) ((__uint32_t)(val)) 48#define be32_to_cpu(val) ((__force __u32)(__be32)(val))
49#define be64_to_cpu(val) ((__uint64_t)(val)) 49#define be64_to_cpu(val) ((__force __u64)(__be64)(val))
50#else 50#else
51#define cpu_to_be16(val) (__swab16((__uint16_t)(val))) 51#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val)))
52#define cpu_to_be32(val) (__swab32((__uint32_t)(val))) 52#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val)))
53#define cpu_to_be64(val) (__swab64((__uint64_t)(val))) 53#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val)))
54#define be16_to_cpu(val) (__swab16((__be16)(val))) 54#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val)))
55#define be32_to_cpu(val) (__swab32((__be32)(val))) 55#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val)))
56#define be64_to_cpu(val) (__swab64((__be64)(val))) 56#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val)))
57#endif 57#endif
58 58
59static inline void be16_add_cpu(__be16 *a, __s16 b)
60{
61 *a = cpu_to_be16(be16_to_cpu(*a) + b);
62}
63
64static inline void be32_add_cpu(__be32 *a, __s32 b)
65{
66 *a = cpu_to_be32(be32_to_cpu(*a) + b);
67}
68
69static inline void be64_add_cpu(__be64 *a, __s64 b)
70{
71 *a = cpu_to_be64(be64_to_cpu(*a) + b);
72}
73
59#endif /* __KERNEL__ */ 74#endif /* __KERNEL__ */
60 75
61/* do we need conversion? */ 76/* do we need conversion? */
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 8e0e463dae2d..bca7b243c319 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -61,8 +61,7 @@ static inline int xfs_highbit64(__uint64_t v)
61/* Get low bit set out of 32-bit argument, -1 if none set */ 61/* Get low bit set out of 32-bit argument, -1 if none set */
62static inline int xfs_lowbit32(__uint32_t v) 62static inline int xfs_lowbit32(__uint32_t v)
63{ 63{
64 unsigned long t = v; 64 return ffs(v) - 1;
65 return (v) ? find_first_bit(&t, 32) : -1;
66} 65}
67 66
68/* Get low bit set out of 64-bit argument, -1 if none set */ 67/* Get low bit set out of 64-bit argument, -1 if none set */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a1aab9275d5a..db289050692f 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -393,8 +393,8 @@ xfs_bmap_count_leaves(
393 393
394STATIC void 394STATIC void
395xfs_bmap_disk_count_leaves( 395xfs_bmap_disk_count_leaves(
396 xfs_extnum_t idx, 396 struct xfs_mount *mp,
397 xfs_bmbt_block_t *block, 397 struct xfs_btree_block *block,
398 int numrecs, 398 int numrecs,
399 int *count); 399 int *count);
400 400
@@ -402,6 +402,53 @@ xfs_bmap_disk_count_leaves(
402 * Bmap internal routines. 402 * Bmap internal routines.
403 */ 403 */
404 404
405STATIC int /* error */
406xfs_bmbt_lookup_eq(
407 struct xfs_btree_cur *cur,
408 xfs_fileoff_t off,
409 xfs_fsblock_t bno,
410 xfs_filblks_t len,
411 int *stat) /* success/failure */
412{
413 cur->bc_rec.b.br_startoff = off;
414 cur->bc_rec.b.br_startblock = bno;
415 cur->bc_rec.b.br_blockcount = len;
416 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
417}
418
419STATIC int /* error */
420xfs_bmbt_lookup_ge(
421 struct xfs_btree_cur *cur,
422 xfs_fileoff_t off,
423 xfs_fsblock_t bno,
424 xfs_filblks_t len,
425 int *stat) /* success/failure */
426{
427 cur->bc_rec.b.br_startoff = off;
428 cur->bc_rec.b.br_startblock = bno;
429 cur->bc_rec.b.br_blockcount = len;
430 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
431}
432
433/*
434* Update the record referred to by cur to the value given
435 * by [off, bno, len, state].
436 * This either works (return 0) or gets an EFSCORRUPTED error.
437 */
438STATIC int
439xfs_bmbt_update(
440 struct xfs_btree_cur *cur,
441 xfs_fileoff_t off,
442 xfs_fsblock_t bno,
443 xfs_filblks_t len,
444 xfs_exntst_t state)
445{
446 union xfs_btree_rec rec;
447
448 xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
449 return xfs_btree_update(cur, &rec);
450}
451
405/* 452/*
406 * Called from xfs_bmap_add_attrfork to handle btree format files. 453 * Called from xfs_bmap_add_attrfork to handle btree format files.
407 */ 454 */
@@ -422,15 +469,14 @@ xfs_bmap_add_attrfork_btree(
422 if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) 469 if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
423 *flags |= XFS_ILOG_DBROOT; 470 *flags |= XFS_ILOG_DBROOT;
424 else { 471 else {
425 cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, 472 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
426 XFS_DATA_FORK);
427 cur->bc_private.b.flist = flist; 473 cur->bc_private.b.flist = flist;
428 cur->bc_private.b.firstblock = *firstblock; 474 cur->bc_private.b.firstblock = *firstblock;
429 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) 475 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
430 goto error0; 476 goto error0;
431 /* must be at least one entry */ 477 /* must be at least one entry */
432 XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); 478 XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
433 if ((error = xfs_bmbt_newroot(cur, flags, &stat))) 479 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
434 goto error0; 480 goto error0;
435 if (stat == 0) { 481 if (stat == 0) {
436 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 482 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
@@ -818,10 +864,10 @@ xfs_bmap_add_extent_delay_real(
818 RIGHT.br_blockcount, &i))) 864 RIGHT.br_blockcount, &i)))
819 goto done; 865 goto done;
820 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 866 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
821 if ((error = xfs_bmbt_delete(cur, &i))) 867 if ((error = xfs_btree_delete(cur, &i)))
822 goto done; 868 goto done;
823 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 869 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
824 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 870 if ((error = xfs_btree_decrement(cur, 0, &i)))
825 goto done; 871 goto done;
826 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 872 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
827 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 873 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -931,7 +977,7 @@ xfs_bmap_add_extent_delay_real(
931 goto done; 977 goto done;
932 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 978 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
933 cur->bc_rec.b.br_state = XFS_EXT_NORM; 979 cur->bc_rec.b.br_state = XFS_EXT_NORM;
934 if ((error = xfs_bmbt_insert(cur, &i))) 980 if ((error = xfs_btree_insert(cur, &i)))
935 goto done; 981 goto done;
936 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 982 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
937 } 983 }
@@ -1007,7 +1053,7 @@ xfs_bmap_add_extent_delay_real(
1007 goto done; 1053 goto done;
1008 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1054 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1009 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1055 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1010 if ((error = xfs_bmbt_insert(cur, &i))) 1056 if ((error = xfs_btree_insert(cur, &i)))
1011 goto done; 1057 goto done;
1012 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1058 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1013 } 1059 }
@@ -1097,7 +1143,7 @@ xfs_bmap_add_extent_delay_real(
1097 goto done; 1143 goto done;
1098 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1144 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1099 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1145 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1100 if ((error = xfs_bmbt_insert(cur, &i))) 1146 if ((error = xfs_btree_insert(cur, &i)))
1101 goto done; 1147 goto done;
1102 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1148 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1103 } 1149 }
@@ -1152,7 +1198,7 @@ xfs_bmap_add_extent_delay_real(
1152 goto done; 1198 goto done;
1153 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1199 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1154 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1200 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1155 if ((error = xfs_bmbt_insert(cur, &i))) 1201 if ((error = xfs_btree_insert(cur, &i)))
1156 goto done; 1202 goto done;
1157 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1203 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1158 } 1204 }
@@ -1379,16 +1425,16 @@ xfs_bmap_add_extent_unwritten_real(
1379 RIGHT.br_blockcount, &i))) 1425 RIGHT.br_blockcount, &i)))
1380 goto done; 1426 goto done;
1381 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1427 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1382 if ((error = xfs_bmbt_delete(cur, &i))) 1428 if ((error = xfs_btree_delete(cur, &i)))
1383 goto done; 1429 goto done;
1384 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1430 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1385 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1431 if ((error = xfs_btree_decrement(cur, 0, &i)))
1386 goto done; 1432 goto done;
1387 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1433 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1388 if ((error = xfs_bmbt_delete(cur, &i))) 1434 if ((error = xfs_btree_delete(cur, &i)))
1389 goto done; 1435 goto done;
1390 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1436 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1391 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1437 if ((error = xfs_btree_decrement(cur, 0, &i)))
1392 goto done; 1438 goto done;
1393 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1439 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1394 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1440 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1428,10 +1474,10 @@ xfs_bmap_add_extent_unwritten_real(
1428 &i))) 1474 &i)))
1429 goto done; 1475 goto done;
1430 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1476 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1431 if ((error = xfs_bmbt_delete(cur, &i))) 1477 if ((error = xfs_btree_delete(cur, &i)))
1432 goto done; 1478 goto done;
1433 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1479 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1434 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1480 if ((error = xfs_btree_decrement(cur, 0, &i)))
1435 goto done; 1481 goto done;
1436 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1482 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1437 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1483 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1471,10 +1517,10 @@ xfs_bmap_add_extent_unwritten_real(
1471 RIGHT.br_blockcount, &i))) 1517 RIGHT.br_blockcount, &i)))
1472 goto done; 1518 goto done;
1473 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1519 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1474 if ((error = xfs_bmbt_delete(cur, &i))) 1520 if ((error = xfs_btree_delete(cur, &i)))
1475 goto done; 1521 goto done;
1476 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1522 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1477 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1523 if ((error = xfs_btree_decrement(cur, 0, &i)))
1478 goto done; 1524 goto done;
1479 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1525 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1480 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1526 if ((error = xfs_bmbt_update(cur, new->br_startoff,
@@ -1557,7 +1603,7 @@ xfs_bmap_add_extent_unwritten_real(
1557 PREV.br_blockcount - new->br_blockcount, 1603 PREV.br_blockcount - new->br_blockcount,
1558 oldext))) 1604 oldext)))
1559 goto done; 1605 goto done;
1560 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1606 if ((error = xfs_btree_decrement(cur, 0, &i)))
1561 goto done; 1607 goto done;
1562 if (xfs_bmbt_update(cur, LEFT.br_startoff, 1608 if (xfs_bmbt_update(cur, LEFT.br_startoff,
1563 LEFT.br_startblock, 1609 LEFT.br_startblock,
@@ -1605,7 +1651,7 @@ xfs_bmap_add_extent_unwritten_real(
1605 oldext))) 1651 oldext)))
1606 goto done; 1652 goto done;
1607 cur->bc_rec.b = *new; 1653 cur->bc_rec.b = *new;
1608 if ((error = xfs_bmbt_insert(cur, &i))) 1654 if ((error = xfs_btree_insert(cur, &i)))
1609 goto done; 1655 goto done;
1610 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1656 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1611 } 1657 }
@@ -1647,7 +1693,7 @@ xfs_bmap_add_extent_unwritten_real(
1647 PREV.br_blockcount - new->br_blockcount, 1693 PREV.br_blockcount - new->br_blockcount,
1648 oldext))) 1694 oldext)))
1649 goto done; 1695 goto done;
1650 if ((error = xfs_bmbt_increment(cur, 0, &i))) 1696 if ((error = xfs_btree_increment(cur, 0, &i)))
1651 goto done; 1697 goto done;
1652 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1698 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1653 new->br_startblock, 1699 new->br_startblock,
@@ -1695,7 +1741,7 @@ xfs_bmap_add_extent_unwritten_real(
1695 goto done; 1741 goto done;
1696 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1742 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1697 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1743 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1698 if ((error = xfs_bmbt_insert(cur, &i))) 1744 if ((error = xfs_btree_insert(cur, &i)))
1699 goto done; 1745 goto done;
1700 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1746 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1701 } 1747 }
@@ -1743,7 +1789,7 @@ xfs_bmap_add_extent_unwritten_real(
1743 cur->bc_rec.b = PREV; 1789 cur->bc_rec.b = PREV;
1744 cur->bc_rec.b.br_blockcount = 1790 cur->bc_rec.b.br_blockcount =
1745 new->br_startoff - PREV.br_startoff; 1791 new->br_startoff - PREV.br_startoff;
1746 if ((error = xfs_bmbt_insert(cur, &i))) 1792 if ((error = xfs_btree_insert(cur, &i)))
1747 goto done; 1793 goto done;
1748 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1794 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1749 /* 1795 /*
@@ -1758,7 +1804,7 @@ xfs_bmap_add_extent_unwritten_real(
1758 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 1804 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1759 /* new middle extent - newext */ 1805 /* new middle extent - newext */
1760 cur->bc_rec.b.br_state = new->br_state; 1806 cur->bc_rec.b.br_state = new->br_state;
1761 if ((error = xfs_bmbt_insert(cur, &i))) 1807 if ((error = xfs_btree_insert(cur, &i)))
1762 goto done; 1808 goto done;
1763 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 1809 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1764 } 1810 }
@@ -2106,10 +2152,10 @@ xfs_bmap_add_extent_hole_real(
2106 right.br_blockcount, &i))) 2152 right.br_blockcount, &i)))
2107 goto done; 2153 goto done;
2108 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2154 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2109 if ((error = xfs_bmbt_delete(cur, &i))) 2155 if ((error = xfs_btree_delete(cur, &i)))
2110 goto done; 2156 goto done;
2111 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2157 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2112 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 2158 if ((error = xfs_btree_decrement(cur, 0, &i)))
2113 goto done; 2159 goto done;
2114 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2160 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2115 if ((error = xfs_bmbt_update(cur, left.br_startoff, 2161 if ((error = xfs_bmbt_update(cur, left.br_startoff,
@@ -2218,7 +2264,7 @@ xfs_bmap_add_extent_hole_real(
2218 goto done; 2264 goto done;
2219 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 2265 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
2220 cur->bc_rec.b.br_state = new->br_state; 2266 cur->bc_rec.b.br_state = new->br_state;
2221 if ((error = xfs_bmbt_insert(cur, &i))) 2267 if ((error = xfs_btree_insert(cur, &i)))
2222 goto done; 2268 goto done;
2223 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2269 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2224 } 2270 }
@@ -2996,24 +3042,24 @@ xfs_bmap_btree_to_extents(
2996 int whichfork) /* data or attr fork */ 3042 int whichfork) /* data or attr fork */
2997{ 3043{
2998 /* REFERENCED */ 3044 /* REFERENCED */
2999 xfs_bmbt_block_t *cblock;/* child btree block */ 3045 struct xfs_btree_block *cblock;/* child btree block */
3000 xfs_fsblock_t cbno; /* child block number */ 3046 xfs_fsblock_t cbno; /* child block number */
3001 xfs_buf_t *cbp; /* child block's buffer */ 3047 xfs_buf_t *cbp; /* child block's buffer */
3002 int error; /* error return value */ 3048 int error; /* error return value */
3003 xfs_ifork_t *ifp; /* inode fork data */ 3049 xfs_ifork_t *ifp; /* inode fork data */
3004 xfs_mount_t *mp; /* mount point structure */ 3050 xfs_mount_t *mp; /* mount point structure */
3005 __be64 *pp; /* ptr to block address */ 3051 __be64 *pp; /* ptr to block address */
3006 xfs_bmbt_block_t *rblock;/* root btree block */ 3052 struct xfs_btree_block *rblock;/* root btree block */
3007 3053
3054 mp = ip->i_mount;
3008 ifp = XFS_IFORK_PTR(ip, whichfork); 3055 ifp = XFS_IFORK_PTR(ip, whichfork);
3009 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3056 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3010 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 3057 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
3011 rblock = ifp->if_broot; 3058 rblock = ifp->if_broot;
3012 ASSERT(be16_to_cpu(rblock->bb_level) == 1); 3059 ASSERT(be16_to_cpu(rblock->bb_level) == 1);
3013 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); 3060 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
3014 ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1); 3061 ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
3015 mp = ip->i_mount; 3062 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
3016 pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
3017 cbno = be64_to_cpu(*pp); 3063 cbno = be64_to_cpu(*pp);
3018 *logflagsp = 0; 3064 *logflagsp = 0;
3019#ifdef DEBUG 3065#ifdef DEBUG
@@ -3023,8 +3069,8 @@ xfs_bmap_btree_to_extents(
3023 if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, 3069 if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
3024 XFS_BMAP_BTREE_REF))) 3070 XFS_BMAP_BTREE_REF)))
3025 return error; 3071 return error;
3026 cblock = XFS_BUF_TO_BMBT_BLOCK(cbp); 3072 cblock = XFS_BUF_TO_BLOCK(cbp);
3027 if ((error = xfs_btree_check_lblock(cur, cblock, 0, cbp))) 3073 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
3028 return error; 3074 return error;
3029 xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); 3075 xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
3030 ip->i_d.di_nblocks--; 3076 ip->i_d.di_nblocks--;
@@ -3170,7 +3216,7 @@ xfs_bmap_del_extent(
3170 flags |= XFS_ILOG_FEXT(whichfork); 3216 flags |= XFS_ILOG_FEXT(whichfork);
3171 break; 3217 break;
3172 } 3218 }
3173 if ((error = xfs_bmbt_delete(cur, &i))) 3219 if ((error = xfs_btree_delete(cur, &i)))
3174 goto done; 3220 goto done;
3175 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 3221 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3176 break; 3222 break;
@@ -3254,10 +3300,10 @@ xfs_bmap_del_extent(
3254 got.br_startblock, temp, 3300 got.br_startblock, temp,
3255 got.br_state))) 3301 got.br_state)))
3256 goto done; 3302 goto done;
3257 if ((error = xfs_bmbt_increment(cur, 0, &i))) 3303 if ((error = xfs_btree_increment(cur, 0, &i)))
3258 goto done; 3304 goto done;
3259 cur->bc_rec.b = new; 3305 cur->bc_rec.b = new;
3260 error = xfs_bmbt_insert(cur, &i); 3306 error = xfs_btree_insert(cur, &i);
3261 if (error && error != ENOSPC) 3307 if (error && error != ENOSPC)
3262 goto done; 3308 goto done;
3263 /* 3309 /*
@@ -3404,11 +3450,11 @@ xfs_bmap_extents_to_btree(
3404 int *logflagsp, /* inode logging flags */ 3450 int *logflagsp, /* inode logging flags */
3405 int whichfork) /* data or attr fork */ 3451 int whichfork) /* data or attr fork */
3406{ 3452{
3407 xfs_bmbt_block_t *ablock; /* allocated (child) bt block */ 3453 struct xfs_btree_block *ablock; /* allocated (child) bt block */
3408 xfs_buf_t *abp; /* buffer for ablock */ 3454 xfs_buf_t *abp; /* buffer for ablock */
3409 xfs_alloc_arg_t args; /* allocation arguments */ 3455 xfs_alloc_arg_t args; /* allocation arguments */
3410 xfs_bmbt_rec_t *arp; /* child record pointer */ 3456 xfs_bmbt_rec_t *arp; /* child record pointer */
3411 xfs_bmbt_block_t *block; /* btree root block */ 3457 struct xfs_btree_block *block; /* btree root block */
3412 xfs_btree_cur_t *cur; /* bmap btree cursor */ 3458 xfs_btree_cur_t *cur; /* bmap btree cursor */
3413 xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 3459 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
3414 int error; /* error return value */ 3460 int error; /* error return value */
@@ -3428,6 +3474,7 @@ xfs_bmap_extents_to_btree(
3428 */ 3474 */
3429 xfs_iroot_realloc(ip, 1, whichfork); 3475 xfs_iroot_realloc(ip, 1, whichfork);
3430 ifp->if_flags |= XFS_IFBROOT; 3476 ifp->if_flags |= XFS_IFBROOT;
3477
3431 /* 3478 /*
3432 * Fill in the root. 3479 * Fill in the root.
3433 */ 3480 */
@@ -3435,14 +3482,14 @@ xfs_bmap_extents_to_btree(
3435 block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); 3482 block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
3436 block->bb_level = cpu_to_be16(1); 3483 block->bb_level = cpu_to_be16(1);
3437 block->bb_numrecs = cpu_to_be16(1); 3484 block->bb_numrecs = cpu_to_be16(1);
3438 block->bb_leftsib = cpu_to_be64(NULLDFSBNO); 3485 block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
3439 block->bb_rightsib = cpu_to_be64(NULLDFSBNO); 3486 block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
3487
3440 /* 3488 /*
3441 * Need a cursor. Can't allocate until bb_level is filled in. 3489 * Need a cursor. Can't allocate until bb_level is filled in.
3442 */ 3490 */
3443 mp = ip->i_mount; 3491 mp = ip->i_mount;
3444 cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, 3492 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
3445 whichfork);
3446 cur->bc_private.b.firstblock = *firstblock; 3493 cur->bc_private.b.firstblock = *firstblock;
3447 cur->bc_private.b.flist = flist; 3494 cur->bc_private.b.flist = flist;
3448 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; 3495 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
@@ -3489,12 +3536,12 @@ xfs_bmap_extents_to_btree(
3489 /* 3536 /*
3490 * Fill in the child block. 3537 * Fill in the child block.
3491 */ 3538 */
3492 ablock = XFS_BUF_TO_BMBT_BLOCK(abp); 3539 ablock = XFS_BUF_TO_BLOCK(abp);
3493 ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); 3540 ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
3494 ablock->bb_level = 0; 3541 ablock->bb_level = 0;
3495 ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO); 3542 ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
3496 ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO); 3543 ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
3497 arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); 3544 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
3498 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3545 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3499 for (cnt = i = 0; i < nextents; i++) { 3546 for (cnt = i = 0; i < nextents; i++) {
3500 ep = xfs_iext_get_ext(ifp, i); 3547 ep = xfs_iext_get_ext(ifp, i);
@@ -3505,21 +3552,24 @@ xfs_bmap_extents_to_btree(
3505 } 3552 }
3506 } 3553 }
3507 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); 3554 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
3508 ablock->bb_numrecs = cpu_to_be16(cnt); 3555 xfs_btree_set_numrecs(ablock, cnt);
3556
3509 /* 3557 /*
3510 * Fill in the root key and pointer. 3558 * Fill in the root key and pointer.
3511 */ 3559 */
3512 kp = XFS_BMAP_KEY_IADDR(block, 1, cur); 3560 kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
3513 arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); 3561 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
3514 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); 3562 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
3515 pp = XFS_BMAP_PTR_IADDR(block, 1, cur); 3563 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
3564 be16_to_cpu(block->bb_level)));
3516 *pp = cpu_to_be64(args.fsbno); 3565 *pp = cpu_to_be64(args.fsbno);
3566
3517 /* 3567 /*
3518 * Do all this logging at the end so that 3568 * Do all this logging at the end so that
3519 * the root is at the right level. 3569 * the root is at the right level.
3520 */ 3570 */
3521 xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS); 3571 xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
3522 xfs_bmbt_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); 3572 xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
3523 ASSERT(*curp == NULL); 3573 ASSERT(*curp == NULL);
3524 *curp = cur; 3574 *curp = cur;
3525 *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork); 3575 *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
@@ -4176,7 +4226,7 @@ xfs_bmap_compute_maxlevels(
4176 maxleafents = MAXAEXTNUM; 4226 maxleafents = MAXAEXTNUM;
4177 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); 4227 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
4178 } 4228 }
4179 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); 4229 maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
4180 minleafrecs = mp->m_bmap_dmnr[0]; 4230 minleafrecs = mp->m_bmap_dmnr[0];
4181 minnoderecs = mp->m_bmap_dmnr[1]; 4231 minnoderecs = mp->m_bmap_dmnr[1];
4182 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 4232 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -4474,6 +4524,22 @@ xfs_bmap_one_block(
4474 return rval; 4524 return rval;
4475} 4525}
4476 4526
4527STATIC int
4528xfs_bmap_sanity_check(
4529 struct xfs_mount *mp,
4530 struct xfs_buf *bp,
4531 int level)
4532{
4533 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4534
4535 if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
4536 be16_to_cpu(block->bb_level) != level ||
4537 be16_to_cpu(block->bb_numrecs) == 0 ||
4538 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
4539 return 0;
4540 return 1;
4541}
4542
4477/* 4543/*
4478 * Read in the extents to if_extents. 4544 * Read in the extents to if_extents.
4479 * All inode fields are set up by caller, we just traverse the btree 4545 * All inode fields are set up by caller, we just traverse the btree
@@ -4486,7 +4552,7 @@ xfs_bmap_read_extents(
4486 xfs_inode_t *ip, /* incore inode */ 4552 xfs_inode_t *ip, /* incore inode */
4487 int whichfork) /* data or attr fork */ 4553 int whichfork) /* data or attr fork */
4488{ 4554{
4489 xfs_bmbt_block_t *block; /* current btree block */ 4555 struct xfs_btree_block *block; /* current btree block */
4490 xfs_fsblock_t bno; /* block # of "block" */ 4556 xfs_fsblock_t bno; /* block # of "block" */
4491 xfs_buf_t *bp; /* buffer for "block" */ 4557 xfs_buf_t *bp; /* buffer for "block" */
4492 int error; /* error return value */ 4558 int error; /* error return value */
@@ -4510,7 +4576,7 @@ xfs_bmap_read_extents(
4510 */ 4576 */
4511 level = be16_to_cpu(block->bb_level); 4577 level = be16_to_cpu(block->bb_level);
4512 ASSERT(level > 0); 4578 ASSERT(level > 0);
4513 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 4579 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
4514 bno = be64_to_cpu(*pp); 4580 bno = be64_to_cpu(*pp);
4515 ASSERT(bno != NULLDFSBNO); 4581 ASSERT(bno != NULLDFSBNO);
4516 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 4582 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -4523,13 +4589,13 @@ xfs_bmap_read_extents(
4523 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 4589 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
4524 XFS_BMAP_BTREE_REF))) 4590 XFS_BMAP_BTREE_REF)))
4525 return error; 4591 return error;
4526 block = XFS_BUF_TO_BMBT_BLOCK(bp); 4592 block = XFS_BUF_TO_BLOCK(bp);
4527 XFS_WANT_CORRUPTED_GOTO( 4593 XFS_WANT_CORRUPTED_GOTO(
4528 XFS_BMAP_SANITY_CHECK(mp, block, level), 4594 xfs_bmap_sanity_check(mp, bp, level),
4529 error0); 4595 error0);
4530 if (level == 0) 4596 if (level == 0)
4531 break; 4597 break;
4532 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); 4598 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
4533 bno = be64_to_cpu(*pp); 4599 bno = be64_to_cpu(*pp);
4534 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); 4600 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
4535 xfs_trans_brelse(tp, bp); 4601 xfs_trans_brelse(tp, bp);
@@ -4549,7 +4615,7 @@ xfs_bmap_read_extents(
4549 xfs_extnum_t start; 4615 xfs_extnum_t start;
4550 4616
4551 4617
4552 num_recs = be16_to_cpu(block->bb_numrecs); 4618 num_recs = xfs_btree_get_numrecs(block);
4553 if (unlikely(i + num_recs > room)) { 4619 if (unlikely(i + num_recs > room)) {
4554 ASSERT(i + num_recs <= room); 4620 ASSERT(i + num_recs <= room);
4555 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 4621 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
@@ -4561,18 +4627,18 @@ xfs_bmap_read_extents(
4561 goto error0; 4627 goto error0;
4562 } 4628 }
4563 XFS_WANT_CORRUPTED_GOTO( 4629 XFS_WANT_CORRUPTED_GOTO(
4564 XFS_BMAP_SANITY_CHECK(mp, block, 0), 4630 xfs_bmap_sanity_check(mp, bp, 0),
4565 error0); 4631 error0);
4566 /* 4632 /*
4567 * Read-ahead the next leaf block, if any. 4633 * Read-ahead the next leaf block, if any.
4568 */ 4634 */
4569 nextbno = be64_to_cpu(block->bb_rightsib); 4635 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
4570 if (nextbno != NULLFSBLOCK) 4636 if (nextbno != NULLFSBLOCK)
4571 xfs_btree_reada_bufl(mp, nextbno, 1); 4637 xfs_btree_reada_bufl(mp, nextbno, 1);
4572 /* 4638 /*
4573 * Copy records into the extent records. 4639 * Copy records into the extent records.
4574 */ 4640 */
4575 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); 4641 frp = XFS_BMBT_REC_ADDR(mp, block, 1);
4576 start = i; 4642 start = i;
4577 for (j = 0; j < num_recs; j++, i++, frp++) { 4643 for (j = 0; j < num_recs; j++, i++, frp++) {
4578 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); 4644 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
@@ -4603,7 +4669,7 @@ xfs_bmap_read_extents(
4603 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 4669 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
4604 XFS_BMAP_BTREE_REF))) 4670 XFS_BMAP_BTREE_REF)))
4605 return error; 4671 return error;
4606 block = XFS_BUF_TO_BMBT_BLOCK(bp); 4672 block = XFS_BUF_TO_BLOCK(bp);
4607 } 4673 }
4608 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 4674 ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4609 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); 4675 ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
@@ -5029,8 +5095,7 @@ xfs_bmapi(
5029 if (abno == NULLFSBLOCK) 5095 if (abno == NULLFSBLOCK)
5030 break; 5096 break;
5031 if ((ifp->if_flags & XFS_IFBROOT) && !cur) { 5097 if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
5032 cur = xfs_btree_init_cursor(mp, 5098 cur = xfs_bmbt_init_cursor(mp, tp,
5033 tp, NULL, 0, XFS_BTNUM_BMAP,
5034 ip, whichfork); 5099 ip, whichfork);
5035 cur->bc_private.b.firstblock = 5100 cur->bc_private.b.firstblock =
5036 *firstblock; 5101 *firstblock;
@@ -5147,9 +5212,8 @@ xfs_bmapi(
5147 */ 5212 */
5148 ASSERT(mval->br_blockcount <= len); 5213 ASSERT(mval->br_blockcount <= len);
5149 if ((ifp->if_flags & XFS_IFBROOT) && !cur) { 5214 if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
5150 cur = xfs_btree_init_cursor(mp, 5215 cur = xfs_bmbt_init_cursor(mp,
5151 tp, NULL, 0, XFS_BTNUM_BMAP, 5216 tp, ip, whichfork);
5152 ip, whichfork);
5153 cur->bc_private.b.firstblock = 5217 cur->bc_private.b.firstblock =
5154 *firstblock; 5218 *firstblock;
5155 cur->bc_private.b.flist = flist; 5219 cur->bc_private.b.flist = flist;
@@ -5440,8 +5504,7 @@ xfs_bunmapi(
5440 logflags = 0; 5504 logflags = 0;
5441 if (ifp->if_flags & XFS_IFBROOT) { 5505 if (ifp->if_flags & XFS_IFBROOT) {
5442 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 5506 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5443 cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, 5507 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5444 whichfork);
5445 cur->bc_private.b.firstblock = *firstblock; 5508 cur->bc_private.b.firstblock = *firstblock;
5446 cur->bc_private.b.flist = flist; 5509 cur->bc_private.b.flist = flist;
5447 cur->bc_private.b.flags = 0; 5510 cur->bc_private.b.flags = 0;
@@ -6131,7 +6194,7 @@ xfs_bmap_get_bp(
6131 6194
6132void 6195void
6133xfs_check_block( 6196xfs_check_block(
6134 xfs_bmbt_block_t *block, 6197 struct xfs_btree_block *block,
6135 xfs_mount_t *mp, 6198 xfs_mount_t *mp,
6136 int root, 6199 int root,
6137 short sz) 6200 short sz)
@@ -6143,36 +6206,29 @@ xfs_check_block(
6143 ASSERT(be16_to_cpu(block->bb_level) > 0); 6206 ASSERT(be16_to_cpu(block->bb_level) > 0);
6144 6207
6145 prevp = NULL; 6208 prevp = NULL;
6146 for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) { 6209 for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
6147 dmxr = mp->m_bmap_dmxr[0]; 6210 dmxr = mp->m_bmap_dmxr[0];
6148 6211 keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
6149 if (root) {
6150 keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz);
6151 } else {
6152 keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i);
6153 }
6154 6212
6155 if (prevp) { 6213 if (prevp) {
6156 xfs_btree_check_key(XFS_BTNUM_BMAP, prevp, keyp); 6214 ASSERT(be64_to_cpu(prevp->br_startoff) <
6215 be64_to_cpu(keyp->br_startoff));
6157 } 6216 }
6158 prevp = keyp; 6217 prevp = keyp;
6159 6218
6160 /* 6219 /*
6161 * Compare the block numbers to see if there are dups. 6220 * Compare the block numbers to see if there are dups.
6162 */ 6221 */
6222 if (root)
6223 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
6224 else
6225 pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
6163 6226
6164 if (root) {
6165 pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz);
6166 } else {
6167 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr);
6168 }
6169 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { 6227 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
6170 if (root) { 6228 if (root)
6171 thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz); 6229 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
6172 } else { 6230 else
6173 thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j, 6231 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
6174 dmxr);
6175 }
6176 if (*thispa == *pp) { 6232 if (*thispa == *pp) {
6177 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 6233 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
6178 __func__, j, i, 6234 __func__, j, i,
@@ -6195,7 +6251,7 @@ xfs_bmap_check_leaf_extents(
6195 xfs_inode_t *ip, /* incore inode pointer */ 6251 xfs_inode_t *ip, /* incore inode pointer */
6196 int whichfork) /* data or attr fork */ 6252 int whichfork) /* data or attr fork */
6197{ 6253{
6198 xfs_bmbt_block_t *block; /* current btree block */ 6254 struct xfs_btree_block *block; /* current btree block */
6199 xfs_fsblock_t bno; /* block # of "block" */ 6255 xfs_fsblock_t bno; /* block # of "block" */
6200 xfs_buf_t *bp; /* buffer for "block" */ 6256 xfs_buf_t *bp; /* buffer for "block" */
6201 int error; /* error return value */ 6257 int error; /* error return value */
@@ -6223,7 +6279,7 @@ xfs_bmap_check_leaf_extents(
6223 level = be16_to_cpu(block->bb_level); 6279 level = be16_to_cpu(block->bb_level);
6224 ASSERT(level > 0); 6280 ASSERT(level > 0);
6225 xfs_check_block(block, mp, 1, ifp->if_broot_bytes); 6281 xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
6226 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 6282 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
6227 bno = be64_to_cpu(*pp); 6283 bno = be64_to_cpu(*pp);
6228 6284
6229 ASSERT(bno != NULLDFSBNO); 6285 ASSERT(bno != NULLDFSBNO);
@@ -6245,9 +6301,9 @@ xfs_bmap_check_leaf_extents(
6245 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 6301 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
6246 XFS_BMAP_BTREE_REF))) 6302 XFS_BMAP_BTREE_REF)))
6247 goto error_norelse; 6303 goto error_norelse;
6248 block = XFS_BUF_TO_BMBT_BLOCK(bp); 6304 block = XFS_BUF_TO_BLOCK(bp);
6249 XFS_WANT_CORRUPTED_GOTO( 6305 XFS_WANT_CORRUPTED_GOTO(
6250 XFS_BMAP_SANITY_CHECK(mp, block, level), 6306 xfs_bmap_sanity_check(mp, bp, level),
6251 error0); 6307 error0);
6252 if (level == 0) 6308 if (level == 0)
6253 break; 6309 break;
@@ -6258,7 +6314,7 @@ xfs_bmap_check_leaf_extents(
6258 */ 6314 */
6259 6315
6260 xfs_check_block(block, mp, 0, 0); 6316 xfs_check_block(block, mp, 0, 0);
6261 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); 6317 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
6262 bno = be64_to_cpu(*pp); 6318 bno = be64_to_cpu(*pp);
6263 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); 6319 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
6264 if (bp_release) { 6320 if (bp_release) {
@@ -6280,13 +6336,13 @@ xfs_bmap_check_leaf_extents(
6280 xfs_extnum_t num_recs; 6336 xfs_extnum_t num_recs;
6281 6337
6282 6338
6283 num_recs = be16_to_cpu(block->bb_numrecs); 6339 num_recs = xfs_btree_get_numrecs(block);
6284 6340
6285 /* 6341 /*
6286 * Read-ahead the next leaf block, if any. 6342 * Read-ahead the next leaf block, if any.
6287 */ 6343 */
6288 6344
6289 nextbno = be64_to_cpu(block->bb_rightsib); 6345 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6290 6346
6291 /* 6347 /*
6292 * Check all the extents to make sure they are OK. 6348 * Check all the extents to make sure they are OK.
@@ -6294,13 +6350,17 @@ xfs_bmap_check_leaf_extents(
6294 * conform with the first entry in this one. 6350 * conform with the first entry in this one.
6295 */ 6351 */
6296 6352
6297 ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); 6353 ep = XFS_BMBT_REC_ADDR(mp, block, 1);
6298 if (i) { 6354 if (i) {
6299 xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep); 6355 ASSERT(xfs_bmbt_disk_get_startoff(&last) +
6356 xfs_bmbt_disk_get_blockcount(&last) <=
6357 xfs_bmbt_disk_get_startoff(ep));
6300 } 6358 }
6301 for (j = 1; j < num_recs; j++) { 6359 for (j = 1; j < num_recs; j++) {
6302 nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); 6360 nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
6303 xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp); 6361 ASSERT(xfs_bmbt_disk_get_startoff(ep) +
6362 xfs_bmbt_disk_get_blockcount(ep) <=
6363 xfs_bmbt_disk_get_startoff(nextp));
6304 ep = nextp; 6364 ep = nextp;
6305 } 6365 }
6306 6366
@@ -6326,7 +6386,7 @@ xfs_bmap_check_leaf_extents(
6326 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 6386 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
6327 XFS_BMAP_BTREE_REF))) 6387 XFS_BMAP_BTREE_REF)))
6328 goto error_norelse; 6388 goto error_norelse;
6329 block = XFS_BUF_TO_BMBT_BLOCK(bp); 6389 block = XFS_BUF_TO_BLOCK(bp);
6330 } 6390 }
6331 if (bp_release) { 6391 if (bp_release) {
6332 bp_release = 0; 6392 bp_release = 0;
@@ -6356,7 +6416,7 @@ xfs_bmap_count_blocks(
6356 int whichfork, /* data or attr fork */ 6416 int whichfork, /* data or attr fork */
6357 int *count) /* out: count of blocks */ 6417 int *count) /* out: count of blocks */
6358{ 6418{
6359 xfs_bmbt_block_t *block; /* current btree block */ 6419 struct xfs_btree_block *block; /* current btree block */
6360 xfs_fsblock_t bno; /* block # of "block" */ 6420 xfs_fsblock_t bno; /* block # of "block" */
6361 xfs_ifork_t *ifp; /* fork structure */ 6421 xfs_ifork_t *ifp; /* fork structure */
6362 int level; /* btree level, for checking */ 6422 int level; /* btree level, for checking */
@@ -6379,7 +6439,7 @@ xfs_bmap_count_blocks(
6379 block = ifp->if_broot; 6439 block = ifp->if_broot;
6380 level = be16_to_cpu(block->bb_level); 6440 level = be16_to_cpu(block->bb_level);
6381 ASSERT(level > 0); 6441 ASSERT(level > 0);
6382 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 6442 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
6383 bno = be64_to_cpu(*pp); 6443 bno = be64_to_cpu(*pp);
6384 ASSERT(bno != NULLDFSBNO); 6444 ASSERT(bno != NULLDFSBNO);
6385 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 6445 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -6413,29 +6473,29 @@ xfs_bmap_count_tree(
6413 __be64 *pp; 6473 __be64 *pp;
6414 xfs_fsblock_t bno = blockno; 6474 xfs_fsblock_t bno = blockno;
6415 xfs_fsblock_t nextbno; 6475 xfs_fsblock_t nextbno;
6416 xfs_bmbt_block_t *block, *nextblock; 6476 struct xfs_btree_block *block, *nextblock;
6417 int numrecs; 6477 int numrecs;
6418 6478
6419 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) 6479 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
6420 return error; 6480 return error;
6421 *count += 1; 6481 *count += 1;
6422 block = XFS_BUF_TO_BMBT_BLOCK(bp); 6482 block = XFS_BUF_TO_BLOCK(bp);
6423 6483
6424 if (--level) { 6484 if (--level) {
6425 /* Not at node above leafs, count this level of nodes */ 6485 /* Not at node above leafs, count this level of nodes */
6426 nextbno = be64_to_cpu(block->bb_rightsib); 6486 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6427 while (nextbno != NULLFSBLOCK) { 6487 while (nextbno != NULLFSBLOCK) {
6428 if ((error = xfs_btree_read_bufl(mp, tp, nextbno, 6488 if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
6429 0, &nbp, XFS_BMAP_BTREE_REF))) 6489 0, &nbp, XFS_BMAP_BTREE_REF)))
6430 return error; 6490 return error;
6431 *count += 1; 6491 *count += 1;
6432 nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp); 6492 nextblock = XFS_BUF_TO_BLOCK(nbp);
6433 nextbno = be64_to_cpu(nextblock->bb_rightsib); 6493 nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
6434 xfs_trans_brelse(tp, nbp); 6494 xfs_trans_brelse(tp, nbp);
6435 } 6495 }
6436 6496
6437 /* Dive to the next level */ 6497 /* Dive to the next level */
6438 pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); 6498 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
6439 bno = be64_to_cpu(*pp); 6499 bno = be64_to_cpu(*pp);
6440 if (unlikely((error = 6500 if (unlikely((error =
6441 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { 6501 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
@@ -6448,9 +6508,9 @@ xfs_bmap_count_tree(
6448 } else { 6508 } else {
6449 /* count all level 1 nodes and their leaves */ 6509 /* count all level 1 nodes and their leaves */
6450 for (;;) { 6510 for (;;) {
6451 nextbno = be64_to_cpu(block->bb_rightsib); 6511 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6452 numrecs = be16_to_cpu(block->bb_numrecs); 6512 numrecs = be16_to_cpu(block->bb_numrecs);
6453 xfs_bmap_disk_count_leaves(0, block, numrecs, count); 6513 xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
6454 xfs_trans_brelse(tp, bp); 6514 xfs_trans_brelse(tp, bp);
6455 if (nextbno == NULLFSBLOCK) 6515 if (nextbno == NULLFSBLOCK)
6456 break; 6516 break;
@@ -6459,7 +6519,7 @@ xfs_bmap_count_tree(
6459 XFS_BMAP_BTREE_REF))) 6519 XFS_BMAP_BTREE_REF)))
6460 return error; 6520 return error;
6461 *count += 1; 6521 *count += 1;
6462 block = XFS_BUF_TO_BMBT_BLOCK(bp); 6522 block = XFS_BUF_TO_BLOCK(bp);
6463 } 6523 }
6464 } 6524 }
6465 return 0; 6525 return 0;
@@ -6489,8 +6549,8 @@ xfs_bmap_count_leaves(
6489 */ 6549 */
6490STATIC void 6550STATIC void
6491xfs_bmap_disk_count_leaves( 6551xfs_bmap_disk_count_leaves(
6492 xfs_extnum_t idx, 6552 struct xfs_mount *mp,
6493 xfs_bmbt_block_t *block, 6553 struct xfs_btree_block *block,
6494 int numrecs, 6554 int numrecs,
6495 int *count) 6555 int *count)
6496{ 6556{
@@ -6498,7 +6558,7 @@ xfs_bmap_disk_count_leaves(
6498 xfs_bmbt_rec_t *frp; 6558 xfs_bmbt_rec_t *frp;
6499 6559
6500 for (b = 1; b <= numrecs; b++) { 6560 for (b = 1; b <= numrecs; b++) {
6501 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); 6561 frp = XFS_BMBT_REC_ADDR(mp, block, b);
6502 *count += xfs_bmbt_disk_get_blockcount(frp); 6562 *count += xfs_bmbt_disk_get_blockcount(frp);
6503 } 6563 }
6504} 6564}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 9f3e3a836d15..7c9d12cd7a47 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -137,9 +137,7 @@ typedef struct xfs_bmalloca {
137 char conv; /* overwriting unwritten extents */ 137 char conv; /* overwriting unwritten extents */
138} xfs_bmalloca_t; 138} xfs_bmalloca_t;
139 139
140#ifdef __KERNEL__ 140#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
141
142#if defined(XFS_BMAP_TRACE)
143/* 141/*
144 * Trace operations for bmap extent tracing 142 * Trace operations for bmap extent tracing
145 */ 143 */
@@ -163,9 +161,12 @@ xfs_bmap_trace_exlist(
163 int whichfork); /* data or attr fork */ 161 int whichfork); /* data or attr fork */
164#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ 162#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
165 xfs_bmap_trace_exlist(__func__,ip,c,w) 163 xfs_bmap_trace_exlist(__func__,ip,c,w)
166#else 164
165#else /* __KERNEL__ && XFS_BMAP_TRACE */
166
167#define XFS_BMAP_TRACE_EXLIST(ip,c,w) 167#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
168#endif 168
169#endif /* __KERNEL__ && XFS_BMAP_TRACE */
169 170
170/* 171/*
171 * Convert inode from non-attributed to attributed. 172 * Convert inode from non-attributed to attributed.
@@ -206,20 +207,6 @@ xfs_bmap_compute_maxlevels(
206 int whichfork); /* data or attr fork */ 207 int whichfork); /* data or attr fork */
207 208
208/* 209/*
209 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
210 * caller. Frees all the extents that need freeing, which must be done
211 * last due to locking considerations.
212 *
213 * Return 1 if the given transaction was committed and a new one allocated,
214 * and 0 otherwise.
215 */
216int /* error */
217xfs_bmap_finish(
218 struct xfs_trans **tp, /* transaction pointer addr */
219 xfs_bmap_free_t *flist, /* i/o: list extents to free */
220 int *committed); /* xact committed or not */
221
222/*
223 * Returns the file-relative block number of the first unused block in the file. 210 * Returns the file-relative block number of the first unused block in the file.
224 * This is the lowest-address hole if the file has holes, else the first block 211 * This is the lowest-address hole if the file has holes, else the first block
225 * past the end of file. 212 * past the end of file.
@@ -344,6 +331,32 @@ xfs_bunmapi(
344 int *done); /* set if not done yet */ 331 int *done); /* set if not done yet */
345 332
346/* 333/*
334 * Check an extent list, which has just been read, for
335 * any bit in the extent flag field.
336 */
337int
338xfs_check_nostate_extents(
339 struct xfs_ifork *ifp,
340 xfs_extnum_t idx,
341 xfs_extnum_t num);
342
343#ifdef __KERNEL__
344
345/*
346 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
347 * caller. Frees all the extents that need freeing, which must be done
348 * last due to locking considerations.
349 *
350 * Return 1 if the given transaction was committed and a new one allocated,
351 * and 0 otherwise.
352 */
353int /* error */
354xfs_bmap_finish(
355 struct xfs_trans **tp, /* transaction pointer addr */
356 xfs_bmap_free_t *flist, /* i/o: list extents to free */
357 int *committed); /* xact committed or not */
358
359/*
347 * Fcntl interface to xfs_bmapi. 360 * Fcntl interface to xfs_bmapi.
348 */ 361 */
349int /* error code */ 362int /* error code */
@@ -375,16 +388,6 @@ xfs_bmap_count_blocks(
375 int *count); 388 int *count);
376 389
377/* 390/*
378 * Check an extent list, which has just been read, for
379 * any bit in the extent flag field.
380 */
381int
382xfs_check_nostate_extents(
383 struct xfs_ifork *ifp,
384 xfs_extnum_t idx,
385 xfs_extnum_t num);
386
387/*
388 * Search the extent records for the entry containing block bno. 391 * Search the extent records for the entry containing block bno.
389 * If bno lies in a hole, point to the next entry. If bno lies 392 * If bno lies in a hole, point to the next entry. If bno lies
390 * past eof, *eofp will be set, and *prevp will contain the last 393 * past eof, *eofp will be set, and *prevp will contain the last
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 23efad29a5cd..e46e02b8e277 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,1406 +37,13 @@
37#include "xfs_inode_item.h" 37#include "xfs_inode_item.h"
38#include "xfs_alloc.h" 38#include "xfs_alloc.h"
39#include "xfs_btree.h" 39#include "xfs_btree.h"
40#include "xfs_btree_trace.h"
40#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
41#include "xfs_itable.h" 42#include "xfs_itable.h"
42#include "xfs_bmap.h" 43#include "xfs_bmap.h"
43#include "xfs_error.h" 44#include "xfs_error.h"
44#include "xfs_quota.h" 45#include "xfs_quota.h"
45 46
#ifdef XFS_BMBT_TRACE
/*
 * Trace buffer that xfs_bmbt_trace_enter() writes to, in addition to the
 * per-inode i_btrace buffer.
 */
ktrace_t	*xfs_bmbt_trace_buf;
#endif
49
50/*
51 * Prototypes for internal btree functions.
52 */
53
54
55STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *);
56STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
57STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
58STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *);
59STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *);
60STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *,
61 __uint64_t *, xfs_btree_cur_t **, int *);
62STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
63
64
65#if defined(XFS_BMBT_TRACE)
66
/*
 * Event tags passed as the "s" argument of xfs_bmbt_trace_enter() to say
 * what kind of event a trace entry describes.
 */
static char	ARGS[] = "args";
static char	ENTRY[] = "entry";
static char	ERROR[] = "error";
/* Drop any macro already named EXIT so the name can be used for the tag. */
#undef EXIT
static char	EXIT[] = "exit";
72
73/*
74 * Add a trace buffer entry for the arguments given to the routine,
75 * generic form.
76 */
77STATIC void
78xfs_bmbt_trace_enter(
79 const char *func,
80 xfs_btree_cur_t *cur,
81 char *s,
82 int type,
83 int line,
84 __psunsigned_t a0,
85 __psunsigned_t a1,
86 __psunsigned_t a2,
87 __psunsigned_t a3,
88 __psunsigned_t a4,
89 __psunsigned_t a5,
90 __psunsigned_t a6,
91 __psunsigned_t a7,
92 __psunsigned_t a8,
93 __psunsigned_t a9,
94 __psunsigned_t a10)
95{
96 xfs_inode_t *ip;
97 int whichfork;
98
99 ip = cur->bc_private.b.ip;
100 whichfork = cur->bc_private.b.whichfork;
101 ktrace_enter(xfs_bmbt_trace_buf,
102 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
103 (void *)func, (void *)s, (void *)ip, (void *)cur,
104 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
105 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
106 (void *)a8, (void *)a9, (void *)a10);
107 ASSERT(ip->i_btrace);
108 ktrace_enter(ip->i_btrace,
109 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
110 (void *)func, (void *)s, (void *)ip, (void *)cur,
111 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
112 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
113 (void *)a8, (void *)a9, (void *)a10);
114}
115/*
116 * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
117 */
118STATIC void
119xfs_bmbt_trace_argbi(
120 const char *func,
121 xfs_btree_cur_t *cur,
122 xfs_buf_t *b,
123 int i,
124 int line)
125{
126 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBI, line,
127 (__psunsigned_t)b, i, 0, 0,
128 0, 0, 0, 0,
129 0, 0, 0);
130}
131
132/*
133 * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
134 */
135STATIC void
136xfs_bmbt_trace_argbii(
137 const char *func,
138 xfs_btree_cur_t *cur,
139 xfs_buf_t *b,
140 int i0,
141 int i1,
142 int line)
143{
144 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBII, line,
145 (__psunsigned_t)b, i0, i1, 0,
146 0, 0, 0, 0,
147 0, 0, 0);
148}
149
150/*
151 * Add a trace buffer entry for arguments, for 3 block-length args
152 * and an integer arg.
153 */
154STATIC void
155xfs_bmbt_trace_argfffi(
156 const char *func,
157 xfs_btree_cur_t *cur,
158 xfs_dfiloff_t o,
159 xfs_dfsbno_t b,
160 xfs_dfilblks_t i,
161 int j,
162 int line)
163{
164 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGFFFI, line,
165 o >> 32, (int)o, b >> 32, (int)b,
166 i >> 32, (int)i, (int)j, 0,
167 0, 0, 0);
168}
169
170/*
171 * Add a trace buffer entry for arguments, for one integer arg.
172 */
173STATIC void
174xfs_bmbt_trace_argi(
175 const char *func,
176 xfs_btree_cur_t *cur,
177 int i,
178 int line)
179{
180 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGI, line,
181 i, 0, 0, 0,
182 0, 0, 0, 0,
183 0, 0, 0);
184}
185
186/*
187 * Add a trace buffer entry for arguments, for int, fsblock, key.
188 */
189STATIC void
190xfs_bmbt_trace_argifk(
191 const char *func,
192 xfs_btree_cur_t *cur,
193 int i,
194 xfs_fsblock_t f,
195 xfs_dfiloff_t o,
196 int line)
197{
198 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
199 i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32,
200 (int)o, 0, 0, 0,
201 0, 0, 0);
202}
203
204/*
205 * Add a trace buffer entry for arguments, for int, fsblock, rec.
206 */
207STATIC void
208xfs_bmbt_trace_argifr(
209 const char *func,
210 xfs_btree_cur_t *cur,
211 int i,
212 xfs_fsblock_t f,
213 xfs_bmbt_rec_t *r,
214 int line)
215{
216 xfs_dfsbno_t b;
217 xfs_dfilblks_t c;
218 xfs_dfsbno_t d;
219 xfs_dfiloff_t o;
220 xfs_bmbt_irec_t s;
221
222 d = (xfs_dfsbno_t)f;
223 xfs_bmbt_disk_get_all(r, &s);
224 o = (xfs_dfiloff_t)s.br_startoff;
225 b = (xfs_dfsbno_t)s.br_startblock;
226 c = s.br_blockcount;
227 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFR, line,
228 i, d >> 32, (int)d, o >> 32,
229 (int)o, b >> 32, (int)b, c >> 32,
230 (int)c, 0, 0);
231}
232
233/*
234 * Add a trace buffer entry for arguments, for int, key.
235 */
236STATIC void
237xfs_bmbt_trace_argik(
238 const char *func,
239 xfs_btree_cur_t *cur,
240 int i,
241 xfs_bmbt_key_t *k,
242 int line)
243{
244 xfs_dfiloff_t o;
245
246 o = be64_to_cpu(k->br_startoff);
247 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
248 i, o >> 32, (int)o, 0,
249 0, 0, 0, 0,
250 0, 0, 0);
251}
252
253/*
254 * Add a trace buffer entry for the cursor/operation.
255 */
256STATIC void
257xfs_bmbt_trace_cursor(
258 const char *func,
259 xfs_btree_cur_t *cur,
260 char *s,
261 int line)
262{
263 xfs_bmbt_rec_host_t r;
264
265 xfs_bmbt_set_all(&r, &cur->bc_rec.b);
266 xfs_bmbt_trace_enter(func, cur, s, XFS_BMBT_KTRACE_CUR, line,
267 (cur->bc_nlevels << 24) | (cur->bc_private.b.flags << 16) |
268 cur->bc_private.b.allocated,
269 r.l0 >> 32, (int)r.l0,
270 r.l1 >> 32, (int)r.l1,
271 (unsigned long)cur->bc_bufs[0], (unsigned long)cur->bc_bufs[1],
272 (unsigned long)cur->bc_bufs[2], (unsigned long)cur->bc_bufs[3],
273 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
274 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
275}
276
/*
 * Call-site wrappers: each forwards to the matching trace helper and
 * supplies the calling function's name and source line automatically.
 */
#define	XFS_BMBT_TRACE_ARGBI(c, b, i)	\
	xfs_bmbt_trace_argbi(__func__, (c), (b), (i), __LINE__)
#define	XFS_BMBT_TRACE_ARGBII(c, b, i, j)	\
	xfs_bmbt_trace_argbii(__func__, (c), (b), (i), (j), __LINE__)
#define	XFS_BMBT_TRACE_ARGFFFI(c, o, b, i, j)	\
	xfs_bmbt_trace_argfffi(__func__, (c), (o), (b), (i), (j), __LINE__)
#define	XFS_BMBT_TRACE_ARGI(c, i)	\
	xfs_bmbt_trace_argi(__func__, (c), (i), __LINE__)
#define	XFS_BMBT_TRACE_ARGIFK(c, i, f, s)	\
	xfs_bmbt_trace_argifk(__func__, (c), (i), (f), (s), __LINE__)
#define	XFS_BMBT_TRACE_ARGIFR(c, i, f, r)	\
	xfs_bmbt_trace_argifr(__func__, (c), (i), (f), (r), __LINE__)
#define	XFS_BMBT_TRACE_ARGIK(c, i, k)	\
	xfs_bmbt_trace_argik(__func__, (c), (i), (k), __LINE__)
#define	XFS_BMBT_TRACE_CURSOR(c, s)	\
	xfs_bmbt_trace_cursor(__func__, (c), (s), __LINE__)
293#else
/*
 * Tracing compiled out: every wrapper is a no-op statement.  The arguments
 * are not evaluated, so tags such as ENTRY/EXIT need not be defined.
 */
#define	XFS_BMBT_TRACE_ARGBI(c, b, i)		do { } while (0)
#define	XFS_BMBT_TRACE_ARGBII(c, b, i, j)	do { } while (0)
#define	XFS_BMBT_TRACE_ARGFFFI(c, o, b, i, j)	do { } while (0)
#define	XFS_BMBT_TRACE_ARGI(c, i)		do { } while (0)
#define	XFS_BMBT_TRACE_ARGIFK(c, i, f, s)	do { } while (0)
#define	XFS_BMBT_TRACE_ARGIFR(c, i, f, r)	do { } while (0)
#define	XFS_BMBT_TRACE_ARGIK(c, i, k)		do { } while (0)
#define	XFS_BMBT_TRACE_CURSOR(c, s)		do { } while (0)
302#endif /* XFS_BMBT_TRACE */
303
304
305/*
306 * Internal functions.
307 */
308
309/*
310 * Delete record pointed to by cur/level.
311 */
312STATIC int /* error */
313xfs_bmbt_delrec(
314 xfs_btree_cur_t *cur,
315 int level,
316 int *stat) /* success/failure */
317{
318 xfs_bmbt_block_t *block; /* bmap btree block */
319 xfs_fsblock_t bno; /* fs-relative block number */
320 xfs_buf_t *bp; /* buffer for block */
321 int error; /* error return value */
322 int i; /* loop counter */
323 int j; /* temp state */
324 xfs_bmbt_key_t key; /* bmap btree key */
325 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
326 xfs_fsblock_t lbno; /* left sibling block number */
327 xfs_buf_t *lbp; /* left buffer pointer */
328 xfs_bmbt_block_t *left; /* left btree block */
329 xfs_bmbt_key_t *lkp; /* left btree key */
330 xfs_bmbt_ptr_t *lpp; /* left address pointer */
331 int lrecs=0; /* left record count */
332 xfs_bmbt_rec_t *lrp; /* left record pointer */
333 xfs_mount_t *mp; /* file system mount point */
334 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
335 int ptr; /* key/record index */
336 xfs_fsblock_t rbno; /* right sibling block number */
337 xfs_buf_t *rbp; /* right buffer pointer */
338 xfs_bmbt_block_t *right; /* right btree block */
339 xfs_bmbt_key_t *rkp; /* right btree key */
340 xfs_bmbt_rec_t *rp; /* pointer to bmap btree rec */
341 xfs_bmbt_ptr_t *rpp; /* right address pointer */
342 xfs_bmbt_block_t *rrblock; /* right-right btree block */
343 xfs_buf_t *rrbp; /* right-right buffer pointer */
344 int rrecs=0; /* right record count */
345 xfs_bmbt_rec_t *rrp; /* right record pointer */
346 xfs_btree_cur_t *tcur; /* temporary btree cursor */
347 int numrecs; /* temporary numrec count */
348 int numlrecs, numrrecs;
349
350 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
351 XFS_BMBT_TRACE_ARGI(cur, level);
352 ptr = cur->bc_ptrs[level];
353 tcur = NULL;
354 if (ptr == 0) {
355 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
356 *stat = 0;
357 return 0;
358 }
359 block = xfs_bmbt_get_block(cur, level, &bp);
360 numrecs = be16_to_cpu(block->bb_numrecs);
361#ifdef DEBUG
362 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
363 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
364 goto error0;
365 }
366#endif
367 if (ptr > numrecs) {
368 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
369 *stat = 0;
370 return 0;
371 }
372 XFS_STATS_INC(xs_bmbt_delrec);
373 if (level > 0) {
374 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
375 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
376#ifdef DEBUG
377 for (i = ptr; i < numrecs; i++) {
378 if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
379 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
380 goto error0;
381 }
382 }
383#endif
384 if (ptr < numrecs) {
385 memmove(&kp[ptr - 1], &kp[ptr],
386 (numrecs - ptr) * sizeof(*kp));
387 memmove(&pp[ptr - 1], &pp[ptr],
388 (numrecs - ptr) * sizeof(*pp));
389 xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1);
390 xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1);
391 }
392 } else {
393 rp = XFS_BMAP_REC_IADDR(block, 1, cur);
394 if (ptr < numrecs) {
395 memmove(&rp[ptr - 1], &rp[ptr],
396 (numrecs - ptr) * sizeof(*rp));
397 xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
398 }
399 if (ptr == 1) {
400 key.br_startoff =
401 cpu_to_be64(xfs_bmbt_disk_get_startoff(rp));
402 kp = &key;
403 }
404 }
405 numrecs--;
406 block->bb_numrecs = cpu_to_be16(numrecs);
407 xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
408 /*
409 * We're at the root level.
410 * First, shrink the root block in-memory.
411 * Try to get rid of the next level down.
412 * If we can't then there's nothing left to do.
413 */
414 if (level == cur->bc_nlevels - 1) {
415 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
416 cur->bc_private.b.whichfork);
417 if ((error = xfs_bmbt_killroot(cur))) {
418 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
419 goto error0;
420 }
421 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
422 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
423 goto error0;
424 }
425 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
426 *stat = 1;
427 return 0;
428 }
429 if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) {
430 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
431 goto error0;
432 }
433 if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
434 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
435 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
436 goto error0;
437 }
438 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
439 *stat = 1;
440 return 0;
441 }
442 rbno = be64_to_cpu(block->bb_rightsib);
443 lbno = be64_to_cpu(block->bb_leftsib);
444 /*
445 * One child of root, need to get a chance to copy its contents
446 * into the root and delete it. Can't go up to next level,
447 * there's nothing to delete there.
448 */
449 if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK &&
450 level == cur->bc_nlevels - 2) {
451 if ((error = xfs_bmbt_killroot(cur))) {
452 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
453 goto error0;
454 }
455 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
456 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
457 goto error0;
458 }
459 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
460 *stat = 1;
461 return 0;
462 }
463 ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK);
464 if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
465 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
466 goto error0;
467 }
468 bno = NULLFSBLOCK;
469 if (rbno != NULLFSBLOCK) {
470 i = xfs_btree_lastrec(tcur, level);
471 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
472 if ((error = xfs_bmbt_increment(tcur, level, &i))) {
473 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
474 goto error0;
475 }
476 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
477 i = xfs_btree_lastrec(tcur, level);
478 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
479 rbp = tcur->bc_bufs[level];
480 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
481#ifdef DEBUG
482 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
483 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
484 goto error0;
485 }
486#endif
487 bno = be64_to_cpu(right->bb_leftsib);
488 if (be16_to_cpu(right->bb_numrecs) - 1 >=
489 XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
490 if ((error = xfs_bmbt_lshift(tcur, level, &i))) {
491 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
492 goto error0;
493 }
494 if (i) {
495 ASSERT(be16_to_cpu(block->bb_numrecs) >=
496 XFS_BMAP_BLOCK_IMINRECS(level, tcur));
497 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
498 tcur = NULL;
499 if (level > 0) {
500 if ((error = xfs_bmbt_decrement(cur,
501 level, &i))) {
502 XFS_BMBT_TRACE_CURSOR(cur,
503 ERROR);
504 goto error0;
505 }
506 }
507 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
508 *stat = 1;
509 return 0;
510 }
511 }
512 rrecs = be16_to_cpu(right->bb_numrecs);
513 if (lbno != NULLFSBLOCK) {
514 i = xfs_btree_firstrec(tcur, level);
515 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
516 if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
517 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
518 goto error0;
519 }
520 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
521 }
522 }
523 if (lbno != NULLFSBLOCK) {
524 i = xfs_btree_firstrec(tcur, level);
525 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
526 /*
527 * decrement to last in block
528 */
529 if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
530 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
531 goto error0;
532 }
533 i = xfs_btree_firstrec(tcur, level);
534 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
535 lbp = tcur->bc_bufs[level];
536 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
537#ifdef DEBUG
538 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
539 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
540 goto error0;
541 }
542#endif
543 bno = be64_to_cpu(left->bb_rightsib);
544 if (be16_to_cpu(left->bb_numrecs) - 1 >=
545 XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
546 if ((error = xfs_bmbt_rshift(tcur, level, &i))) {
547 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
548 goto error0;
549 }
550 if (i) {
551 ASSERT(be16_to_cpu(block->bb_numrecs) >=
552 XFS_BMAP_BLOCK_IMINRECS(level, tcur));
553 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
554 tcur = NULL;
555 if (level == 0)
556 cur->bc_ptrs[0]++;
557 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
558 *stat = 1;
559 return 0;
560 }
561 }
562 lrecs = be16_to_cpu(left->bb_numrecs);
563 }
564 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
565 tcur = NULL;
566 mp = cur->bc_mp;
567 ASSERT(bno != NULLFSBLOCK);
568 if (lbno != NULLFSBLOCK &&
569 lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
570 rbno = bno;
571 right = block;
572 rbp = bp;
573 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp,
574 XFS_BMAP_BTREE_REF))) {
575 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
576 goto error0;
577 }
578 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
579 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
580 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
581 goto error0;
582 }
583 } else if (rbno != NULLFSBLOCK &&
584 rrecs + be16_to_cpu(block->bb_numrecs) <=
585 XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
586 lbno = bno;
587 left = block;
588 lbp = bp;
589 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp,
590 XFS_BMAP_BTREE_REF))) {
591 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
592 goto error0;
593 }
594 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
595 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
596 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
597 goto error0;
598 }
599 lrecs = be16_to_cpu(left->bb_numrecs);
600 } else {
601 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
602 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
603 goto error0;
604 }
605 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
606 *stat = 1;
607 return 0;
608 }
609 numlrecs = be16_to_cpu(left->bb_numrecs);
610 numrrecs = be16_to_cpu(right->bb_numrecs);
611 if (level > 0) {
612 lkp = XFS_BMAP_KEY_IADDR(left, numlrecs + 1, cur);
613 lpp = XFS_BMAP_PTR_IADDR(left, numlrecs + 1, cur);
614 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
615 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
616#ifdef DEBUG
617 for (i = 0; i < numrrecs; i++) {
618 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
619 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
620 goto error0;
621 }
622 }
623#endif
624 memcpy(lkp, rkp, numrrecs * sizeof(*lkp));
625 memcpy(lpp, rpp, numrrecs * sizeof(*lpp));
626 xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
627 xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
628 } else {
629 lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur);
630 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
631 memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
632 xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
633 }
634 be16_add_cpu(&left->bb_numrecs, numrrecs);
635 left->bb_rightsib = right->bb_rightsib;
636 xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
637 if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) {
638 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp,
639 be64_to_cpu(left->bb_rightsib),
640 0, &rrbp, XFS_BMAP_BTREE_REF))) {
641 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
642 goto error0;
643 }
644 rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
645 if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
646 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
647 goto error0;
648 }
649 rrblock->bb_leftsib = cpu_to_be64(lbno);
650 xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
651 }
652 xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1,
653 cur->bc_private.b.flist, mp);
654 cur->bc_private.b.ip->i_d.di_nblocks--;
655 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
656 XFS_TRANS_MOD_DQUOT_BYINO(mp, cur->bc_tp, cur->bc_private.b.ip,
657 XFS_TRANS_DQ_BCOUNT, -1L);
658 xfs_trans_binval(cur->bc_tp, rbp);
659 if (bp != lbp) {
660 cur->bc_bufs[level] = lbp;
661 cur->bc_ptrs[level] += lrecs;
662 cur->bc_ra[level] = 0;
663 } else if ((error = xfs_bmbt_increment(cur, level + 1, &i))) {
664 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
665 goto error0;
666 }
667 if (level > 0)
668 cur->bc_ptrs[level]--;
669 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
670 *stat = 2;
671 return 0;
672
673error0:
674 if (tcur)
675 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
676 return error;
677}
678
679/*
680 * Insert one record/level. Return information to the caller
681 * allowing the next level up to proceed if necessary.
682 */
683STATIC int /* error */
684xfs_bmbt_insrec(
685 xfs_btree_cur_t *cur,
686 int level,
687 xfs_fsblock_t *bnop,
688 xfs_bmbt_rec_t *recp,
689 xfs_btree_cur_t **curp,
690 int *stat) /* no-go/done/continue */
691{
692 xfs_bmbt_block_t *block; /* bmap btree block */
693 xfs_buf_t *bp; /* buffer for block */
694 int error; /* error return value */
695 int i; /* loop index */
696 xfs_bmbt_key_t key; /* bmap btree key */
697 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
698 int logflags; /* inode logging flags */
699 xfs_fsblock_t nbno; /* new block number */
700 struct xfs_btree_cur *ncur; /* new btree cursor */
701 __uint64_t startoff; /* new btree key value */
702 xfs_bmbt_rec_t nrec; /* new record count */
703 int optr; /* old key/record index */
704 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
705 int ptr; /* key/record index */
706 xfs_bmbt_rec_t *rp=NULL; /* pointer to bmap btree rec */
707 int numrecs;
708
709 ASSERT(level < cur->bc_nlevels);
710 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
711 XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
712 ncur = NULL;
713 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp));
714 optr = ptr = cur->bc_ptrs[level];
715 if (ptr == 0) {
716 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
717 *stat = 0;
718 return 0;
719 }
720 XFS_STATS_INC(xs_bmbt_insrec);
721 block = xfs_bmbt_get_block(cur, level, &bp);
722 numrecs = be16_to_cpu(block->bb_numrecs);
723#ifdef DEBUG
724 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
725 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
726 return error;
727 }
728 if (ptr <= numrecs) {
729 if (level == 0) {
730 rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
731 xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp);
732 } else {
733 kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
734 xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp);
735 }
736 }
737#endif
738 nbno = NULLFSBLOCK;
739 if (numrecs == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
740 if (numrecs < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
741 /*
742 * A root block, that can be made bigger.
743 */
744 xfs_iroot_realloc(cur->bc_private.b.ip, 1,
745 cur->bc_private.b.whichfork);
746 block = xfs_bmbt_get_block(cur, level, &bp);
747 } else if (level == cur->bc_nlevels - 1) {
748 if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) ||
749 *stat == 0) {
750 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
751 return error;
752 }
753 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
754 logflags);
755 block = xfs_bmbt_get_block(cur, level, &bp);
756 } else {
757 if ((error = xfs_bmbt_rshift(cur, level, &i))) {
758 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
759 return error;
760 }
761 if (i) {
762 /* nothing */
763 } else {
764 if ((error = xfs_bmbt_lshift(cur, level, &i))) {
765 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
766 return error;
767 }
768 if (i) {
769 optr = ptr = cur->bc_ptrs[level];
770 } else {
771 if ((error = xfs_bmbt_split(cur, level,
772 &nbno, &startoff, &ncur,
773 &i))) {
774 XFS_BMBT_TRACE_CURSOR(cur,
775 ERROR);
776 return error;
777 }
778 if (i) {
779 block = xfs_bmbt_get_block(
780 cur, level, &bp);
781#ifdef DEBUG
782 if ((error =
783 xfs_btree_check_lblock(cur,
784 block, level, bp))) {
785 XFS_BMBT_TRACE_CURSOR(
786 cur, ERROR);
787 return error;
788 }
789#endif
790 ptr = cur->bc_ptrs[level];
791 xfs_bmbt_disk_set_allf(&nrec,
792 startoff, 0, 0,
793 XFS_EXT_NORM);
794 } else {
795 XFS_BMBT_TRACE_CURSOR(cur,
796 EXIT);
797 *stat = 0;
798 return 0;
799 }
800 }
801 }
802 }
803 }
804 numrecs = be16_to_cpu(block->bb_numrecs);
805 if (level > 0) {
806 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
807 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
808#ifdef DEBUG
809 for (i = numrecs; i >= ptr; i--) {
810 if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1],
811 level))) {
812 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
813 return error;
814 }
815 }
816#endif
817 memmove(&kp[ptr], &kp[ptr - 1],
818 (numrecs - ptr + 1) * sizeof(*kp));
819 memmove(&pp[ptr], &pp[ptr - 1],
820 (numrecs - ptr + 1) * sizeof(*pp));
821#ifdef DEBUG
822 if ((error = xfs_btree_check_lptr(cur, *bnop, level))) {
823 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
824 return error;
825 }
826#endif
827 kp[ptr - 1] = key;
828 pp[ptr - 1] = cpu_to_be64(*bnop);
829 numrecs++;
830 block->bb_numrecs = cpu_to_be16(numrecs);
831 xfs_bmbt_log_keys(cur, bp, ptr, numrecs);
832 xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs);
833 } else {
834 rp = XFS_BMAP_REC_IADDR(block, 1, cur);
835 memmove(&rp[ptr], &rp[ptr - 1],
836 (numrecs - ptr + 1) * sizeof(*rp));
837 rp[ptr - 1] = *recp;
838 numrecs++;
839 block->bb_numrecs = cpu_to_be16(numrecs);
840 xfs_bmbt_log_recs(cur, bp, ptr, numrecs);
841 }
842 xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
843#ifdef DEBUG
844 if (ptr < numrecs) {
845 if (level == 0)
846 xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1,
847 rp + ptr);
848 else
849 xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1,
850 kp + ptr);
851 }
852#endif
853 if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) {
854 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
855 return error;
856 }
857 *bnop = nbno;
858 if (nbno != NULLFSBLOCK) {
859 *recp = nrec;
860 *curp = ncur;
861 }
862 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
863 *stat = 1;
864 return 0;
865}
866
867STATIC int
868xfs_bmbt_killroot(
869 xfs_btree_cur_t *cur)
870{
871 xfs_bmbt_block_t *block;
872 xfs_bmbt_block_t *cblock;
873 xfs_buf_t *cbp;
874 xfs_bmbt_key_t *ckp;
875 xfs_bmbt_ptr_t *cpp;
876#ifdef DEBUG
877 int error;
878#endif
879 int i;
880 xfs_bmbt_key_t *kp;
881 xfs_inode_t *ip;
882 xfs_ifork_t *ifp;
883 int level;
884 xfs_bmbt_ptr_t *pp;
885
886 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
887 level = cur->bc_nlevels - 1;
888 ASSERT(level >= 1);
889 /*
890 * Don't deal with the root block needs to be a leaf case.
891 * We're just going to turn the thing back into extents anyway.
892 */
893 if (level == 1) {
894 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
895 return 0;
896 }
897 block = xfs_bmbt_get_block(cur, level, &cbp);
898 /*
899 * Give up if the root has multiple children.
900 */
901 if (be16_to_cpu(block->bb_numrecs) != 1) {
902 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
903 return 0;
904 }
905 /*
906 * Only do this if the next level will fit.
907 * Then the data must be copied up to the inode,
908 * instead of freeing the root you free the next level.
909 */
910 cbp = cur->bc_bufs[level - 1];
911 cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
912 if (be16_to_cpu(cblock->bb_numrecs) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
913 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
914 return 0;
915 }
916 ASSERT(be64_to_cpu(cblock->bb_leftsib) == NULLDFSBNO);
917 ASSERT(be64_to_cpu(cblock->bb_rightsib) == NULLDFSBNO);
918 ip = cur->bc_private.b.ip;
919 ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
920 ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
921 XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
922 i = (int)(be16_to_cpu(cblock->bb_numrecs) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
923 if (i) {
924 xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
925 block = ifp->if_broot;
926 }
927 be16_add_cpu(&block->bb_numrecs, i);
928 ASSERT(block->bb_numrecs == cblock->bb_numrecs);
929 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
930 ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
931 memcpy(kp, ckp, be16_to_cpu(block->bb_numrecs) * sizeof(*kp));
932 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
933 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
934#ifdef DEBUG
935 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
936 if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) {
937 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
938 return error;
939 }
940 }
941#endif
942 memcpy(pp, cpp, be16_to_cpu(block->bb_numrecs) * sizeof(*pp));
943 xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
944 cur->bc_private.b.flist, cur->bc_mp);
945 ip->i_d.di_nblocks--;
946 XFS_TRANS_MOD_DQUOT_BYINO(cur->bc_mp, cur->bc_tp, ip,
947 XFS_TRANS_DQ_BCOUNT, -1L);
948 xfs_trans_binval(cur->bc_tp, cbp);
949 cur->bc_bufs[level - 1] = NULL;
950 be16_add_cpu(&block->bb_level, -1);
951 xfs_trans_log_inode(cur->bc_tp, ip,
952 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
953 cur->bc_nlevels--;
954 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
955 return 0;
956}
957
958/*
959 * Log key values from the btree block.
960 */
961STATIC void
962xfs_bmbt_log_keys(
963 xfs_btree_cur_t *cur,
964 xfs_buf_t *bp,
965 int kfirst,
966 int klast)
967{
968 xfs_trans_t *tp;
969
970 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
971 XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast);
972 tp = cur->bc_tp;
973 if (bp) {
974 xfs_bmbt_block_t *block;
975 int first;
976 xfs_bmbt_key_t *kp;
977 int last;
978
979 block = XFS_BUF_TO_BMBT_BLOCK(bp);
980 kp = XFS_BMAP_KEY_DADDR(block, 1, cur);
981 first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
982 last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
983 xfs_trans_log_buf(tp, bp, first, last);
984 } else {
985 xfs_inode_t *ip;
986
987 ip = cur->bc_private.b.ip;
988 xfs_trans_log_inode(tp, ip,
989 XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
990 }
991 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
992}
993
994/*
995 * Log pointer values from the btree block.
996 */
997STATIC void
998xfs_bmbt_log_ptrs(
999 xfs_btree_cur_t *cur,
1000 xfs_buf_t *bp,
1001 int pfirst,
1002 int plast)
1003{
1004 xfs_trans_t *tp;
1005
1006 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1007 XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast);
1008 tp = cur->bc_tp;
1009 if (bp) {
1010 xfs_bmbt_block_t *block;
1011 int first;
1012 int last;
1013 xfs_bmbt_ptr_t *pp;
1014
1015 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1016 pp = XFS_BMAP_PTR_DADDR(block, 1, cur);
1017 first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
1018 last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
1019 xfs_trans_log_buf(tp, bp, first, last);
1020 } else {
1021 xfs_inode_t *ip;
1022
1023 ip = cur->bc_private.b.ip;
1024 xfs_trans_log_inode(tp, ip,
1025 XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
1026 }
1027 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1028}
1029
1030/*
1031 * Lookup the record. The cursor is made to point to it, based on dir.
1032 */
1033STATIC int /* error */
1034xfs_bmbt_lookup(
1035 xfs_btree_cur_t *cur,
1036 xfs_lookup_t dir,
1037 int *stat) /* success/failure */
1038{
1039 xfs_bmbt_block_t *block=NULL;
1040 xfs_buf_t *bp;
1041 xfs_daddr_t d;
1042 xfs_sfiloff_t diff;
1043 int error; /* error return value */
1044 xfs_fsblock_t fsbno=0;
1045 int high;
1046 int i;
1047 int keyno=0;
1048 xfs_bmbt_key_t *kkbase=NULL;
1049 xfs_bmbt_key_t *kkp;
1050 xfs_bmbt_rec_t *krbase=NULL;
1051 xfs_bmbt_rec_t *krp;
1052 int level;
1053 int low;
1054 xfs_mount_t *mp;
1055 xfs_bmbt_ptr_t *pp;
1056 xfs_bmbt_irec_t *rp;
1057 xfs_fileoff_t startoff;
1058 xfs_trans_t *tp;
1059
1060 XFS_STATS_INC(xs_bmbt_lookup);
1061 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1062 XFS_BMBT_TRACE_ARGI(cur, (int)dir);
1063 tp = cur->bc_tp;
1064 mp = cur->bc_mp;
1065 rp = &cur->bc_rec.b;
1066 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1067 if (level < cur->bc_nlevels - 1) {
1068 d = XFS_FSB_TO_DADDR(mp, fsbno);
1069 bp = cur->bc_bufs[level];
1070 if (bp && XFS_BUF_ADDR(bp) != d)
1071 bp = NULL;
1072 if (!bp) {
1073 if ((error = xfs_btree_read_bufl(mp, tp, fsbno,
1074 0, &bp, XFS_BMAP_BTREE_REF))) {
1075 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1076 return error;
1077 }
1078 xfs_btree_setbuf(cur, level, bp);
1079 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1080 if ((error = xfs_btree_check_lblock(cur, block,
1081 level, bp))) {
1082 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1083 return error;
1084 }
1085 } else
1086 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1087 } else
1088 block = xfs_bmbt_get_block(cur, level, &bp);
1089 if (diff == 0)
1090 keyno = 1;
1091 else {
1092 if (level > 0)
1093 kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur);
1094 else
1095 krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
1096 low = 1;
1097 if (!(high = be16_to_cpu(block->bb_numrecs))) {
1098 ASSERT(level == 0);
1099 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1100 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1101 *stat = 0;
1102 return 0;
1103 }
1104 while (low <= high) {
1105 XFS_STATS_INC(xs_bmbt_compare);
1106 keyno = (low + high) >> 1;
1107 if (level > 0) {
1108 kkp = kkbase + keyno - 1;
1109 startoff = be64_to_cpu(kkp->br_startoff);
1110 } else {
1111 krp = krbase + keyno - 1;
1112 startoff = xfs_bmbt_disk_get_startoff(krp);
1113 }
1114 diff = (xfs_sfiloff_t)
1115 (startoff - rp->br_startoff);
1116 if (diff < 0)
1117 low = keyno + 1;
1118 else if (diff > 0)
1119 high = keyno - 1;
1120 else
1121 break;
1122 }
1123 }
1124 if (level > 0) {
1125 if (diff > 0 && --keyno < 1)
1126 keyno = 1;
1127 pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
1128 fsbno = be64_to_cpu(*pp);
1129#ifdef DEBUG
1130 if ((error = xfs_btree_check_lptr(cur, fsbno, level))) {
1131 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1132 return error;
1133 }
1134#endif
1135 cur->bc_ptrs[level] = keyno;
1136 }
1137 }
1138 if (dir != XFS_LOOKUP_LE && diff < 0) {
1139 keyno++;
1140 /*
1141 * If ge search and we went off the end of the block, but it's
1142 * not the last block, we're in the wrong block.
1143 */
1144 if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) &&
1145 be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) {
1146 cur->bc_ptrs[0] = keyno;
1147 if ((error = xfs_bmbt_increment(cur, 0, &i))) {
1148 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1149 return error;
1150 }
1151 XFS_WANT_CORRUPTED_RETURN(i == 1);
1152 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1153 *stat = 1;
1154 return 0;
1155 }
1156 }
1157 else if (dir == XFS_LOOKUP_LE && diff > 0)
1158 keyno--;
1159 cur->bc_ptrs[0] = keyno;
1160 if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) {
1161 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1162 *stat = 0;
1163 } else {
1164 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1165 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1166 }
1167 return 0;
1168}
1169
1170/*
1171 * Move 1 record left from cur/level if possible.
1172 * Update cur to reflect the new path.
 *
 * The first entry of the block at cur/level (the "right" block) is appended
 * to that block's left sibling, the remaining entries of the right block are
 * slid down by one, the parent key is rewritten through xfs_bmbt_updkey()
 * and the cursor position at this level is decremented.
 *
 * *stat is set to 1 when an entry was moved.  It is set to 0, with a return
 * value of 0, when nothing is done: level is the topmost cursor level, the
 * block has no left sibling, the cursor position is 1 or less, or the left
 * sibling already holds XFS_BMAP_BLOCK_IMAXRECS() entries.
 * A non-zero return is the error handed back by a failed buffer read,
 * block/pointer check or key update; *stat is not written on those paths.
1173 */
1174STATIC int /* error */
1175xfs_bmbt_lshift(
1176 xfs_btree_cur_t *cur,
1177 int level,
1178 int *stat) /* success/failure */
1179{
1180 int error; /* error return value */
1181#ifdef DEBUG
1182 int i; /* loop counter */
1183#endif
1184 xfs_bmbt_key_t key; /* bmap btree key */
1185 xfs_buf_t *lbp; /* left buffer pointer */
1186 xfs_bmbt_block_t *left; /* left btree block */
1187 xfs_bmbt_key_t *lkp=NULL; /* left btree key */
1188 xfs_bmbt_ptr_t *lpp; /* left address pointer */
1189 int lrecs; /* left record count */
1190 xfs_bmbt_rec_t *lrp=NULL; /* left record pointer */
1191 xfs_mount_t *mp; /* file system mount point */
1192 xfs_buf_t *rbp; /* right buffer pointer */
1193 xfs_bmbt_block_t *right; /* right btree block */
1194 xfs_bmbt_key_t *rkp=NULL; /* right btree key */
1195 xfs_bmbt_ptr_t *rpp=NULL; /* right address pointer */
1196 xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */
1197 int rrecs; /* right record count */
1198
1199 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1200 XFS_BMBT_TRACE_ARGI(cur, level);
 /* The topmost level of the cursor is never shifted. */
1201 if (level == cur->bc_nlevels - 1) {
1202 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1203 *stat = 0;
1204 return 0;
1205 }
1206 rbp = cur->bc_bufs[level];
1207 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
1208#ifdef DEBUG
1209 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
1210 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1211 return error;
1212 }
1213#endif
 /* No left sibling: nowhere to shift to. */
1214 if (be64_to_cpu(right->bb_leftsib) == NULLDFSBNO) {
1215 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1216 *stat = 0;
1217 return 0;
1218 }
 /* Refuse when the cursor sits on (or before) the entry that would move. */
1219 if (cur->bc_ptrs[level] <= 1) {
1220 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1221 *stat = 0;
1222 return 0;
1223 }
1224 mp = cur->bc_mp;
1225 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(right->bb_leftsib), 0,
1226 &lbp, XFS_BMAP_BTREE_REF))) {
1227 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1228 return error;
1229 }
1230 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1231 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
1232 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1233 return error;
1234 }
 /* Left sibling is full: give up without changing anything. */
1235 if (be16_to_cpu(left->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
1236 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1237 *stat = 0;
1238 return 0;
1239 }
 /* lrecs is both the new left count and the slot the moved entry lands in. */
1240 lrecs = be16_to_cpu(left->bb_numrecs) + 1;
1241 if (level > 0) {
 /* Interior block: move the first key/pointer pair. */
1242 lkp = XFS_BMAP_KEY_IADDR(left, lrecs, cur);
1243 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
1244 *lkp = *rkp;
1245 xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs);
1246 lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur);
1247 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1248#ifdef DEBUG
1249 if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) {
1250 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1251 return error;
1252 }
1253#endif
1254 *lpp = *rpp;
1255 xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs);
1256 } else {
 /* Leaf block: move the first record. */
1257 lrp = XFS_BMAP_REC_IADDR(left, lrecs, cur);
1258 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
1259 *lrp = *rrp;
1260 xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
1261 }
1262 left->bb_numrecs = cpu_to_be16(lrecs);
1263 xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
1264#ifdef DEBUG
 /* Check the appended entry against the one that now precedes it. */
1265 if (level > 0)
1266 xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp);
1267 else
1268 xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
1269#endif
1270 rrecs = be16_to_cpu(right->bb_numrecs) - 1;
1271 right->bb_numrecs = cpu_to_be16(rrecs);
1272 xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
1273 if (level > 0) {
1274#ifdef DEBUG
1275 for (i = 0; i < rrecs; i++) {
1276 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1],
1277 level))) {
1278 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1279 return error;
1280 }
1281 }
1282#endif
 /* Close the gap left at slot 1 of the right block. */
1283 memmove(rkp, rkp + 1, rrecs * sizeof(*rkp));
1284 memmove(rpp, rpp + 1, rrecs * sizeof(*rpp));
1285 xfs_bmbt_log_keys(cur, rbp, 1, rrecs);
1286 xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs);
1287 } else {
1288 memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
1289 xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
 /* Leaves hold no keys; build one from the new first record's start offset. */
1290 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
1291 rkp = &key;
1292 }
 /* rkp now describes the right block's new first entry; push it to the parent. */
1293 if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
1294 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1295 return error;
1296 }
 /* One entry left the front of this block, so the cursor slot moves down by one. */
1297 cur->bc_ptrs[level]--;
1298 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1299 *stat = 1;
1300 return 0;
1301}
1302
1303/*
1304 * Move 1 record right from cur/level if possible.
1305 * Update cur to reflect the new path.
 *
 * The last entry of the block at cur/level (the "left" block) is inserted
 * at slot 1 of that block's right sibling, after the sibling's existing
 * entries have been moved up by one.  The parent key for the right block is
 * then rewritten through a temporary duplicate of the cursor.
 *
 * *stat is set to 1 when an entry was moved.  It is set to 0, with a return
 * value of 0, when nothing is done: level is the topmost cursor level, the
 * block has no right sibling, the cursor position is at or beyond the last
 * entry, or the right sibling already holds XFS_BMAP_BLOCK_IMAXRECS()
 * entries.  A non-zero return is an error from one of the helpers called
 * here; *stat is not written on those paths.
1306 */
1307STATIC int /* error */
1308xfs_bmbt_rshift(
1309 xfs_btree_cur_t *cur,
1310 int level,
1311 int *stat) /* success/failure */
1312{
1313 int error; /* error return value */
1314 int i; /* loop counter */
1315 xfs_bmbt_key_t key; /* bmap btree key */
1316 xfs_buf_t *lbp; /* left buffer pointer */
1317 xfs_bmbt_block_t *left; /* left btree block */
1318 xfs_bmbt_key_t *lkp; /* left btree key */
1319 xfs_bmbt_ptr_t *lpp; /* left address pointer */
1320 xfs_bmbt_rec_t *lrp; /* left record pointer */
1321 xfs_mount_t *mp; /* file system mount point */
1322 xfs_buf_t *rbp; /* right buffer pointer */
1323 xfs_bmbt_block_t *right; /* right btree block */
1324 xfs_bmbt_key_t *rkp; /* right btree key */
1325 xfs_bmbt_ptr_t *rpp; /* right address pointer */
1326 xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */
1327 struct xfs_btree_cur *tcur; /* temporary btree cursor */
1328
1329 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1330 XFS_BMBT_TRACE_ARGI(cur, level);
 /* The topmost level of the cursor is never shifted. */
1331 if (level == cur->bc_nlevels - 1) {
1332 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1333 *stat = 0;
1334 return 0;
1335 }
1336 lbp = cur->bc_bufs[level];
1337 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1338#ifdef DEBUG
1339 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
1340 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1341 return error;
1342 }
1343#endif
 /* No right sibling: nowhere to shift to. */
1344 if (be64_to_cpu(left->bb_rightsib) == NULLDFSBNO) {
1345 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1346 *stat = 0;
1347 return 0;
1348 }
 /* Refuse when the cursor sits on (or past) the entry that would move. */
1349 if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1350 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1351 *stat = 0;
1352 return 0;
1353 }
1354 mp = cur->bc_mp;
1355 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(left->bb_rightsib), 0,
1356 &rbp, XFS_BMAP_BTREE_REF))) {
1357 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1358 return error;
1359 }
1360 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
1361 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
1362 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1363 return error;
1364 }
 /* Right sibling is full: give up without changing anything. */
1365 if (be16_to_cpu(right->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
1366 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1367 *stat = 0;
1368 return 0;
1369 }
1370 if (level > 0) {
 /* Interior block: lkp/lpp address the last key/pointer of the left block. */
1371 lkp = XFS_BMAP_KEY_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1372 lpp = XFS_BMAP_PTR_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1373 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
1374 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1375#ifdef DEBUG
1376 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1377 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
1378 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1379 return error;
1380 }
1381 }
1382#endif
 /* Open slot 1 in the right block, then drop the moved pair into it. */
1383 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1384 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1385#ifdef DEBUG
1386 if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) {
1387 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1388 return error;
1389 }
1390#endif
1391 *rkp = *lkp;
1392 *rpp = *lpp;
 /* bb_numrecs has not been bumped yet, hence the "+ 1" in the logged range. */
1393 xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1394 xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1395 } else {
 /* Leaf block: same operation on records. */
1396 lrp = XFS_BMAP_REC_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1397 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
1398 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1399 *rrp = *lrp;
1400 xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
 /* Leaves hold no keys; build one from the moved record's start offset. */
1401 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
1402 rkp = &key;
1403 }
 /* Record counts are adjusted only after the entry has been copied. */
1404 be16_add_cpu(&left->bb_numrecs, -1);
1405 xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
1406 be16_add_cpu(&right->bb_numrecs, 1);
1407#ifdef DEBUG
 /* Check the new first entry of the right block against its successor. */
1408 if (level > 0)
1409 xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
1410 else
1411 xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1);
1412#endif
1413 xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
 /*
  * The parent key update is done through a duplicate cursor so that cur
  * itself is left where it was.  NOTE(review): tcur is positioned with
  * xfs_btree_lastrec() and then stepped with xfs_bmbt_increment(),
  * presumably landing on the first entry of the right block - confirm
  * against those helpers.
  */
1414 if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
1415 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1416 return error;
1417 }
1418 i = xfs_btree_lastrec(tcur, level);
1419 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1420 if ((error = xfs_bmbt_increment(tcur, level, &i))) {
1421 XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
1422 goto error1;
1423 }
1424 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1425 if ((error = xfs_bmbt_updkey(tcur, rkp, level + 1))) {
1426 XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
1427 goto error1;
1428 }
1429 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1430 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1431 *stat = 1;
1432 return 0;
 /* error0 traces against cur and falls through; error1 only releases tcur. */
1433error0:
1434 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1435error1:
1436 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1437 return error;
1438}
1439
1440/* 47/*
1441 * Determine the extent state. 48 * Determine the extent state.
1442 */ 49 */
@@ -1453,229 +60,15 @@ xfs_extent_state(
1453 return XFS_EXT_NORM; 60 return XFS_EXT_NORM;
1454} 61}
1455 62
1456
1457/*
1458 * Split cur/level block in half.
1459 * Return new block number and its first record (to be inserted into parent).
1460 */
1461STATIC int /* error */
1462xfs_bmbt_split(
1463 xfs_btree_cur_t *cur,
1464 int level,
1465 xfs_fsblock_t *bnop,
1466 __uint64_t *startoff,
1467 xfs_btree_cur_t **curp,
1468 int *stat) /* success/failure */
1469{
1470 xfs_alloc_arg_t args; /* block allocation args */
1471 int error; /* error return value */
1472 int i; /* loop counter */
1473 xfs_fsblock_t lbno; /* left sibling block number */
1474 xfs_buf_t *lbp; /* left buffer pointer */
1475 xfs_bmbt_block_t *left; /* left btree block */
1476 xfs_bmbt_key_t *lkp; /* left btree key */
1477 xfs_bmbt_ptr_t *lpp; /* left address pointer */
1478 xfs_bmbt_rec_t *lrp; /* left record pointer */
1479 xfs_buf_t *rbp; /* right buffer pointer */
1480 xfs_bmbt_block_t *right; /* right btree block */
1481 xfs_bmbt_key_t *rkp; /* right btree key */
1482 xfs_bmbt_ptr_t *rpp; /* right address pointer */
1483 xfs_bmbt_block_t *rrblock; /* right-right btree block */
1484 xfs_buf_t *rrbp; /* right-right buffer pointer */
1485 xfs_bmbt_rec_t *rrp; /* right record pointer */
1486
1487 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1488 XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff);
1489 args.tp = cur->bc_tp;
1490 args.mp = cur->bc_mp;
1491 lbp = cur->bc_bufs[level];
1492 lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
1493 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1494 args.fsbno = cur->bc_private.b.firstblock;
1495 args.firstblock = args.fsbno;
1496 args.minleft = 0;
1497 if (args.fsbno == NULLFSBLOCK) {
1498 args.fsbno = lbno;
1499 args.type = XFS_ALLOCTYPE_START_BNO;
1500 /*
1501 * Make sure there is sufficient room left in the AG to
1502 * complete a full tree split for an extent insert. If
1503 * we are converting the middle part of an extent then
1504 * we may need space for two tree splits.
1505 *
1506 * We are relying on the caller to make the correct block
1507 * reservation for this operation to succeed. If the
1508 * reservation amount is insufficient then we may fail a
1509 * block allocation here and corrupt the filesystem.
1510 */
1511 args.minleft = xfs_trans_get_block_res(args.tp);
1512 } else if (cur->bc_private.b.flist->xbf_low)
1513 args.type = XFS_ALLOCTYPE_START_BNO;
1514 else
1515 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1516 args.mod = args.alignment = args.total = args.isfl =
1517 args.userdata = args.minalignslop = 0;
1518 args.minlen = args.maxlen = args.prod = 1;
1519 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
1520 if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
1521 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1522 return XFS_ERROR(ENOSPC);
1523 }
1524 if ((error = xfs_alloc_vextent(&args))) {
1525 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1526 return error;
1527 }
1528 if (args.fsbno == NULLFSBLOCK && args.minleft) {
1529 /*
1530 * Could not find an AG with enough free space to satisfy
1531 * a full btree split. Try again without minleft and if
1532 * successful activate the lowspace algorithm.
1533 */
1534 args.fsbno = 0;
1535 args.type = XFS_ALLOCTYPE_FIRST_AG;
1536 args.minleft = 0;
1537 if ((error = xfs_alloc_vextent(&args))) {
1538 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1539 return error;
1540 }
1541 cur->bc_private.b.flist->xbf_low = 1;
1542 }
1543 if (args.fsbno == NULLFSBLOCK) {
1544 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1545 *stat = 0;
1546 return 0;
1547 }
1548 ASSERT(args.len == 1);
1549 cur->bc_private.b.firstblock = args.fsbno;
1550 cur->bc_private.b.allocated++;
1551 cur->bc_private.b.ip->i_d.di_nblocks++;
1552 xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
1553 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
1554 XFS_TRANS_DQ_BCOUNT, 1L);
1555 rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
1556 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
1557#ifdef DEBUG
1558 if ((error = xfs_btree_check_lblock(cur, left, level, rbp))) {
1559 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1560 return error;
1561 }
1562#endif
1563 right->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
1564 right->bb_level = left->bb_level;
1565 right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1566 if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1567 cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1568 be16_add_cpu(&right->bb_numrecs, 1);
1569 i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1570 if (level > 0) {
1571 lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
1572 lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
1573 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
1574 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1575#ifdef DEBUG
1576 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1577 if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) {
1578 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1579 return error;
1580 }
1581 }
1582#endif
1583 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1584 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1585 xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1586 xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1587 *startoff = be64_to_cpu(rkp->br_startoff);
1588 } else {
1589 lrp = XFS_BMAP_REC_IADDR(left, i, cur);
1590 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
1591 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1592 xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1593 *startoff = xfs_bmbt_disk_get_startoff(rrp);
1594 }
1595 be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1596 right->bb_rightsib = left->bb_rightsib;
1597 left->bb_rightsib = cpu_to_be64(args.fsbno);
1598 right->bb_leftsib = cpu_to_be64(lbno);
1599 xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
1600 xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1601 if (be64_to_cpu(right->bb_rightsib) != NULLDFSBNO) {
1602 if ((error = xfs_btree_read_bufl(args.mp, args.tp,
1603 be64_to_cpu(right->bb_rightsib), 0, &rrbp,
1604 XFS_BMAP_BTREE_REF))) {
1605 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1606 return error;
1607 }
1608 rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
1609 if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
1610 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1611 return error;
1612 }
1613 rrblock->bb_leftsib = cpu_to_be64(args.fsbno);
1614 xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
1615 }
1616 if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1617 xfs_btree_setbuf(cur, level, rbp);
1618 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1619 }
1620 if (level + 1 < cur->bc_nlevels) {
1621 if ((error = xfs_btree_dup_cursor(cur, curp))) {
1622 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1623 return error;
1624 }
1625 (*curp)->bc_ptrs[level + 1]++;
1626 }
1627 *bnop = args.fsbno;
1628 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1629 *stat = 1;
1630 return 0;
1631}
1632
1633
1634/*
1635 * Update keys for the record.
1636 */
1637STATIC int
1638xfs_bmbt_updkey(
1639 xfs_btree_cur_t *cur,
1640 xfs_bmbt_key_t *keyp, /* on-disk format */
1641 int level)
1642{
1643 xfs_bmbt_block_t *block;
1644 xfs_buf_t *bp;
1645#ifdef DEBUG
1646 int error;
1647#endif
1648 xfs_bmbt_key_t *kp;
1649 int ptr;
1650
1651 ASSERT(level >= 1);
1652 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1653 XFS_BMBT_TRACE_ARGIK(cur, level, keyp);
1654 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1655 block = xfs_bmbt_get_block(cur, level, &bp);
1656#ifdef DEBUG
1657 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
1658 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1659 return error;
1660 }
1661#endif
1662 ptr = cur->bc_ptrs[level];
1663 kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
1664 *kp = *keyp;
1665 xfs_bmbt_log_keys(cur, bp, ptr, ptr);
1666 }
1667 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1668 return 0;
1669}
1670
1671/* 63/*
1672 * Convert on-disk form of btree root to in-memory form. 64 * Convert on-disk form of btree root to in-memory form.
1673 */ 65 */
1674void 66void
1675xfs_bmdr_to_bmbt( 67xfs_bmdr_to_bmbt(
68 struct xfs_mount *mp,
1676 xfs_bmdr_block_t *dblock, 69 xfs_bmdr_block_t *dblock,
1677 int dblocklen, 70 int dblocklen,
1678 xfs_bmbt_block_t *rblock, 71 struct xfs_btree_block *rblock,
1679 int rblocklen) 72 int rblocklen)
1680{ 73{
1681 int dmxr; 74 int dmxr;
@@ -1688,129 +81,19 @@ xfs_bmdr_to_bmbt(
1688 rblock->bb_level = dblock->bb_level; 81 rblock->bb_level = dblock->bb_level;
1689 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 82 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
1690 rblock->bb_numrecs = dblock->bb_numrecs; 83 rblock->bb_numrecs = dblock->bb_numrecs;
1691 rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); 84 rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
1692 rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); 85 rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
1693 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); 86 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
1694 fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); 87 fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
1695 tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); 88 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
1696 fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); 89 fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
1697 tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 90 tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
1698 dmxr = be16_to_cpu(dblock->bb_numrecs); 91 dmxr = be16_to_cpu(dblock->bb_numrecs);
1699 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 92 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
1700 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 93 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
1701} 94}
1702 95
1703/* 96/*
1704 * Decrement cursor by one record at the level.
1705 * For nonzero levels the leaf-ward information is untouched.
1706 */
1707int /* error */
1708xfs_bmbt_decrement(
1709 xfs_btree_cur_t *cur,
1710 int level,
1711 int *stat) /* success/failure */
1712{
1713 xfs_bmbt_block_t *block;
1714 xfs_buf_t *bp;
1715 int error; /* error return value */
1716 xfs_fsblock_t fsbno;
1717 int lev;
1718 xfs_mount_t *mp;
1719 xfs_trans_t *tp;
1720
1721 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1722 XFS_BMBT_TRACE_ARGI(cur, level);
1723 ASSERT(level < cur->bc_nlevels);
1724 if (level < cur->bc_nlevels - 1)
1725 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1726 if (--cur->bc_ptrs[level] > 0) {
1727 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1728 *stat = 1;
1729 return 0;
1730 }
1731 block = xfs_bmbt_get_block(cur, level, &bp);
1732#ifdef DEBUG
1733 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
1734 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1735 return error;
1736 }
1737#endif
1738 if (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO) {
1739 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1740 *stat = 0;
1741 return 0;
1742 }
1743 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1744 if (--cur->bc_ptrs[lev] > 0)
1745 break;
1746 if (lev < cur->bc_nlevels - 1)
1747 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1748 }
1749 if (lev == cur->bc_nlevels) {
1750 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1751 *stat = 0;
1752 return 0;
1753 }
1754 tp = cur->bc_tp;
1755 mp = cur->bc_mp;
1756 for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
1757 fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
1758 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
1759 XFS_BMAP_BTREE_REF))) {
1760 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1761 return error;
1762 }
1763 lev--;
1764 xfs_btree_setbuf(cur, lev, bp);
1765 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1766 if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
1767 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1768 return error;
1769 }
1770 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1771 }
1772 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1773 *stat = 1;
1774 return 0;
1775}
1776
1777/*
1778 * Delete the record pointed to by cur.
1779 */
1780int /* error */
1781xfs_bmbt_delete(
1782 xfs_btree_cur_t *cur,
1783 int *stat) /* success/failure */
1784{
1785 int error; /* error return value */
1786 int i;
1787 int level;
1788
1789 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1790 for (level = 0, i = 2; i == 2; level++) {
1791 if ((error = xfs_bmbt_delrec(cur, level, &i))) {
1792 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1793 return error;
1794 }
1795 }
1796 if (i == 0) {
1797 for (level = 1; level < cur->bc_nlevels; level++) {
1798 if (cur->bc_ptrs[level] == 0) {
1799 if ((error = xfs_bmbt_decrement(cur, level,
1800 &i))) {
1801 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1802 return error;
1803 }
1804 break;
1805 }
1806 }
1807 }
1808 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1809 *stat = i;
1810 return 0;
1811}
1812
1813/*
1814 * Convert a compressed bmap extent record to an uncompressed form. 97 * Convert a compressed bmap extent record to an uncompressed form.
1815 * This code must be in sync with the routines xfs_bmbt_get_startoff, 98 * This code must be in sync with the routines xfs_bmbt_get_startoff,
1816 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. 99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
@@ -1864,31 +147,6 @@ xfs_bmbt_get_all(
1864} 147}
1865 148
1866/* 149/*
1867 * Get the block pointer for the given level of the cursor.
1868 * Fill in the buffer pointer, if applicable.
1869 */
1870xfs_bmbt_block_t *
1871xfs_bmbt_get_block(
1872 xfs_btree_cur_t *cur,
1873 int level,
1874 xfs_buf_t **bpp)
1875{
1876 xfs_ifork_t *ifp;
1877 xfs_bmbt_block_t *rval;
1878
1879 if (level < cur->bc_nlevels - 1) {
1880 *bpp = cur->bc_bufs[level];
1881 rval = XFS_BUF_TO_BMBT_BLOCK(*bpp);
1882 } else {
1883 *bpp = NULL;
1884 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
1885 cur->bc_private.b.whichfork);
1886 rval = ifp->if_broot;
1887 }
1888 return rval;
1889}
1890
1891/*
1892 * Extract the blockcount field from an in memory bmap extent record. 150 * Extract the blockcount field from an in memory bmap extent record.
1893 */ 151 */
1894xfs_filblks_t 152xfs_filblks_t
@@ -1974,348 +232,6 @@ xfs_bmbt_disk_get_startoff(
1974 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; 232 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
1975} 233}
1976 234
1977/*
1978 * Increment cursor by one record at the level.
1979 * For nonzero levels the leaf-ward information is untouched.
1980 */
1981int /* error */
1982xfs_bmbt_increment(
1983 xfs_btree_cur_t *cur,
1984 int level,
1985 int *stat) /* success/failure */
1986{
1987 xfs_bmbt_block_t *block;
1988 xfs_buf_t *bp;
1989 int error; /* error return value */
1990 xfs_fsblock_t fsbno;
1991 int lev;
1992 xfs_mount_t *mp;
1993 xfs_trans_t *tp;
1994
1995 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1996 XFS_BMBT_TRACE_ARGI(cur, level);
1997 ASSERT(level < cur->bc_nlevels);
1998 if (level < cur->bc_nlevels - 1)
1999 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
2000 block = xfs_bmbt_get_block(cur, level, &bp);
2001#ifdef DEBUG
2002 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
2003 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2004 return error;
2005 }
2006#endif
2007 if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
2008 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2009 *stat = 1;
2010 return 0;
2011 }
2012 if (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO) {
2013 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2014 *stat = 0;
2015 return 0;
2016 }
2017 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
2018 block = xfs_bmbt_get_block(cur, lev, &bp);
2019#ifdef DEBUG
2020 if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
2021 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2022 return error;
2023 }
2024#endif
2025 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
2026 break;
2027 if (lev < cur->bc_nlevels - 1)
2028 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
2029 }
2030 if (lev == cur->bc_nlevels) {
2031 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2032 *stat = 0;
2033 return 0;
2034 }
2035 tp = cur->bc_tp;
2036 mp = cur->bc_mp;
2037 for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
2038 fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
2039 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
2040 XFS_BMAP_BTREE_REF))) {
2041 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2042 return error;
2043 }
2044 lev--;
2045 xfs_btree_setbuf(cur, lev, bp);
2046 block = XFS_BUF_TO_BMBT_BLOCK(bp);
2047 if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
2048 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2049 return error;
2050 }
2051 cur->bc_ptrs[lev] = 1;
2052 }
2053 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2054 *stat = 1;
2055 return 0;
2056}
2057
2058/*
2059 * Insert the current record at the point referenced by cur.
2060 *
2061 * A multi-level split of the tree on insert will invalidate the original
2062 * cursor. All callers of this function should assume that the cursor is
2063 * no longer valid and revalidate it.
2064 */
2065int /* error */
2066xfs_bmbt_insert(
2067 xfs_btree_cur_t *cur,
2068 int *stat) /* success/failure */
2069{
2070 int error; /* error return value */
2071 int i;
2072 int level;
2073 xfs_fsblock_t nbno;
2074 xfs_btree_cur_t *ncur;
2075 xfs_bmbt_rec_t nrec;
2076 xfs_btree_cur_t *pcur;
2077
2078 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2079 level = 0;
2080 nbno = NULLFSBLOCK;
2081 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
2082 ncur = NULL;
2083 pcur = cur;
2084 do {
2085 if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
2086 &i))) {
2087 if (pcur != cur)
2088 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2089 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2090 return error;
2091 }
2092 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2093 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
2094 cur->bc_nlevels = pcur->bc_nlevels;
2095 cur->bc_private.b.allocated +=
2096 pcur->bc_private.b.allocated;
2097 pcur->bc_private.b.allocated = 0;
2098 ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
2099 XFS_IS_REALTIME_INODE(cur->bc_private.b.ip));
2100 cur->bc_private.b.firstblock =
2101 pcur->bc_private.b.firstblock;
2102 ASSERT(cur->bc_private.b.flist ==
2103 pcur->bc_private.b.flist);
2104 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2105 }
2106 if (ncur) {
2107 pcur = ncur;
2108 ncur = NULL;
2109 }
2110 } while (nbno != NULLFSBLOCK);
2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2112 *stat = i;
2113 return 0;
2114error0:
2115 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2116 return error;
2117}
2118
2119/*
2120 * Log fields from the btree block header.
2121 */
2122void
2123xfs_bmbt_log_block(
2124 xfs_btree_cur_t *cur,
2125 xfs_buf_t *bp,
2126 int fields)
2127{
2128 int first;
2129 int last;
2130 xfs_trans_t *tp;
2131 static const short offsets[] = {
2132 offsetof(xfs_bmbt_block_t, bb_magic),
2133 offsetof(xfs_bmbt_block_t, bb_level),
2134 offsetof(xfs_bmbt_block_t, bb_numrecs),
2135 offsetof(xfs_bmbt_block_t, bb_leftsib),
2136 offsetof(xfs_bmbt_block_t, bb_rightsib),
2137 sizeof(xfs_bmbt_block_t)
2138 };
2139
2140 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2141 XFS_BMBT_TRACE_ARGBI(cur, bp, fields);
2142 tp = cur->bc_tp;
2143 if (bp) {
2144 xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first,
2145 &last);
2146 xfs_trans_log_buf(tp, bp, first, last);
2147 } else
2148 xfs_trans_log_inode(tp, cur->bc_private.b.ip,
2149 XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
2150 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2151}
2152
2153/*
2154 * Log record values from the btree block.
2155 */
2156void
2157xfs_bmbt_log_recs(
2158 xfs_btree_cur_t *cur,
2159 xfs_buf_t *bp,
2160 int rfirst,
2161 int rlast)
2162{
2163 xfs_bmbt_block_t *block;
2164 int first;
2165 int last;
2166 xfs_bmbt_rec_t *rp;
2167 xfs_trans_t *tp;
2168
2169 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2170 XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast);
2171 ASSERT(bp);
2172 tp = cur->bc_tp;
2173 block = XFS_BUF_TO_BMBT_BLOCK(bp);
2174 rp = XFS_BMAP_REC_DADDR(block, 1, cur);
2175 first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
2176 last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
2177 xfs_trans_log_buf(tp, bp, first, last);
2178 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2179}
2180
2181int /* error */
2182xfs_bmbt_lookup_eq(
2183 xfs_btree_cur_t *cur,
2184 xfs_fileoff_t off,
2185 xfs_fsblock_t bno,
2186 xfs_filblks_t len,
2187 int *stat) /* success/failure */
2188{
2189 cur->bc_rec.b.br_startoff = off;
2190 cur->bc_rec.b.br_startblock = bno;
2191 cur->bc_rec.b.br_blockcount = len;
2192 return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat);
2193}
2194
2195int /* error */
2196xfs_bmbt_lookup_ge(
2197 xfs_btree_cur_t *cur,
2198 xfs_fileoff_t off,
2199 xfs_fsblock_t bno,
2200 xfs_filblks_t len,
2201 int *stat) /* success/failure */
2202{
2203 cur->bc_rec.b.br_startoff = off;
2204 cur->bc_rec.b.br_startblock = bno;
2205 cur->bc_rec.b.br_blockcount = len;
2206 return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
2207}
2208
2209/*
2210 * Give the bmap btree a new root block. Copy the old broot contents
2211 * down into a real block and make the broot point to it.
2212 */
2213int /* error */
2214xfs_bmbt_newroot(
2215 xfs_btree_cur_t *cur, /* btree cursor */
2216 int *logflags, /* logging flags for inode */
2217 int *stat) /* return status - 0 fail */
2218{
2219 xfs_alloc_arg_t args; /* allocation arguments */
2220 xfs_bmbt_block_t *block; /* bmap btree block */
2221 xfs_buf_t *bp; /* buffer for block */
2222 xfs_bmbt_block_t *cblock; /* child btree block */
2223 xfs_bmbt_key_t *ckp; /* child key pointer */
2224 xfs_bmbt_ptr_t *cpp; /* child ptr pointer */
2225 int error; /* error return code */
2226#ifdef DEBUG
2227 int i; /* loop counter */
2228#endif
2229 xfs_bmbt_key_t *kp; /* pointer to bmap btree key */
2230 int level; /* btree level */
2231 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
2232
2233 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2234 level = cur->bc_nlevels - 1;
2235 block = xfs_bmbt_get_block(cur, level, &bp);
2236 /*
2237 * Copy the root into a real block.
2238 */
2239 args.mp = cur->bc_mp;
2240 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
2241 args.tp = cur->bc_tp;
2242 args.fsbno = cur->bc_private.b.firstblock;
2243 args.mod = args.minleft = args.alignment = args.total = args.isfl =
2244 args.userdata = args.minalignslop = 0;
2245 args.minlen = args.maxlen = args.prod = 1;
2246 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
2247 args.firstblock = args.fsbno;
2248 if (args.fsbno == NULLFSBLOCK) {
2249#ifdef DEBUG
2250 if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) {
2251 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2252 return error;
2253 }
2254#endif
2255 args.fsbno = be64_to_cpu(*pp);
2256 args.type = XFS_ALLOCTYPE_START_BNO;
2257 } else if (cur->bc_private.b.flist->xbf_low)
2258 args.type = XFS_ALLOCTYPE_START_BNO;
2259 else
2260 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2261 if ((error = xfs_alloc_vextent(&args))) {
2262 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2263 return error;
2264 }
2265 if (args.fsbno == NULLFSBLOCK) {
2266 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2267 *stat = 0;
2268 return 0;
2269 }
2270 ASSERT(args.len == 1);
2271 cur->bc_private.b.firstblock = args.fsbno;
2272 cur->bc_private.b.allocated++;
2273 cur->bc_private.b.ip->i_d.di_nblocks++;
2274 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
2275 XFS_TRANS_DQ_BCOUNT, 1L);
2276 bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
2277 cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
2278 *cblock = *block;
2279 be16_add_cpu(&block->bb_level, 1);
2280 block->bb_numrecs = cpu_to_be16(1);
2281 cur->bc_nlevels++;
2282 cur->bc_ptrs[level + 1] = 1;
2283 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
2284 ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
2285 memcpy(ckp, kp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*kp));
2286 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
2287#ifdef DEBUG
2288 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2289 if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
2290 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2291 return error;
2292 }
2293 }
2294#endif
2295 memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp));
2296#ifdef DEBUG
2297 if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) {
2298 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2299 return error;
2300 }
2301#endif
2302 *pp = cpu_to_be64(args.fsbno);
2303 xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs),
2304 cur->bc_private.b.whichfork);
2305 xfs_btree_setbuf(cur, level, bp);
2306 /*
2307 * Do all this logging at the end so that
2308 * the root is at the right level.
2309 */
2310 xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
2311 xfs_bmbt_log_keys(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
2312 xfs_bmbt_log_ptrs(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
2313 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2314 *logflags |=
2315 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
2316 *stat = 1;
2317 return 0;
2318}
2319 235
2320/* 236/*
2321 * Set all the fields in a bmap extent record from the arguments. 237 * Set all the fields in a bmap extent record from the arguments.
@@ -2512,7 +428,8 @@ xfs_bmbt_set_state(
2512 */ 428 */
2513void 429void
2514xfs_bmbt_to_bmdr( 430xfs_bmbt_to_bmdr(
2515 xfs_bmbt_block_t *rblock, 431 struct xfs_mount *mp,
432 struct xfs_btree_block *rblock,
2516 int rblocklen, 433 int rblocklen,
2517 xfs_bmdr_block_t *dblock, 434 xfs_bmdr_block_t *dblock,
2518 int dblocklen) 435 int dblocklen)
@@ -2524,67 +441,22 @@ xfs_bmbt_to_bmdr(
2524 __be64 *tpp; 441 __be64 *tpp;
2525 442
2526 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); 443 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
2527 ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO); 444 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
2528 ASSERT(be64_to_cpu(rblock->bb_rightsib) == NULLDFSBNO); 445 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
2529 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 446 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
2530 dblock->bb_level = rblock->bb_level; 447 dblock->bb_level = rblock->bb_level;
2531 dblock->bb_numrecs = rblock->bb_numrecs; 448 dblock->bb_numrecs = rblock->bb_numrecs;
2532 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); 449 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
2533 fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); 450 fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
2534 tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); 451 tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
2535 fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 452 fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
2536 tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); 453 tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
2537 dmxr = be16_to_cpu(dblock->bb_numrecs); 454 dmxr = be16_to_cpu(dblock->bb_numrecs);
2538 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 455 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
2539 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 456 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
2540} 457}
2541 458
2542/* 459/*
2543 * Update the record to the passed values.
2544 */
2545int
2546xfs_bmbt_update(
2547 xfs_btree_cur_t *cur,
2548 xfs_fileoff_t off,
2549 xfs_fsblock_t bno,
2550 xfs_filblks_t len,
2551 xfs_exntst_t state)
2552{
2553 xfs_bmbt_block_t *block;
2554 xfs_buf_t *bp;
2555 int error;
2556 xfs_bmbt_key_t key;
2557 int ptr;
2558 xfs_bmbt_rec_t *rp;
2559
2560 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2561 XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno,
2562 (xfs_dfilblks_t)len, (int)state);
    /* Work on the level 0 block that the cursor currently references. */
2563 block = xfs_bmbt_get_block(cur, 0, &bp);
2564#ifdef DEBUG
2565 if ((error = xfs_btree_check_lblock(cur, block, 0, bp))) {
2566 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2567 return error;
2568 }
2569#endif
    /* Overwrite the record at the cursor's level 0 index and log just it. */
2570 ptr = cur->bc_ptrs[0];
2571 rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
2572 xfs_bmbt_disk_set_allf(rp, off, bno, len, state);
2573 xfs_bmbt_log_recs(cur, bp, ptr, ptr);
    /*
     * Only a change to the first record of the block (ptr == 1) goes on
     * to call xfs_bmbt_updkey(); any other index is finished here.
     */
2574 if (ptr > 1) {
2575 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2576 return 0;
2577 }
    /* The key handed to xfs_bmbt_updkey() is the new start offset. */
2578 key.br_startoff = cpu_to_be64(off);
2579 if ((error = xfs_bmbt_updkey(cur, &key, 1))) {
2580 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2581 return error;
2582 }
2583 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2584 return 0;
2585}
2586
2587/*
2588 * Check extent records, which have just been read, for 460 * Check extent records, which have just been read, for
2589 * any bit in the extent flag field. ASSERT on debug 461 * any bit in the extent flag field. ASSERT on debug
2590 * kernels, as this condition should not occur. 462 * kernels, as this condition should not occur.
@@ -2608,3 +480,451 @@ xfs_check_nostate_extents(
2608 } 480 }
2609 return 0; 481 return 0;
2610} 482}
483
484
/*
 * Duplicate a bmap btree cursor.
 *
 * A fresh cursor is built with xfs_bmbt_init_cursor() for the same mount,
 * transaction, inode and fork as @cur, and the firstblock, flist and flags
 * fields of the bmap-private state are then copied across.  Returns the
 * new cursor.
 */
485STATIC struct xfs_btree_cur *
486xfs_bmbt_dup_cursor(
487 struct xfs_btree_cur *cur)
488{
489 struct xfs_btree_cur *new;
490
491 new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
492 cur->bc_private.b.ip, cur->bc_private.b.whichfork);
493
    /*
     * NOTE(review): "new" is dereferenced without a NULL check; this
     * presumably relies on the KM_SLEEP allocation in
     * xfs_bmbt_init_cursor() not failing -- confirm.
     */
494 /*
495 * Copy the firstblock, flist, and flags values,
496 * since init cursor doesn't get them.
497 */
498 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
499 new->bc_private.b.flist = cur->bc_private.b.flist;
500 new->bc_private.b.flags = cur->bc_private.b.flags;
501
502 return new;
503}
504
/*
 * Fold the allocation state of @src into @dst.
 *
 * @dst gains @src's count of allocated blocks and takes over @src's
 * firstblock; @src's allocated count is then reset to zero.  Both cursors
 * are asserted to share the same free list.
 */
505STATIC void
506xfs_bmbt_update_cursor(
507 struct xfs_btree_cur *src,
508 struct xfs_btree_cur *dst)
509{
    /* dst may only lack a firstblock when the inode has the realtime flag. */
510 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
511 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
512 ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
513
514 dst->bc_private.b.allocated += src->bc_private.b.allocated;
515 dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
516
    /* The count now lives in dst; clear it in src. */
517 src->bc_private.b.allocated = 0;
518}
519
/*
 * Allocate a single block for the bmap btree.
 *
 * @cur:    cursor supplying the transaction, mount, inode and free list
 * @start:  block number used as the allocation target when the cursor has
 *          no firstblock recorded yet
 * @new:    receives the allocated block number (big-endian) on success
 * @length: not referenced by this function
 * @stat:   set to 1 when a block was allocated, 0 when none was found
 *
 * Returns 0 both when a block was allocated and when none could be found
 * (distinguished through @stat).  Returns ENOSPC when the cursor does not
 * carry XFS_BTCUR_BPRV_WASDEL and the transaction has no block reservation
 * left, or whatever error xfs_alloc_vextent() reports.
 */
520STATIC int
521xfs_bmbt_alloc_block(
522 struct xfs_btree_cur *cur,
523 union xfs_btree_ptr *start,
524 union xfs_btree_ptr *new,
525 int length,
526 int *stat)
527{
528 xfs_alloc_arg_t args; /* block allocation args */
529 int error; /* error return value */
530
    /* Seed the request from the cursor; firstblock doubles as the target. */
531 memset(&args, 0, sizeof(args));
532 args.tp = cur->bc_tp;
533 args.mp = cur->bc_mp;
534 args.fsbno = cur->bc_private.b.firstblock;
535 args.firstblock = args.fsbno;
536
    /*
     * Pick the allocation type: no firstblock yet -> start at the caller's
     * hint with a minleft requirement; free list in low-space mode -> start
     * at firstblock; otherwise allocate near firstblock.
     */
537 if (args.fsbno == NULLFSBLOCK) {
538 args.fsbno = be64_to_cpu(start->l);
539 args.type = XFS_ALLOCTYPE_START_BNO;
540 /*
541 * Make sure there is sufficient room left in the AG to
542 * complete a full tree split for an extent insert. If
543 * we are converting the middle part of an extent then
544 * we may need space for two tree splits.
545 *
546 * We are relying on the caller to make the correct block
547 * reservation for this operation to succeed. If the
548 * reservation amount is insufficient then we may fail a
549 * block allocation here and corrupt the filesystem.
550 */
551 args.minleft = xfs_trans_get_block_res(args.tp);
552 } else if (cur->bc_private.b.flist->xbf_low) {
553 args.type = XFS_ALLOCTYPE_START_BNO;
554 } else {
555 args.type = XFS_ALLOCTYPE_NEAR_BNO;
556 }
557
    /* Exactly one block is requested. */
558 args.minlen = args.maxlen = args.prod = 1;
559 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
    /* Without WASDEL, an exhausted block reservation fails up front. */
560 if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
561 error = XFS_ERROR(ENOSPC);
562 goto error0;
563 }
564 error = xfs_alloc_vextent(&args);
565 if (error)
566 goto error0;
567
568 if (args.fsbno == NULLFSBLOCK && args.minleft) {
569 /*
570 * Could not find an AG with enough free space to satisfy
571 * a full btree split. Try again without minleft and if
572 * successful activate the lowspace algorithm.
573 */
574 args.fsbno = 0;
575 args.type = XFS_ALLOCTYPE_FIRST_AG;
576 args.minleft = 0;
577 error = xfs_alloc_vextent(&args);
578 if (error)
579 goto error0;
580 cur->bc_private.b.flist->xbf_low = 1;
581 }
    /* Still nothing: not an error, the caller is told through *stat. */
582 if (args.fsbno == NULLFSBLOCK) {
583 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
584 *stat = 0;
585 return 0;
586 }
    /*
     * Account for the new block: remember it as the cursor's firstblock,
     * bump the cursor and inode block counts, log the inode core and
     * charge one block to the inode's quota.
     */
587 ASSERT(args.len == 1);
588 cur->bc_private.b.firstblock = args.fsbno;
589 cur->bc_private.b.allocated++;
590 cur->bc_private.b.ip->i_d.di_nblocks++;
591 xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
592 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
593 XFS_TRANS_DQ_BCOUNT, 1L);
594
595 new->l = cpu_to_be64(args.fsbno);
596
597 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
598 *stat = 1;
599 return 0;
600
601 error0:
602 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
603 return error;
604}
605
/*
 * Release the bmap btree block held in @bp.
 *
 * The block is added to the cursor's free list with xfs_bmap_add_free(),
 * the inode's block count is decremented and the inode core logged, one
 * block is credited back to the inode's quota, and the buffer is passed to
 * xfs_trans_binval().  Always returns 0.
 */
606STATIC int
607xfs_bmbt_free_block(
608 struct xfs_btree_cur *cur,
609 struct xfs_buf *bp)
610{
611 struct xfs_mount *mp = cur->bc_mp;
612 struct xfs_inode *ip = cur->bc_private.b.ip;
613 struct xfs_trans *tp = cur->bc_tp;
    /* Filesystem block number derived from the buffer's disk address. */
614 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
615
616 xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
617 ip->i_d.di_nblocks--;
618
619 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
620 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
621 xfs_trans_binval(tp, bp);
622 return 0;
623}
624
/*
 * Return the minimum number of records for a block at @level.
 *
 * For the top level of the tree (level == bc_nlevels - 1) this is half of
 * what xfs_bmbt_maxrecs() reports for the fork's current if_broot_bytes.
 * For every other level it is the per-mount value m_bmap_dmnr[], indexed
 * by 0 for level 0 and by 1 for any higher level.
 */
625STATIC int
626xfs_bmbt_get_minrecs(
627 struct xfs_btree_cur *cur,
628 int level)
629{
630 if (level == cur->bc_nlevels - 1) {
631 struct xfs_ifork *ifp;
632
633 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
634 cur->bc_private.b.whichfork);
635
636 return xfs_bmbt_maxrecs(cur->bc_mp,
637 ifp->if_broot_bytes, level == 0) / 2;
638 }
639
640 return cur->bc_mp->m_bmap_dmnr[level != 0];
641}
642
/*
 * Return the maximum number of records for a block at @level.
 *
 * For the top level of the tree (level == bc_nlevels - 1) the limit is
 * computed by xfs_bmbt_maxrecs() from the fork's current if_broot_bytes.
 * For every other level it is the per-mount value m_bmap_dmxr[], indexed
 * by 0 for level 0 and by 1 for any higher level.
 */
643int
644xfs_bmbt_get_maxrecs(
645 struct xfs_btree_cur *cur,
646 int level)
647{
648 if (level == cur->bc_nlevels - 1) {
649 struct xfs_ifork *ifp;
650
651 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
652 cur->bc_private.b.whichfork);
653
654 return xfs_bmbt_maxrecs(cur->bc_mp,
655 ifp->if_broot_bytes, level == 0);
656 }
657
658 return cur->bc_mp->m_bmap_dmxr[level != 0];
659
660}
661
662/*
663 * Get the maximum records we could store in the on-disk format.
664 *
665 * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
666 * for the root node this checks the available space in the dinode fork
667 * so that we can resize the in-memory buffer to match it. After a
668 * resize to the maximum size this function returns the same value
669 * as xfs_bmbt_get_maxrecs for the root node, too.
670 */
671STATIC int
672xfs_bmbt_get_dmaxrecs(
673 struct xfs_btree_cur *cur,
674 int level)
675{
    /* Below the top level: per-mount limit (index 0 for level 0, else 1). */
676 if (level != cur->bc_nlevels - 1)
677 return cur->bc_mp->m_bmap_dmxr[level != 0];
    /* Top level: sized from the fork size recorded in the cursor. */
678 return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize,
679 level == 0);
680}
681
/*
 * Build a key from an on-disk record: the key's br_startoff is set to the
 * record's start offset, stored big-endian.
 */
682STATIC void
683xfs_bmbt_init_key_from_rec(
684 union xfs_btree_key *key,
685 union xfs_btree_rec *rec)
686{
687 key->bmbt.br_startoff =
688 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
689}
690
/*
 * Build an on-disk record from a key: the record takes the key's start
 * offset, with start block 0, block count 0 and state XFS_EXT_NORM.
 * The key's start offset is asserted to be non-zero.
 */
691STATIC void
692xfs_bmbt_init_rec_from_key(
693 union xfs_btree_key *key,
694 union xfs_btree_rec *rec)
695{
696 ASSERT(key->bmbt.br_startoff != 0);
697
698 xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
699 0, 0, XFS_EXT_NORM);
700}
701
/*
 * Fill the on-disk record @rec from the record held in the cursor
 * (cur->bc_rec.b) using xfs_bmbt_disk_set_all().
 */
702STATIC void
703xfs_bmbt_init_rec_from_cur(
704 struct xfs_btree_cur *cur,
705 union xfs_btree_rec *rec)
706{
707 xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
708}
709
/*
 * Initialise a btree pointer "from the cursor": for the bmap btree the
 * long-form pointer is simply set to 0 and @cur is not consulted.
 */
710STATIC void
711xfs_bmbt_init_ptr_from_cur(
712 struct xfs_btree_cur *cur,
713 union xfs_btree_ptr *ptr)
714{
715 ptr->l = 0;
716}
717
/*
 * Return the difference between @key's start offset (converted from
 * big-endian) and the start offset of the record held in the cursor.
 */
718STATIC __int64_t
719xfs_bmbt_key_diff(
720 struct xfs_btree_cur *cur,
721 union xfs_btree_key *key)
722{
723 return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
724 cur->bc_rec.b.br_startoff;
725}
726
727#ifdef DEBUG
/*
 * Debug check: non-zero when @k1's start offset is strictly less than
 * @k2's.  @cur is not used.
 */
728STATIC int
729xfs_bmbt_keys_inorder(
730 struct xfs_btree_cur *cur,
731 union xfs_btree_key *k1,
732 union xfs_btree_key *k2)
733{
734 return be64_to_cpu(k1->bmbt.br_startoff) <
735 be64_to_cpu(k2->bmbt.br_startoff);
736}
737
/*
 * Debug check: non-zero when @r1 ends at or before the start of @r2,
 * i.e. r1's start offset plus block count does not exceed r2's start
 * offset.  @cur is not used.
 */
738STATIC int
739xfs_bmbt_recs_inorder(
740 struct xfs_btree_cur *cur,
741 union xfs_btree_rec *r1,
742 union xfs_btree_rec *r2)
743{
744 return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
745 xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
746 xfs_bmbt_disk_get_startoff(&r2->bmbt);
747}
748#endif /* DEBUG */
749
750#ifdef XFS_BTREE_TRACE
/* Global trace buffer written by xfs_bmbt_trace_enter() below. */
751ktrace_t *xfs_bmbt_trace_buf;
752
/*
 * Record one trace entry for a bmap btree operation.
 *
 * The same entry is written twice: once to the global xfs_bmbt_trace_buf
 * and once to the per-inode buffer ip->i_btrace.  The first slot packs the
 * entry type in the low bits, the fork in bits 8 and up, and the source
 * line in bits 16 and up; the remaining slots carry @func, @s, the inode,
 * the cursor and the arguments @a0..@a10 unchanged.
 */
753STATIC void
754xfs_bmbt_trace_enter(
755 struct xfs_btree_cur *cur,
756 const char *func,
757 char *s,
758 int type,
759 int line,
760 __psunsigned_t a0,
761 __psunsigned_t a1,
762 __psunsigned_t a2,
763 __psunsigned_t a3,
764 __psunsigned_t a4,
765 __psunsigned_t a5,
766 __psunsigned_t a6,
767 __psunsigned_t a7,
768 __psunsigned_t a8,
769 __psunsigned_t a9,
770 __psunsigned_t a10)
771{
772 struct xfs_inode *ip = cur->bc_private.b.ip;
773 int whichfork = cur->bc_private.b.whichfork;
774
775 ktrace_enter(xfs_bmbt_trace_buf,
776 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
777 (void *)func, (void *)s, (void *)ip, (void *)cur,
778 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
779 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
780 (void *)a8, (void *)a9, (void *)a10);
781 ktrace_enter(ip->i_btrace,
782 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
783 (void *)func, (void *)s, (void *)ip, (void *)cur,
784 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
785 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
786 (void *)a8, (void *)a9, (void *)a10);
787}
788
/*
 * Summarise the cursor for tracing.
 *
 * @s0 receives bc_nlevels, the bmap-private flags and the allocated count
 * packed into one word (shifted by 24, 16 and 0 bits respectively).
 * @l0 and @l1 receive the two words of a host-format record built from
 * the cursor's current record.
 */
789STATIC void
790xfs_bmbt_trace_cursor(
791 struct xfs_btree_cur *cur,
792 __uint32_t *s0,
793 __uint64_t *l0,
794 __uint64_t *l1)
795{
796 struct xfs_bmbt_rec_host r;
797
798 xfs_bmbt_set_all(&r, &cur->bc_rec.b);
799
800 *s0 = (cur->bc_nlevels << 24) |
801 (cur->bc_private.b.flags << 16) |
802 cur->bc_private.b.allocated;
803 *l0 = r.l0;
804 *l1 = r.l1;
805}
806
/*
 * Export a key for tracing: @l0 receives the key's start offset in CPU
 * byte order, @l1 is always set to 0.  @cur is not used.
 */
807STATIC void
808xfs_bmbt_trace_key(
809 struct xfs_btree_cur *cur,
810 union xfs_btree_key *key,
811 __uint64_t *l0,
812 __uint64_t *l1)
813{
814 *l0 = be64_to_cpu(key->bmbt.br_startoff);
815 *l1 = 0;
816}
817
/*
 * Export an on-disk record for tracing: the record is decoded with
 * xfs_bmbt_disk_get_all() and its start offset, start block and block
 * count are returned through @l0, @l1 and @l2.  @cur is not used.
 */
818STATIC void
819xfs_bmbt_trace_record(
820 struct xfs_btree_cur *cur,
821 union xfs_btree_rec *rec,
822 __uint64_t *l0,
823 __uint64_t *l1,
824 __uint64_t *l2)
825{
826 struct xfs_bmbt_irec irec;
827
828 xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
829 *l0 = irec.br_startoff;
830 *l1 = irec.br_startblock;
831 *l2 = irec.br_blockcount;
832}
833#endif /* XFS_BTREE_TRACE */
834
/*
 * Operations vector for the bmap btree.  xfs_bmbt_init_cursor() installs
 * it as cur->bc_ops.  The ordering checks are only present in DEBUG
 * builds and the trace hooks only when XFS_BTREE_TRACE is defined.
 */
835static const struct xfs_btree_ops xfs_bmbt_ops = {
    /* on-disk record and key sizes */
836 .rec_len = sizeof(xfs_bmbt_rec_t),
837 .key_len = sizeof(xfs_bmbt_key_t),
838
839 .dup_cursor = xfs_bmbt_dup_cursor,
840 .update_cursor = xfs_bmbt_update_cursor,
841 .alloc_block = xfs_bmbt_alloc_block,
842 .free_block = xfs_bmbt_free_block,
843 .get_maxrecs = xfs_bmbt_get_maxrecs,
844 .get_minrecs = xfs_bmbt_get_minrecs,
845 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
846 .init_key_from_rec = xfs_bmbt_init_key_from_rec,
847 .init_rec_from_key = xfs_bmbt_init_rec_from_key,
848 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
849 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
850 .key_diff = xfs_bmbt_key_diff,
851
852#ifdef DEBUG
853 .keys_inorder = xfs_bmbt_keys_inorder,
854 .recs_inorder = xfs_bmbt_recs_inorder,
855#endif
856
857#ifdef XFS_BTREE_TRACE
858 .trace_enter = xfs_bmbt_trace_enter,
859 .trace_cursor = xfs_bmbt_trace_cursor,
860 .trace_key = xfs_bmbt_trace_key,
861 .trace_record = xfs_bmbt_trace_record,
862#endif
863};
864
865/*
866 * Allocate a new bmap btree cursor.
867 */
868struct xfs_btree_cur * /* new bmap btree cursor */
869xfs_bmbt_init_cursor(
870 struct xfs_mount *mp, /* file system mount point */
871 struct xfs_trans *tp, /* transaction pointer */
872 struct xfs_inode *ip, /* inode owning the btree */
873 int whichfork) /* data or attr fork */
874{
875 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
876 struct xfs_btree_cur *cur;
877
    /* Zeroing allocation from the cursor zone; result is used unchecked. */
878 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
879
880 cur->bc_tp = tp;
881 cur->bc_mp = mp;
    /* Number of levels is the level stored in the fork's root, plus one. */
882 cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
883 cur->bc_btnum = XFS_BTNUM_BMAP;
884 cur->bc_blocklog = mp->m_sb.sb_blocklog;
885
886 cur->bc_ops = &xfs_bmbt_ops;
887 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
888
    /*
     * Bmap-private state.  firstblock, flist and flags start out empty;
     * callers that need them set them afterwards (xfs_bmbt_dup_cursor()
     * copies them from the source cursor).
     */
889 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
890 cur->bc_private.b.ip = ip;
891 cur->bc_private.b.firstblock = NULLFSBLOCK;
892 cur->bc_private.b.flist = NULL;
893 cur->bc_private.b.allocated = 0;
894 cur->bc_private.b.flags = 0;
895 cur->bc_private.b.whichfork = whichfork;
896
897 return cur;
898}
899
900/*
901 * Calculate number of records in a bmap btree block.
902 */
903int
904xfs_bmbt_maxrecs(
905 struct xfs_mount *mp,
906 int blocklen,
907 int leaf)
908{
    /* Space left once the btree block header is taken out. */
909 blocklen -= XFS_BMBT_BLOCK_LEN(mp);
910
    /* Leaf blocks hold records; other blocks hold key/pointer pairs. */
911 if (leaf)
912 return blocklen / sizeof(xfs_bmbt_rec_t);
913 return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
914}
915
916/*
917 * Calculate number of records in a bmap btree inode root.
918 */
919int
920xfs_bmdr_maxrecs(
921 struct xfs_mount *mp,
922 int blocklen,
923 int leaf)
924{
    /* Space left after the inode root header; mp is not referenced here. */
925 blocklen -= sizeof(xfs_bmdr_block_t);
926
    /* Leaf roots hold records; other roots hold key/pointer pairs. */
927 if (leaf)
928 return blocklen / sizeof(xfs_bmdr_rec_t);
929 return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
930}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index cd0d4b4bb816..a4555abb6622 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -21,9 +21,10 @@
21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ 21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
22 22
23struct xfs_btree_cur; 23struct xfs_btree_cur;
24struct xfs_btree_lblock; 24struct xfs_btree_block;
25struct xfs_mount; 25struct xfs_mount;
26struct xfs_inode; 26struct xfs_inode;
27struct xfs_trans;
27 28
28/* 29/*
29 * Bmap root header, on-disk form only. 30 * Bmap root header, on-disk form only.
@@ -145,71 +146,60 @@ typedef struct xfs_bmbt_key {
145/* btree pointer type */ 146/* btree pointer type */
146typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; 147typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
147 148
148/* btree block header type */ 149/*
149typedef struct xfs_btree_lblock xfs_bmbt_block_t; 150 * Btree block header size depends on a superblock flag.
150 151 *
151#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) 152 * (not quite yet, but soon)
152 153 */
153#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize) 154#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN
154#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \ 155
155 ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \ 156#define XFS_BMBT_REC_ADDR(mp, block, index) \
156 (cur)->bc_private.b.whichfork)->if_broot_bytes) 157 ((xfs_bmbt_rec_t *) \
157 158 ((char *)(block) + \
158#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \ 159 XFS_BMBT_BLOCK_LEN(mp) + \
159 (((lev) == (cur)->bc_nlevels - 1 ? \ 160 ((index) - 1) * sizeof(xfs_bmbt_rec_t)))
160 XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \ 161
161 xfs_bmdr, (lev) == 0) : \ 162#define XFS_BMBT_KEY_ADDR(mp, block, index) \
162 ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) 163 ((xfs_bmbt_key_t *) \
163#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \ 164 ((char *)(block) + \
164 (((lev) == (cur)->bc_nlevels - 1 ? \ 165 XFS_BMBT_BLOCK_LEN(mp) + \
165 XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ 166 ((index) - 1) * sizeof(xfs_bmbt_key_t)))
166 xfs_bmbt, (lev) == 0) : \ 167
167 ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) 168#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \
168 169 ((xfs_bmbt_ptr_t *) \
169#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \ 170 ((char *)(block) + \
170 (((lev) == (cur)->bc_nlevels - 1 ? \ 171 XFS_BMBT_BLOCK_LEN(mp) + \
171 XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur),\ 172 (maxrecs) * sizeof(xfs_bmbt_key_t) + \
172 xfs_bmdr, (lev) == 0) : \ 173 ((index) - 1) * sizeof(xfs_bmbt_ptr_t)))
173 ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) 174
174#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \ 175#define XFS_BMDR_REC_ADDR(block, index) \
175 (((lev) == (cur)->bc_nlevels - 1 ? \ 176 ((xfs_bmdr_rec_t *) \
176 XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ 177 ((char *)(block) + \
177 xfs_bmbt, (lev) == 0) : \ 178 sizeof(struct xfs_bmdr_block) + \
178 ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) 179 ((index) - 1) * sizeof(xfs_bmdr_rec_t)))
179 180
180#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) 181#define XFS_BMDR_KEY_ADDR(block, index) \
181 182 ((xfs_bmdr_key_t *) \
182#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) 183 ((char *)(block) + \
183 184 sizeof(struct xfs_bmdr_block) + \
184#define XFS_BMAP_KEY_DADDR(bb,i,cur) \ 185 ((index) - 1) * sizeof(xfs_bmdr_key_t)))
185 (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) 186
186 187#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \
187#define XFS_BMAP_KEY_IADDR(bb,i,cur) \ 188 ((xfs_bmdr_ptr_t *) \
188 (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) 189 ((char *)(block) + \
189 190 sizeof(struct xfs_bmdr_block) + \
190#define XFS_BMAP_PTR_DADDR(bb,i,cur) \ 191 (maxrecs) * sizeof(xfs_bmdr_key_t) + \
191 (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ 192 ((index) - 1) * sizeof(xfs_bmdr_ptr_t)))
192 be16_to_cpu((bb)->bb_level), cur)))
193#define XFS_BMAP_PTR_IADDR(bb,i,cur) \
194 (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
195 be16_to_cpu((bb)->bb_level), cur)))
196 193
197/* 194/*
198 * These are to be used when we know the size of the block and 195 * These are to be used when we know the size of the block and
199 * we don't have a cursor. 196 * we don't have a cursor.
200 */ 197 */
201#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \ 198#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
202 (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i)) 199 XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
203#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
204 (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i))
205#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
206 (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
207
208#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs)
209#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
210 200
211#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ 201#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
212 (int)(sizeof(xfs_bmbt_block_t) + \ 202 (int)(XFS_BTREE_LBLOCK_LEN + \
213 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) 203 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
214 204
215#define XFS_BMAP_BROOT_SPACE(bb) \ 205#define XFS_BMAP_BROOT_SPACE(bb) \
@@ -223,42 +213,12 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
223 */ 213 */
224#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) 214#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)])
225 215
226#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \
227 (be32_to_cpu((bb)->bb_magic) == XFS_BMAP_MAGIC && \
228 be16_to_cpu((bb)->bb_level) == level && \
229 be16_to_cpu((bb)->bb_numrecs) > 0 && \
230 be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0])
231
232
233#ifdef __KERNEL__
234
235#if defined(XFS_BMBT_TRACE)
236/*
237 * Trace buffer entry types.
238 */
239#define XFS_BMBT_KTRACE_ARGBI 1
240#define XFS_BMBT_KTRACE_ARGBII 2
241#define XFS_BMBT_KTRACE_ARGFFFI 3
242#define XFS_BMBT_KTRACE_ARGI 4
243#define XFS_BMBT_KTRACE_ARGIFK 5
244#define XFS_BMBT_KTRACE_ARGIFR 6
245#define XFS_BMBT_KTRACE_ARGIK 7
246#define XFS_BMBT_KTRACE_CUR 8
247
248#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
249#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
250extern ktrace_t *xfs_bmbt_trace_buf;
251#endif
252
253/* 216/*
254 * Prototypes for xfs_bmap.c to call. 217 * Prototypes for xfs_bmap.c to call.
255 */ 218 */
256extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); 219extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int,
257extern int xfs_bmbt_decrement(struct xfs_btree_cur *, int, int *); 220 struct xfs_btree_block *, int);
258extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *);
259extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); 221extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
260extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur,
261 int, struct xfs_buf **bpp);
262extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); 222extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
263extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); 223extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
264extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); 224extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
@@ -268,22 +228,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
268extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); 228extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
269extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); 229extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
270 230
271extern int xfs_bmbt_increment(struct xfs_btree_cur *, int, int *);
272extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *);
273extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
274extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int,
275 int);
276extern int xfs_bmbt_lookup_eq(struct xfs_btree_cur *, xfs_fileoff_t,
277 xfs_fsblock_t, xfs_filblks_t, int *);
278extern int xfs_bmbt_lookup_ge(struct xfs_btree_cur *, xfs_fileoff_t,
279 xfs_fsblock_t, xfs_filblks_t, int *);
280
281/*
282 * Give the bmap btree a new root block. Copy the old broot contents
283 * down into a real block and make the broot point to it.
284 */
285extern int xfs_bmbt_newroot(struct xfs_btree_cur *cur, int *lflags, int *stat);
286
287extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); 231extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
288extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, 232extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
289 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 233 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
@@ -296,10 +240,15 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
296extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, 240extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
297 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 241 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
298 242
299extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); 243extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
300extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t, 244 xfs_bmdr_block_t *, int);
301 xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t); 245
246extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
247extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
248extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
249
250extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
251 struct xfs_trans *, struct xfs_inode *, int);
302 252
303#endif /* __KERNEL__ */
304 253
305#endif /* __XFS_BMAP_BTREE_H__ */ 254#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index cc593a84c345..7ed59267420d 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -34,7 +34,9 @@
34#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_inode_item.h"
37#include "xfs_btree.h" 38#include "xfs_btree.h"
39#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
39#include "xfs_error.h" 41#include "xfs_error.h"
40 42
@@ -50,135 +52,33 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
50 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC 52 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
51}; 53};
52 54
53/*
54 * Checking routine: return maxrecs for the block.
55 */
56STATIC int /* number of records fitting in block */
57xfs_btree_maxrecs(
58 xfs_btree_cur_t *cur, /* btree cursor */
59 xfs_btree_block_t *block) /* generic btree block pointer */
60{
    /*
     * Dispatch on the btree type stored in the cursor; each per-type
     * macro is given the level read from the block's own header.
     */
61 switch (cur->bc_btnum) {
62 case XFS_BTNUM_BNO:
63 case XFS_BTNUM_CNT:
64 return (int)XFS_ALLOC_BLOCK_MAXRECS(
65 be16_to_cpu(block->bb_h.bb_level), cur);
66 case XFS_BTNUM_BMAP:
67 return (int)XFS_BMAP_BLOCK_IMAXRECS(
68 be16_to_cpu(block->bb_h.bb_level), cur);
69 case XFS_BTNUM_INO:
70 return (int)XFS_INOBT_BLOCK_MAXRECS(
71 be16_to_cpu(block->bb_h.bb_level), cur);
72 default:
    /* Unknown btree type: assert, and report room for no records. */
73 ASSERT(0);
74 return 0;
75 }
76}
77
78/*
79 * External routines.
80 */
81
82#ifdef DEBUG
83/*
84 * Debug routine: check that block header is ok.
85 */
86void
87xfs_btree_check_block(
88 xfs_btree_cur_t *cur, /* btree cursor */
89 xfs_btree_block_t *block, /* generic btree block pointer */
90 int level, /* level of the btree block */
91 xfs_buf_t *bp) /* buffer containing block, if any */
92{
    /*
     * Pick the long- or short-form header check according to the btree
     * type.  The checker's return value is discarded here.
     */
93 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
94 xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
95 bp);
96 else
97 xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
98 bp);
99}
100
101/*
102 * Debug routine: check that keys are in the right order.
103 */
104void
105xfs_btree_check_key(
106 xfs_btnum_t btnum, /* btree identifier */
107 void *ak1, /* pointer to left (lower) key */
108 void *ak2) /* pointer to right (higher) key */
109{
    /* Each case casts the opaque keys to the type for that btree. */
110 switch (btnum) {
    /* By-block allocation btree: start blocks strictly increase. */
111 case XFS_BTNUM_BNO: {
112 xfs_alloc_key_t *k1;
113 xfs_alloc_key_t *k2;
114
115 k1 = ak1;
116 k2 = ak2;
117 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
118 break;
119 }
    /* By-count btree: ordered by block count, ties broken by start block. */
120 case XFS_BTNUM_CNT: {
121 xfs_alloc_key_t *k1;
122 xfs_alloc_key_t *k2;
123
124 k1 = ak1;
125 k2 = ak2;
126 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
127 (k1->ar_blockcount == k2->ar_blockcount &&
128 be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
129 break;
130 }
    /* Bmap btree: start offsets strictly increase. */
131 case XFS_BTNUM_BMAP: {
132 xfs_bmbt_key_t *k1;
133 xfs_bmbt_key_t *k2;
134
135 k1 = ak1;
136 k2 = ak2;
137 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
138 break;
139 }
    /* Inode btree: starting inode numbers strictly increase. */
140 case XFS_BTNUM_INO: {
141 xfs_inobt_key_t *k1;
142 xfs_inobt_key_t *k2;
143
144 k1 = ak1;
145 k2 = ak2;
146 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
147 break;
148 }
149 default:
150 ASSERT(0);
151 }
152}
153#endif /* DEBUG */
154 55
155/* 56STATIC int /* error (0 or EFSCORRUPTED) */
156 * Checking routine: check that long form block header is ok.
157 */
158/* ARGSUSED */
159int /* error (0 or EFSCORRUPTED) */
160xfs_btree_check_lblock( 57xfs_btree_check_lblock(
161 xfs_btree_cur_t *cur, /* btree cursor */ 58 struct xfs_btree_cur *cur, /* btree cursor */
162 xfs_btree_lblock_t *block, /* btree long form block pointer */ 59 struct xfs_btree_block *block, /* btree long form block pointer */
163 int level, /* level of the btree block */ 60 int level, /* level of the btree block */
164 xfs_buf_t *bp) /* buffer for block, if any */ 61 struct xfs_buf *bp) /* buffer for block, if any */
165{ 62{
166 int lblock_ok; /* block passes checks */ 63 int lblock_ok; /* block passes checks */
167 xfs_mount_t *mp; /* file system mount point */ 64 struct xfs_mount *mp; /* file system mount point */
168 65
169 mp = cur->bc_mp; 66 mp = cur->bc_mp;
170 lblock_ok = 67 lblock_ok =
171 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 68 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
172 be16_to_cpu(block->bb_level) == level && 69 be16_to_cpu(block->bb_level) == level &&
173 be16_to_cpu(block->bb_numrecs) <= 70 be16_to_cpu(block->bb_numrecs) <=
174 xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && 71 cur->bc_ops->get_maxrecs(cur, level) &&
175 block->bb_leftsib && 72 block->bb_u.l.bb_leftsib &&
176 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || 73 (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
177 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && 74 XFS_FSB_SANITY_CHECK(mp,
178 block->bb_rightsib && 75 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
179 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || 76 block->bb_u.l.bb_rightsib &&
180 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); 77 (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
181 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, 78 XFS_FSB_SANITY_CHECK(mp,
79 be64_to_cpu(block->bb_u.l.bb_rightsib)));
80 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
81 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
182 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 82 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
183 if (bp) 83 if (bp)
184 xfs_buftrace("LBTREE ERROR", bp); 84 xfs_buftrace("LBTREE ERROR", bp);
@@ -189,98 +89,15 @@ xfs_btree_check_lblock(
189 return 0; 89 return 0;
190} 90}
191 91
192/* 92STATIC int /* error (0 or EFSCORRUPTED) */
193 * Checking routine: check that (long) pointer is ok.
194 */
195int /* error (0 or EFSCORRUPTED) */
196xfs_btree_check_lptr(
197 xfs_btree_cur_t *cur, /* btree cursor */
198 xfs_dfsbno_t ptr, /* btree block disk address */
199 int level) /* btree block level */
200{
201 xfs_mount_t *mp; /* file system mount point */
202
203 mp = cur->bc_mp;
204 XFS_WANT_CORRUPTED_RETURN(
205 level > 0 &&
206 ptr != NULLDFSBNO &&
207 XFS_FSB_SANITY_CHECK(mp, ptr));
208 return 0;
209}
210
211#ifdef DEBUG
212/*
213 * Debug routine: check that records are in the right order.
214 */
215void
216xfs_btree_check_rec(
217 xfs_btnum_t btnum, /* btree identifier */
218 void *ar1, /* pointer to left (lower) record */
219 void *ar2) /* pointer to right (higher) record */
220{
221 switch (btnum) {
222 case XFS_BTNUM_BNO: {
223 xfs_alloc_rec_t *r1;
224 xfs_alloc_rec_t *r2;
225
226 r1 = ar1;
227 r2 = ar2;
228 ASSERT(be32_to_cpu(r1->ar_startblock) +
229 be32_to_cpu(r1->ar_blockcount) <=
230 be32_to_cpu(r2->ar_startblock));
231 break;
232 }
233 case XFS_BTNUM_CNT: {
234 xfs_alloc_rec_t *r1;
235 xfs_alloc_rec_t *r2;
236
237 r1 = ar1;
238 r2 = ar2;
239 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
240 (r1->ar_blockcount == r2->ar_blockcount &&
241 be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
242 break;
243 }
244 case XFS_BTNUM_BMAP: {
245 xfs_bmbt_rec_t *r1;
246 xfs_bmbt_rec_t *r2;
247
248 r1 = ar1;
249 r2 = ar2;
250 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
251 xfs_bmbt_disk_get_blockcount(r1) <=
252 xfs_bmbt_disk_get_startoff(r2));
253 break;
254 }
255 case XFS_BTNUM_INO: {
256 xfs_inobt_rec_t *r1;
257 xfs_inobt_rec_t *r2;
258
259 r1 = ar1;
260 r2 = ar2;
261 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
262 be32_to_cpu(r2->ir_startino));
263 break;
264 }
265 default:
266 ASSERT(0);
267 }
268}
269#endif /* DEBUG */
270
271/*
272 * Checking routine: check that block header is ok.
273 */
274/* ARGSUSED */
275int /* error (0 or EFSCORRUPTED) */
276xfs_btree_check_sblock( 93xfs_btree_check_sblock(
277 xfs_btree_cur_t *cur, /* btree cursor */ 94 struct xfs_btree_cur *cur, /* btree cursor */
278 xfs_btree_sblock_t *block, /* btree short form block pointer */ 95 struct xfs_btree_block *block, /* btree short form block pointer */
279 int level, /* level of the btree block */ 96 int level, /* level of the btree block */
280 xfs_buf_t *bp) /* buffer containing block */ 97 struct xfs_buf *bp) /* buffer containing block */
281{ 98{
282 xfs_buf_t *agbp; /* buffer for ag. freespace struct */ 99 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
283 xfs_agf_t *agf; /* ag. freespace structure */ 100 struct xfs_agf *agf; /* ag. freespace structure */
284 xfs_agblock_t agflen; /* native ag. freespace length */ 101 xfs_agblock_t agflen; /* native ag. freespace length */
285 int sblock_ok; /* block passes checks */ 102 int sblock_ok; /* block passes checks */
286 103
@@ -291,13 +108,13 @@ xfs_btree_check_sblock(
291 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 108 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
292 be16_to_cpu(block->bb_level) == level && 109 be16_to_cpu(block->bb_level) == level &&
293 be16_to_cpu(block->bb_numrecs) <= 110 be16_to_cpu(block->bb_numrecs) <=
294 xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && 111 cur->bc_ops->get_maxrecs(cur, level) &&
295 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK || 112 (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
296 be32_to_cpu(block->bb_leftsib) < agflen) && 113 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
297 block->bb_leftsib && 114 block->bb_u.s.bb_leftsib &&
298 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK || 115 (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
299 be32_to_cpu(block->bb_rightsib) < agflen) && 116 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
300 block->bb_rightsib; 117 block->bb_u.s.bb_rightsib;
301 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, 118 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
302 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 119 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
303 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
@@ -311,27 +128,78 @@ xfs_btree_check_sblock(
311} 128}
312 129
313/* 130/*
314 * Checking routine: check that (short) pointer is ok. 131 * Debug routine: check that block header is ok.
132 */
133int
134xfs_btree_check_block(
135 struct xfs_btree_cur *cur, /* btree cursor */
136 struct xfs_btree_block *block, /* generic btree block pointer */
137 int level, /* level of the btree block */
138 struct xfs_buf *bp) /* buffer containing block, if any */
139{
140 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
141 return xfs_btree_check_lblock(cur, block, level, bp);
142 else
143 return xfs_btree_check_sblock(cur, block, level, bp);
144}
145
146/*
147 * Check that (long) pointer is ok.
315 */ 148 */
316int /* error (0 or EFSCORRUPTED) */ 149int /* error (0 or EFSCORRUPTED) */
150xfs_btree_check_lptr(
151 struct xfs_btree_cur *cur, /* btree cursor */
152 xfs_dfsbno_t bno, /* btree block disk address */
153 int level) /* btree block level */
154{
155 XFS_WANT_CORRUPTED_RETURN(
156 level > 0 &&
157 bno != NULLDFSBNO &&
158 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
159 return 0;
160}
161
162#ifdef DEBUG
163/*
164 * Check that (short) pointer is ok.
165 */
166STATIC int /* error (0 or EFSCORRUPTED) */
317xfs_btree_check_sptr( 167xfs_btree_check_sptr(
318 xfs_btree_cur_t *cur, /* btree cursor */ 168 struct xfs_btree_cur *cur, /* btree cursor */
319 xfs_agblock_t ptr, /* btree block disk address */ 169 xfs_agblock_t bno, /* btree block disk address */
320 int level) /* btree block level */ 170 int level) /* btree block level */
321{ 171{
322 xfs_buf_t *agbp; /* buffer for ag. freespace struct */ 172 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
323 xfs_agf_t *agf; /* ag. freespace structure */
324 173
325 agbp = cur->bc_private.a.agbp;
326 agf = XFS_BUF_TO_AGF(agbp);
327 XFS_WANT_CORRUPTED_RETURN( 174 XFS_WANT_CORRUPTED_RETURN(
328 level > 0 && 175 level > 0 &&
329 ptr != NULLAGBLOCK && ptr != 0 && 176 bno != NULLAGBLOCK &&
330 ptr < be32_to_cpu(agf->agf_length)); 177 bno != 0 &&
178 bno < agblocks);
331 return 0; 179 return 0;
332} 180}
333 181
334/* 182/*
183 * Check that block ptr is ok.
184 */
185STATIC int /* error (0 or EFSCORRUPTED) */
186xfs_btree_check_ptr(
187 struct xfs_btree_cur *cur, /* btree cursor */
188 union xfs_btree_ptr *ptr, /* btree block disk address */
189 int index, /* offset from ptr to check */
190 int level) /* btree block level */
191{
192 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
193 return xfs_btree_check_lptr(cur,
194 be64_to_cpu((&ptr->l)[index]), level);
195 } else {
196 return xfs_btree_check_sptr(cur,
197 be32_to_cpu((&ptr->s)[index]), level);
198 }
199}
200#endif
201
202/*
335 * Delete the btree cursor. 203 * Delete the btree cursor.
336 */ 204 */
337void 205void
@@ -387,16 +255,17 @@ xfs_btree_dup_cursor(
387 255
388 tp = cur->bc_tp; 256 tp = cur->bc_tp;
389 mp = cur->bc_mp; 257 mp = cur->bc_mp;
258
390 /* 259 /*
391 * Allocate a new cursor like the old one. 260 * Allocate a new cursor like the old one.
392 */ 261 */
393 new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp, 262 new = cur->bc_ops->dup_cursor(cur);
394 cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip, 263
395 cur->bc_private.b.whichfork);
396 /* 264 /*
397 * Copy the record currently in the cursor. 265 * Copy the record currently in the cursor.
398 */ 266 */
399 new->bc_rec = cur->bc_rec; 267 new->bc_rec = cur->bc_rec;
268
400 /* 269 /*
401 * For each level current, re-get the buffer and copy the ptr value. 270 * For each level current, re-get the buffer and copy the ptr value.
402 */ 271 */
@@ -416,46 +285,174 @@ xfs_btree_dup_cursor(
416 } else 285 } else
417 new->bc_bufs[i] = NULL; 286 new->bc_bufs[i] = NULL;
418 } 287 }
419 /*
420 * For bmap btrees, copy the firstblock, flist, and flags values,
421 * since init cursor doesn't get them.
422 */
423 if (new->bc_btnum == XFS_BTNUM_BMAP) {
424 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
425 new->bc_private.b.flist = cur->bc_private.b.flist;
426 new->bc_private.b.flags = cur->bc_private.b.flags;
427 }
428 *ncur = new; 288 *ncur = new;
429 return 0; 289 return 0;
430} 290}
431 291
432/* 292/*
293 * XFS btree block layout and addressing:
294 *
295 * There are two types of blocks in the btree: leaf and non-leaf blocks.
296 *
297 * A leaf block starts with a header, followed by records containing
298 * the values. A non-leaf block also starts with the same header, and
299 * then first contains lookup keys followed by an equal number of pointers
300 * to the btree blocks at the previous level.
301 *
302 * +--------+-------+-------+-------+-------+-------+-------+
303 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
304 * +--------+-------+-------+-------+-------+-------+-------+
305 *
306 * +--------+-------+-------+-------+-------+-------+-------+
307 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
308 * +--------+-------+-------+-------+-------+-------+-------+
309 *
310 * The header is called struct xfs_btree_block for reasons better left unknown
311 * and comes in different versions for short (32bit) and long (64bit) block
312 * pointers. The record and key structures are defined by the btree instances
313 * and opaque to the btree core. The block pointers are simple disk endian
314 * integers, available in a short (32bit) and long (64bit) variant.
315 *
316 * The helpers below calculate the offset of a given record, key or pointer
317 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
318 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
319 * inside the btree block is done using indices starting at one, not zero!
320 */
321
322/*
323 * Return size of the btree block header for this btree instance.
324 */
325static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
326{
327 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
328 XFS_BTREE_LBLOCK_LEN :
329 XFS_BTREE_SBLOCK_LEN;
330}
331
332/*
333 * Return size of btree block pointers for this btree instance.
334 */
335static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
336{
337 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
338 sizeof(__be64) : sizeof(__be32);
339}
340
341/*
342 * Calculate offset of the n-th record in a btree block.
343 */
344STATIC size_t
345xfs_btree_rec_offset(
346 struct xfs_btree_cur *cur,
347 int n)
348{
349 return xfs_btree_block_len(cur) +
350 (n - 1) * cur->bc_ops->rec_len;
351}
352
353/*
354 * Calculate offset of the n-th key in a btree block.
355 */
356STATIC size_t
357xfs_btree_key_offset(
358 struct xfs_btree_cur *cur,
359 int n)
360{
361 return xfs_btree_block_len(cur) +
362 (n - 1) * cur->bc_ops->key_len;
363}
364
365/*
366 * Calculate offset of the n-th block pointer in a btree block.
367 */
368STATIC size_t
369xfs_btree_ptr_offset(
370 struct xfs_btree_cur *cur,
371 int n,
372 int level)
373{
374 return xfs_btree_block_len(cur) +
375 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
376 (n - 1) * xfs_btree_ptr_len(cur);
377}
378
379/*
380 * Return a pointer to the n-th record in the btree block.
381 */
382STATIC union xfs_btree_rec *
383xfs_btree_rec_addr(
384 struct xfs_btree_cur *cur,
385 int n,
386 struct xfs_btree_block *block)
387{
388 return (union xfs_btree_rec *)
389 ((char *)block + xfs_btree_rec_offset(cur, n));
390}
391
392/*
393 * Return a pointer to the n-th key in the btree block.
394 */
395STATIC union xfs_btree_key *
396xfs_btree_key_addr(
397 struct xfs_btree_cur *cur,
398 int n,
399 struct xfs_btree_block *block)
400{
401 return (union xfs_btree_key *)
402 ((char *)block + xfs_btree_key_offset(cur, n));
403}
404
405/*
406 * Return a pointer to the n-th block pointer in the btree block.
407 */
408STATIC union xfs_btree_ptr *
409xfs_btree_ptr_addr(
410 struct xfs_btree_cur *cur,
411 int n,
412 struct xfs_btree_block *block)
413{
414 int level = xfs_btree_get_level(block);
415
416 ASSERT(block->bb_level != 0);
417
418 return (union xfs_btree_ptr *)
419 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
420}
421
422/*
423 * Get a the root block which is stored in the inode.
424 *
425 * For now this btree implementation assumes the btree root is always
426 * stored in the if_broot field of an inode fork.
427 */
428STATIC struct xfs_btree_block *
429xfs_btree_get_iroot(
430 struct xfs_btree_cur *cur)
431{
432 struct xfs_ifork *ifp;
433
434 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
435 return (struct xfs_btree_block *)ifp->if_broot;
436}
437
438/*
433 * Retrieve the block pointer from the cursor at the given level. 439 * Retrieve the block pointer from the cursor at the given level.
434 * This may be a bmap btree root or from a buffer. 440 * This may be an inode btree root or from a buffer.
435 */ 441 */
436STATIC xfs_btree_block_t * /* generic btree block pointer */ 442STATIC struct xfs_btree_block * /* generic btree block pointer */
437xfs_btree_get_block( 443xfs_btree_get_block(
438 xfs_btree_cur_t *cur, /* btree cursor */ 444 struct xfs_btree_cur *cur, /* btree cursor */
439 int level, /* level in btree */ 445 int level, /* level in btree */
440 xfs_buf_t **bpp) /* buffer containing the block */ 446 struct xfs_buf **bpp) /* buffer containing the block */
441{ 447{
442 xfs_btree_block_t *block; /* return value */ 448 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
443 xfs_buf_t *bp; /* return buffer */ 449 (level == cur->bc_nlevels - 1)) {
444 xfs_ifork_t *ifp; /* inode fork pointer */ 450 *bpp = NULL;
445 int whichfork; /* data or attr fork */ 451 return xfs_btree_get_iroot(cur);
446
447 if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
448 whichfork = cur->bc_private.b.whichfork;
449 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
450 block = (xfs_btree_block_t *)ifp->if_broot;
451 bp = NULL;
452 } else {
453 bp = cur->bc_bufs[level];
454 block = XFS_BUF_TO_BLOCK(bp);
455 } 452 }
456 ASSERT(block != NULL); 453
457 *bpp = bp; 454 *bpp = cur->bc_bufs[level];
458 return block; 455 return XFS_BUF_TO_BLOCK(*bpp);
459} 456}
460 457
461/* 458/*
@@ -505,97 +502,6 @@ xfs_btree_get_bufs(
505} 502}
506 503
507/* 504/*
508 * Allocate a new btree cursor.
509 * The cursor is either for allocation (A) or bmap (B) or inodes (I).
510 */
511xfs_btree_cur_t * /* new btree cursor */
512xfs_btree_init_cursor(
513 xfs_mount_t *mp, /* file system mount point */
514 xfs_trans_t *tp, /* transaction pointer */
515 xfs_buf_t *agbp, /* (A only) buffer for agf structure */
516 /* (I only) buffer for agi structure */
517 xfs_agnumber_t agno, /* (AI only) allocation group number */
518 xfs_btnum_t btnum, /* btree identifier */
519 xfs_inode_t *ip, /* (B only) inode owning the btree */
520 int whichfork) /* (B only) data or attr fork */
521{
522 xfs_agf_t *agf; /* (A) allocation group freespace */
523 xfs_agi_t *agi; /* (I) allocation group inodespace */
524 xfs_btree_cur_t *cur; /* return value */
525 xfs_ifork_t *ifp; /* (I) inode fork pointer */
526 int nlevels=0; /* number of levels in the btree */
527
528 ASSERT(xfs_btree_cur_zone != NULL);
529 /*
530 * Allocate a new cursor.
531 */
532 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
533 /*
534 * Deduce the number of btree levels from the arguments.
535 */
536 switch (btnum) {
537 case XFS_BTNUM_BNO:
538 case XFS_BTNUM_CNT:
539 agf = XFS_BUF_TO_AGF(agbp);
540 nlevels = be32_to_cpu(agf->agf_levels[btnum]);
541 break;
542 case XFS_BTNUM_BMAP:
543 ifp = XFS_IFORK_PTR(ip, whichfork);
544 nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
545 break;
546 case XFS_BTNUM_INO:
547 agi = XFS_BUF_TO_AGI(agbp);
548 nlevels = be32_to_cpu(agi->agi_level);
549 break;
550 default:
551 ASSERT(0);
552 }
553 /*
554 * Fill in the common fields.
555 */
556 cur->bc_tp = tp;
557 cur->bc_mp = mp;
558 cur->bc_nlevels = nlevels;
559 cur->bc_btnum = btnum;
560 cur->bc_blocklog = mp->m_sb.sb_blocklog;
561 /*
562 * Fill in private fields.
563 */
564 switch (btnum) {
565 case XFS_BTNUM_BNO:
566 case XFS_BTNUM_CNT:
567 /*
568 * Allocation btree fields.
569 */
570 cur->bc_private.a.agbp = agbp;
571 cur->bc_private.a.agno = agno;
572 break;
573 case XFS_BTNUM_INO:
574 /*
575 * Inode allocation btree fields.
576 */
577 cur->bc_private.a.agbp = agbp;
578 cur->bc_private.a.agno = agno;
579 break;
580 case XFS_BTNUM_BMAP:
581 /*
582 * Bmap btree fields.
583 */
584 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
585 cur->bc_private.b.ip = ip;
586 cur->bc_private.b.firstblock = NULLFSBLOCK;
587 cur->bc_private.b.flist = NULL;
588 cur->bc_private.b.allocated = 0;
589 cur->bc_private.b.flags = 0;
590 cur->bc_private.b.whichfork = whichfork;
591 break;
592 default:
593 ASSERT(0);
594 }
595 return cur;
596}
597
598/*
599 * Check for the cursor referring to the last block at the given level. 505 * Check for the cursor referring to the last block at the given level.
600 */ 506 */
601int /* 1=is last block, 0=not last block */ 507int /* 1=is last block, 0=not last block */
@@ -603,12 +509,12 @@ xfs_btree_islastblock(
603 xfs_btree_cur_t *cur, /* btree cursor */ 509 xfs_btree_cur_t *cur, /* btree cursor */
604 int level) /* level to check */ 510 int level) /* level to check */
605{ 511{
606 xfs_btree_block_t *block; /* generic btree block pointer */ 512 struct xfs_btree_block *block; /* generic btree block pointer */
607 xfs_buf_t *bp; /* buffer containing block */ 513 xfs_buf_t *bp; /* buffer containing block */
608 514
609 block = xfs_btree_get_block(cur, level, &bp); 515 block = xfs_btree_get_block(cur, level, &bp);
610 xfs_btree_check_block(cur, block, level, bp); 516 xfs_btree_check_block(cur, block, level, bp);
611 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) 517 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
612 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; 518 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
613 else 519 else
614 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; 520 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
@@ -618,12 +524,12 @@ xfs_btree_islastblock(
618 * Change the cursor to point to the first record at the given level. 524 * Change the cursor to point to the first record at the given level.
619 * Other levels are unaffected. 525 * Other levels are unaffected.
620 */ 526 */
621int /* success=1, failure=0 */ 527STATIC int /* success=1, failure=0 */
622xfs_btree_firstrec( 528xfs_btree_firstrec(
623 xfs_btree_cur_t *cur, /* btree cursor */ 529 xfs_btree_cur_t *cur, /* btree cursor */
624 int level) /* level to change */ 530 int level) /* level to change */
625{ 531{
626 xfs_btree_block_t *block; /* generic btree block pointer */ 532 struct xfs_btree_block *block; /* generic btree block pointer */
627 xfs_buf_t *bp; /* buffer containing block */ 533 xfs_buf_t *bp; /* buffer containing block */
628 534
629 /* 535 /*
@@ -634,7 +540,7 @@ xfs_btree_firstrec(
634 /* 540 /*
635 * It's empty, there is no such record. 541 * It's empty, there is no such record.
636 */ 542 */
637 if (!block->bb_h.bb_numrecs) 543 if (!block->bb_numrecs)
638 return 0; 544 return 0;
639 /* 545 /*
640 * Set the ptr value to 1, that's the first record/key. 546 * Set the ptr value to 1, that's the first record/key.
@@ -647,12 +553,12 @@ xfs_btree_firstrec(
647 * Change the cursor to point to the last record in the current block 553 * Change the cursor to point to the last record in the current block
648 * at the given level. Other levels are unaffected. 554 * at the given level. Other levels are unaffected.
649 */ 555 */
650int /* success=1, failure=0 */ 556STATIC int /* success=1, failure=0 */
651xfs_btree_lastrec( 557xfs_btree_lastrec(
652 xfs_btree_cur_t *cur, /* btree cursor */ 558 xfs_btree_cur_t *cur, /* btree cursor */
653 int level) /* level to change */ 559 int level) /* level to change */
654{ 560{
655 xfs_btree_block_t *block; /* generic btree block pointer */ 561 struct xfs_btree_block *block; /* generic btree block pointer */
656 xfs_buf_t *bp; /* buffer containing block */ 562 xfs_buf_t *bp; /* buffer containing block */
657 563
658 /* 564 /*
@@ -663,12 +569,12 @@ xfs_btree_lastrec(
663 /* 569 /*
664 * It's empty, there is no such record. 570 * It's empty, there is no such record.
665 */ 571 */
666 if (!block->bb_h.bb_numrecs) 572 if (!block->bb_numrecs)
667 return 0; 573 return 0;
668 /* 574 /*
669 * Set the ptr value to numrecs, that's the last record/key. 575 * Set the ptr value to numrecs, that's the last record/key.
670 */ 576 */
671 cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs); 577 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
672 return 1; 578 return 1;
673} 579}
674 580
@@ -817,66 +723,84 @@ xfs_btree_reada_bufs(
817 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); 723 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
818} 724}
819 725
726STATIC int
727xfs_btree_readahead_lblock(
728 struct xfs_btree_cur *cur,
729 int lr,
730 struct xfs_btree_block *block)
731{
732 int rval = 0;
733 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
734 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
735
736 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
737 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
738 rval++;
739 }
740
741 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
742 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
743 rval++;
744 }
745
746 return rval;
747}
748
749STATIC int
750xfs_btree_readahead_sblock(
751 struct xfs_btree_cur *cur,
752 int lr,
753 struct xfs_btree_block *block)
754{
755 int rval = 0;
756 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
757 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
758
759
760 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
761 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
762 left, 1);
763 rval++;
764 }
765
766 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
767 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
768 right, 1);
769 rval++;
770 }
771
772 return rval;
773}
774
820/* 775/*
821 * Read-ahead btree blocks, at the given level. 776 * Read-ahead btree blocks, at the given level.
822 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 777 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
823 */ 778 */
824int 779STATIC int
825xfs_btree_readahead_core( 780xfs_btree_readahead(
826 xfs_btree_cur_t *cur, /* btree cursor */ 781 struct xfs_btree_cur *cur, /* btree cursor */
827 int lev, /* level in btree */ 782 int lev, /* level in btree */
828 int lr) /* left/right bits */ 783 int lr) /* left/right bits */
829{ 784{
830 xfs_alloc_block_t *a; 785 struct xfs_btree_block *block;
831 xfs_bmbt_block_t *b; 786
832 xfs_inobt_block_t *i; 787 /*
833 int rval = 0; 788 * No readahead needed if we are at the root level and the
789 * btree root is stored in the inode.
790 */
791 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
792 (lev == cur->bc_nlevels - 1))
793 return 0;
794
795 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
796 return 0;
834 797
835 ASSERT(cur->bc_bufs[lev] != NULL);
836 cur->bc_ra[lev] |= lr; 798 cur->bc_ra[lev] |= lr;
837 switch (cur->bc_btnum) { 799 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
838 case XFS_BTNUM_BNO: 800
839 case XFS_BTNUM_CNT: 801 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
840 a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); 802 return xfs_btree_readahead_lblock(cur, lr, block);
841 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) { 803 return xfs_btree_readahead_sblock(cur, lr, block);
842 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
843 be32_to_cpu(a->bb_leftsib), 1);
844 rval++;
845 }
846 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
847 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
848 be32_to_cpu(a->bb_rightsib), 1);
849 rval++;
850 }
851 break;
852 case XFS_BTNUM_BMAP:
853 b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
854 if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
855 xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
856 rval++;
857 }
858 if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
859 xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
860 rval++;
861 }
862 break;
863 case XFS_BTNUM_INO:
864 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
865 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
866 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
867 be32_to_cpu(i->bb_leftsib), 1);
868 rval++;
869 }
870 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
871 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
872 be32_to_cpu(i->bb_rightsib), 1);
873 rval++;
874 }
875 break;
876 default:
877 ASSERT(0);
878 }
879 return rval;
880} 804}
881 805
882/* 806/*
@@ -889,7 +813,7 @@ xfs_btree_setbuf(
889 int lev, /* level in btree */ 813 int lev, /* level in btree */
890 xfs_buf_t *bp) /* new buffer to set */ 814 xfs_buf_t *bp) /* new buffer to set */
891{ 815{
892 xfs_btree_block_t *b; /* btree block */ 816 struct xfs_btree_block *b; /* btree block */
893 xfs_buf_t *obp; /* old buffer pointer */ 817 xfs_buf_t *obp; /* old buffer pointer */
894 818
895 obp = cur->bc_bufs[lev]; 819 obp = cur->bc_bufs[lev];
@@ -900,7 +824,7 @@ xfs_btree_setbuf(
900 if (!bp) 824 if (!bp)
901 return; 825 return;
902 b = XFS_BUF_TO_BLOCK(bp); 826 b = XFS_BUF_TO_BLOCK(bp);
903 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) { 827 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
904 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) 828 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
905 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 829 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
906 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) 830 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
@@ -912,3 +836,2855 @@ xfs_btree_setbuf(
912 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 836 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
913 } 837 }
914} 838}
839
840STATIC int
841xfs_btree_ptr_is_null(
842 struct xfs_btree_cur *cur,
843 union xfs_btree_ptr *ptr)
844{
845 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
846 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
847 else
848 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
849}
850
851STATIC void
852xfs_btree_set_ptr_null(
853 struct xfs_btree_cur *cur,
854 union xfs_btree_ptr *ptr)
855{
856 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
857 ptr->l = cpu_to_be64(NULLFSBLOCK);
858 else
859 ptr->s = cpu_to_be32(NULLAGBLOCK);
860}
861
862/*
863 * Get/set/init sibling pointers
864 */
865STATIC void
866xfs_btree_get_sibling(
867 struct xfs_btree_cur *cur,
868 struct xfs_btree_block *block,
869 union xfs_btree_ptr *ptr,
870 int lr)
871{
872 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
873
874 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
875 if (lr == XFS_BB_RIGHTSIB)
876 ptr->l = block->bb_u.l.bb_rightsib;
877 else
878 ptr->l = block->bb_u.l.bb_leftsib;
879 } else {
880 if (lr == XFS_BB_RIGHTSIB)
881 ptr->s = block->bb_u.s.bb_rightsib;
882 else
883 ptr->s = block->bb_u.s.bb_leftsib;
884 }
885}
886
887STATIC void
888xfs_btree_set_sibling(
889 struct xfs_btree_cur *cur,
890 struct xfs_btree_block *block,
891 union xfs_btree_ptr *ptr,
892 int lr)
893{
894 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
895
896 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
897 if (lr == XFS_BB_RIGHTSIB)
898 block->bb_u.l.bb_rightsib = ptr->l;
899 else
900 block->bb_u.l.bb_leftsib = ptr->l;
901 } else {
902 if (lr == XFS_BB_RIGHTSIB)
903 block->bb_u.s.bb_rightsib = ptr->s;
904 else
905 block->bb_u.s.bb_leftsib = ptr->s;
906 }
907}
908
909STATIC void
910xfs_btree_init_block(
911 struct xfs_btree_cur *cur,
912 int level,
913 int numrecs,
914 struct xfs_btree_block *new) /* new block */
915{
916 new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
917 new->bb_level = cpu_to_be16(level);
918 new->bb_numrecs = cpu_to_be16(numrecs);
919
920 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
921 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
922 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
923 } else {
924 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
925 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
926 }
927}
928
929/*
930 * Return true if ptr is the last record in the btree and
931 * we need to track updateѕ to this record. The decision
932 * will be further refined in the update_lastrec method.
933 */
934STATIC int
935xfs_btree_is_lastrec(
936 struct xfs_btree_cur *cur,
937 struct xfs_btree_block *block,
938 int level)
939{
940 union xfs_btree_ptr ptr;
941
942 if (level > 0)
943 return 0;
944 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
945 return 0;
946
947 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
948 if (!xfs_btree_ptr_is_null(cur, &ptr))
949 return 0;
950 return 1;
951}
952
953STATIC void
954xfs_btree_buf_to_ptr(
955 struct xfs_btree_cur *cur,
956 struct xfs_buf *bp,
957 union xfs_btree_ptr *ptr)
958{
959 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
961 XFS_BUF_ADDR(bp)));
962 else {
963 ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp,
964 XFS_BUF_ADDR(bp)));
965 }
966}
967
968STATIC xfs_daddr_t
969xfs_btree_ptr_to_daddr(
970 struct xfs_btree_cur *cur,
971 union xfs_btree_ptr *ptr)
972{
973 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
974 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
975
976 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
977 } else {
978 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
979 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
980
981 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
982 be32_to_cpu(ptr->s));
983 }
984}
985
986STATIC void
987xfs_btree_set_refs(
988 struct xfs_btree_cur *cur,
989 struct xfs_buf *bp)
990{
991 switch (cur->bc_btnum) {
992 case XFS_BTNUM_BNO:
993 case XFS_BTNUM_CNT:
994 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
995 break;
996 case XFS_BTNUM_INO:
997 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
998 break;
999 case XFS_BTNUM_BMAP:
1000 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1001 break;
1002 default:
1003 ASSERT(0);
1004 }
1005}
1006
1007STATIC int
1008xfs_btree_get_buf_block(
1009 struct xfs_btree_cur *cur,
1010 union xfs_btree_ptr *ptr,
1011 int flags,
1012 struct xfs_btree_block **block,
1013 struct xfs_buf **bpp)
1014{
1015 struct xfs_mount *mp = cur->bc_mp;
1016 xfs_daddr_t d;
1017
1018 /* need to sort out how callers deal with failures first */
1019 ASSERT(!(flags & XFS_BUF_TRYLOCK));
1020
1021 d = xfs_btree_ptr_to_daddr(cur, ptr);
1022 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1023 mp->m_bsize, flags);
1024
1025 ASSERT(*bpp);
1026 ASSERT(!XFS_BUF_GETERROR(*bpp));
1027
1028 *block = XFS_BUF_TO_BLOCK(*bpp);
1029 return 0;
1030}
1031
1032/*
1033 * Read in the buffer at the given ptr and return the buffer and
1034 * the block pointer within the buffer.
1035 */
1036STATIC int
1037xfs_btree_read_buf_block(
1038 struct xfs_btree_cur *cur,
1039 union xfs_btree_ptr *ptr,
1040 int level,
1041 int flags,
1042 struct xfs_btree_block **block,
1043 struct xfs_buf **bpp)
1044{
1045 struct xfs_mount *mp = cur->bc_mp;
1046 xfs_daddr_t d;
1047 int error;
1048
1049 /* need to sort out how callers deal with failures first */
1050 ASSERT(!(flags & XFS_BUF_TRYLOCK));
1051
1052 d = xfs_btree_ptr_to_daddr(cur, ptr);
1053 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1054 mp->m_bsize, flags, bpp);
1055 if (error)
1056 return error;
1057
1058 ASSERT(*bpp != NULL);
1059 ASSERT(!XFS_BUF_GETERROR(*bpp));
1060
1061 xfs_btree_set_refs(cur, *bpp);
1062 *block = XFS_BUF_TO_BLOCK(*bpp);
1063
1064 error = xfs_btree_check_block(cur, *block, level, *bpp);
1065 if (error)
1066 xfs_trans_brelse(cur->bc_tp, *bpp);
1067 return error;
1068}
1069
1070/*
1071 * Copy keys from one btree block to another.
1072 */
1073STATIC void
1074xfs_btree_copy_keys(
1075 struct xfs_btree_cur *cur,
1076 union xfs_btree_key *dst_key,
1077 union xfs_btree_key *src_key,
1078 int numkeys)
1079{
1080 ASSERT(numkeys >= 0);
1081 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1082}
1083
1084/*
1085 * Copy records from one btree block to another.
1086 */
1087STATIC void
1088xfs_btree_copy_recs(
1089 struct xfs_btree_cur *cur,
1090 union xfs_btree_rec *dst_rec,
1091 union xfs_btree_rec *src_rec,
1092 int numrecs)
1093{
1094 ASSERT(numrecs >= 0);
1095 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1096}
1097
1098/*
1099 * Copy block pointers from one btree block to another.
1100 */
1101STATIC void
1102xfs_btree_copy_ptrs(
1103 struct xfs_btree_cur *cur,
1104 union xfs_btree_ptr *dst_ptr,
1105 union xfs_btree_ptr *src_ptr,
1106 int numptrs)
1107{
1108 ASSERT(numptrs >= 0);
1109 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1110}
1111
1112/*
1113 * Shift keys one index left/right inside a single btree block.
1114 */
1115STATIC void
1116xfs_btree_shift_keys(
1117 struct xfs_btree_cur *cur,
1118 union xfs_btree_key *key,
1119 int dir,
1120 int numkeys)
1121{
1122 char *dst_key;
1123
1124 ASSERT(numkeys >= 0);
1125 ASSERT(dir == 1 || dir == -1);
1126
1127 dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1128 memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1129}
1130
1131/*
1132 * Shift records one index left/right inside a single btree block.
1133 */
1134STATIC void
1135xfs_btree_shift_recs(
1136 struct xfs_btree_cur *cur,
1137 union xfs_btree_rec *rec,
1138 int dir,
1139 int numrecs)
1140{
1141 char *dst_rec;
1142
1143 ASSERT(numrecs >= 0);
1144 ASSERT(dir == 1 || dir == -1);
1145
1146 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1147 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1148}
1149
1150/*
1151 * Shift block pointers one index left/right inside a single btree block.
1152 */
1153STATIC void
1154xfs_btree_shift_ptrs(
1155 struct xfs_btree_cur *cur,
1156 union xfs_btree_ptr *ptr,
1157 int dir,
1158 int numptrs)
1159{
1160 char *dst_ptr;
1161
1162 ASSERT(numptrs >= 0);
1163 ASSERT(dir == 1 || dir == -1);
1164
1165 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1166 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1167}
1168
1169/*
1170 * Log key values from the btree block.
1171 */
1172STATIC void
1173xfs_btree_log_keys(
1174 struct xfs_btree_cur *cur,
1175 struct xfs_buf *bp,
1176 int first,
1177 int last)
1178{
1179 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1180 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1181
1182 if (bp) {
1183 xfs_trans_log_buf(cur->bc_tp, bp,
1184 xfs_btree_key_offset(cur, first),
1185 xfs_btree_key_offset(cur, last + 1) - 1);
1186 } else {
1187 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1188 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1189 }
1190
1191 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1192}
1193
1194/*
1195 * Log record values from the btree block.
1196 */
1197void
1198xfs_btree_log_recs(
1199 struct xfs_btree_cur *cur,
1200 struct xfs_buf *bp,
1201 int first,
1202 int last)
1203{
1204 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1205 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1206
1207 xfs_trans_log_buf(cur->bc_tp, bp,
1208 xfs_btree_rec_offset(cur, first),
1209 xfs_btree_rec_offset(cur, last + 1) - 1);
1210
1211 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1212}
1213
1214/*
1215 * Log block pointer fields from a btree block (nonleaf).
1216 */
1217STATIC void
1218xfs_btree_log_ptrs(
1219 struct xfs_btree_cur *cur, /* btree cursor */
1220 struct xfs_buf *bp, /* buffer containing btree block */
1221 int first, /* index of first pointer to log */
1222 int last) /* index of last pointer to log */
1223{
1224 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1225 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1226
1227 if (bp) {
1228 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1229 int level = xfs_btree_get_level(block);
1230
1231 xfs_trans_log_buf(cur->bc_tp, bp,
1232 xfs_btree_ptr_offset(cur, first, level),
1233 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1234 } else {
1235 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1236 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1237 }
1238
1239 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1240}
1241
1242/*
1243 * Log fields from a btree block header.
1244 */
1245void
1246xfs_btree_log_block(
1247 struct xfs_btree_cur *cur, /* btree cursor */
1248 struct xfs_buf *bp, /* buffer containing btree block */
1249 int fields) /* mask of fields: XFS_BB_... */
1250{
1251 int first; /* first byte offset logged */
1252 int last; /* last byte offset logged */
1253 static const short soffsets[] = { /* table of offsets (short) */
1254 offsetof(struct xfs_btree_block, bb_magic),
1255 offsetof(struct xfs_btree_block, bb_level),
1256 offsetof(struct xfs_btree_block, bb_numrecs),
1257 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1258 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1259 XFS_BTREE_SBLOCK_LEN
1260 };
1261 static const short loffsets[] = { /* table of offsets (long) */
1262 offsetof(struct xfs_btree_block, bb_magic),
1263 offsetof(struct xfs_btree_block, bb_level),
1264 offsetof(struct xfs_btree_block, bb_numrecs),
1265 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1266 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1267 XFS_BTREE_LBLOCK_LEN
1268 };
1269
1270 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1271 XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1272
1273 if (bp) {
1274 xfs_btree_offsets(fields,
1275 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1276 loffsets : soffsets,
1277 XFS_BB_NUM_BITS, &first, &last);
1278 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1279 } else {
1280 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1281 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1282 }
1283
1284 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1285}
1286
1287/*
1288 * Increment cursor by one record at the level.
1289 * For nonzero levels the leaf-ward information is untouched.
1290 */
1291int /* error */
1292xfs_btree_increment(
1293 struct xfs_btree_cur *cur,
1294 int level,
1295 int *stat) /* success/failure */
1296{
1297 struct xfs_btree_block *block;
1298 union xfs_btree_ptr ptr;
1299 struct xfs_buf *bp;
1300 int error; /* error return value */
1301 int lev;
1302
1303 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1304 XFS_BTREE_TRACE_ARGI(cur, level);
1305
1306 ASSERT(level < cur->bc_nlevels);
1307
1308 /* Read-ahead to the right at this level. */
1309 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1310
1311 /* Get a pointer to the btree block. */
1312 block = xfs_btree_get_block(cur, level, &bp);
1313
1314#ifdef DEBUG
1315 error = xfs_btree_check_block(cur, block, level, bp);
1316 if (error)
1317 goto error0;
1318#endif
1319
1320 /* We're done if we remain in the block after the increment. */
1321 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1322 goto out1;
1323
1324 /* Fail if we just went off the right edge of the tree. */
1325 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1326 if (xfs_btree_ptr_is_null(cur, &ptr))
1327 goto out0;
1328
1329 XFS_BTREE_STATS_INC(cur, increment);
1330
1331 /*
1332 * March up the tree incrementing pointers.
1333 * Stop when we don't go off the right edge of a block.
1334 */
1335 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1336 block = xfs_btree_get_block(cur, lev, &bp);
1337
1338#ifdef DEBUG
1339 error = xfs_btree_check_block(cur, block, lev, bp);
1340 if (error)
1341 goto error0;
1342#endif
1343
1344 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1345 break;
1346
1347 /* Read-ahead the right block for the next loop. */
1348 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1349 }
1350
1351 /*
1352 * If we went off the root then we are either seriously
1353 * confused or have the tree root in an inode.
1354 */
1355 if (lev == cur->bc_nlevels) {
1356 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1357 goto out0;
1358 ASSERT(0);
1359 error = EFSCORRUPTED;
1360 goto error0;
1361 }
1362 ASSERT(lev < cur->bc_nlevels);
1363
1364 /*
1365 * Now walk back down the tree, fixing up the cursor's buffer
1366 * pointers and key numbers.
1367 */
1368 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1369 union xfs_btree_ptr *ptrp;
1370
1371 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1372 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1373 0, &block, &bp);
1374 if (error)
1375 goto error0;
1376
1377 xfs_btree_setbuf(cur, lev, bp);
1378 cur->bc_ptrs[lev] = 1;
1379 }
1380out1:
1381 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1382 *stat = 1;
1383 return 0;
1384
1385out0:
1386 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1387 *stat = 0;
1388 return 0;
1389
1390error0:
1391 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1392 return error;
1393}
1394
1395/*
1396 * Decrement cursor by one record at the level.
1397 * For nonzero levels the leaf-ward information is untouched.
1398 */
1399int /* error */
1400xfs_btree_decrement(
1401 struct xfs_btree_cur *cur,
1402 int level,
1403 int *stat) /* success/failure */
1404{
1405 struct xfs_btree_block *block;
1406 xfs_buf_t *bp;
1407 int error; /* error return value */
1408 int lev;
1409 union xfs_btree_ptr ptr;
1410
1411 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1412 XFS_BTREE_TRACE_ARGI(cur, level);
1413
1414 ASSERT(level < cur->bc_nlevels);
1415
1416 /* Read-ahead to the left at this level. */
1417 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1418
1419 /* We're done if we remain in the block after the decrement. */
1420 if (--cur->bc_ptrs[level] > 0)
1421 goto out1;
1422
1423 /* Get a pointer to the btree block. */
1424 block = xfs_btree_get_block(cur, level, &bp);
1425
1426#ifdef DEBUG
1427 error = xfs_btree_check_block(cur, block, level, bp);
1428 if (error)
1429 goto error0;
1430#endif
1431
1432 /* Fail if we just went off the left edge of the tree. */
1433 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1434 if (xfs_btree_ptr_is_null(cur, &ptr))
1435 goto out0;
1436
1437 XFS_BTREE_STATS_INC(cur, decrement);
1438
1439 /*
1440 * March up the tree decrementing pointers.
1441 * Stop when we don't go off the left edge of a block.
1442 */
1443 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1444 if (--cur->bc_ptrs[lev] > 0)
1445 break;
1446 /* Read-ahead the left block for the next loop. */
1447 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1448 }
1449
1450 /*
1451 * If we went off the root then we are seriously confused.
1452 * or the root of the tree is in an inode.
1453 */
1454 if (lev == cur->bc_nlevels) {
1455 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1456 goto out0;
1457 ASSERT(0);
1458 error = EFSCORRUPTED;
1459 goto error0;
1460 }
1461 ASSERT(lev < cur->bc_nlevels);
1462
1463 /*
1464 * Now walk back down the tree, fixing up the cursor's buffer
1465 * pointers and key numbers.
1466 */
1467 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1468 union xfs_btree_ptr *ptrp;
1469
1470 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1471 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1472 0, &block, &bp);
1473 if (error)
1474 goto error0;
1475 xfs_btree_setbuf(cur, lev, bp);
1476 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1477 }
1478out1:
1479 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1480 *stat = 1;
1481 return 0;
1482
1483out0:
1484 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1485 *stat = 0;
1486 return 0;
1487
1488error0:
1489 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1490 return error;
1491}
1492
1493STATIC int
1494xfs_btree_lookup_get_block(
1495 struct xfs_btree_cur *cur, /* btree cursor */
1496 int level, /* level in the btree */
1497 union xfs_btree_ptr *pp, /* ptr to btree block */
1498 struct xfs_btree_block **blkp) /* return btree block */
1499{
1500 struct xfs_buf *bp; /* buffer pointer for btree block */
1501 int error = 0;
1502
1503 /* special case the root block if in an inode */
1504 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1505 (level == cur->bc_nlevels - 1)) {
1506 *blkp = xfs_btree_get_iroot(cur);
1507 return 0;
1508 }
1509
1510 /*
1511 * If the old buffer at this level for the disk address we are
1512 * looking for re-use it.
1513 *
1514 * Otherwise throw it away and get a new one.
1515 */
1516 bp = cur->bc_bufs[level];
1517 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1518 *blkp = XFS_BUF_TO_BLOCK(bp);
1519 return 0;
1520 }
1521
1522 error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1523 if (error)
1524 return error;
1525
1526 xfs_btree_setbuf(cur, level, bp);
1527 return 0;
1528}
1529
1530/*
1531 * Get current search key. For level 0 we don't actually have a key
1532 * structure so we make one up from the record. For all other levels
1533 * we just return the right key.
1534 */
1535STATIC union xfs_btree_key *
1536xfs_lookup_get_search_key(
1537 struct xfs_btree_cur *cur,
1538 int level,
1539 int keyno,
1540 struct xfs_btree_block *block,
1541 union xfs_btree_key *kp)
1542{
1543 if (level == 0) {
1544 cur->bc_ops->init_key_from_rec(kp,
1545 xfs_btree_rec_addr(cur, keyno, block));
1546 return kp;
1547 }
1548
1549 return xfs_btree_key_addr(cur, keyno, block);
1550}
1551
/*
 * Lookup the record.  The cursor is made to point to it, based on dir.
 *
 * The function's return value is an error code (0 on success).  The
 * outcome of the search is reported through *stat: 0 if no matching
 * record could be found, 1 if one was.
 *
 * The value searched for is not passed in here; it is compared via
 * cur->bc_ops->key_diff(cur, key), so it is presumably carried in the
 * cursor by the caller - TODO confirm against the per-btree ops.
 */
int					/* error */
xfs_btree_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* current btree block */
	__int64_t		diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno;	/* current key number */
	int			level;	/* level in the btree */
	union xfs_btree_ptr	*pp;	/* ptr to btree block */
	union xfs_btree_ptr	ptr;	/* ptr to btree block */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, dir);

	XFS_BTREE_STATS_INC(cur, lookup);

	block = NULL;
	keyno = 0;

	/* initialise start pointer from cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	pp = &ptr;

	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 *
	 * diff starts out non-zero so that the root block is always
	 * searched rather than taking the "exact match above" shortcut.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		/* Get the block we need to do the lookup on. */
		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
		if (error)
			goto error0;

		if (diff == 0) {
			/*
			 * If we already had a key match at a higher level, we
			 * know we need to use the first entry in this block.
			 */
			keyno = 1;
		} else {
			/* Otherwise search this block. Do a binary search. */

			int	high;	/* high entry number */
			int	low;	/* low entry number */

			/* Set low and high entry numbers, 1-based. */
			low = 1;
			high = xfs_btree_get_numrecs(block);
			if (!high) {
				/* Block is empty, must be an empty leaf. */
				ASSERT(level == 0 && cur->bc_nlevels == 1);

				/*
				 * Leave the cursor at index 0 for a <= lookup
				 * and at index 1 otherwise, and report that
				 * nothing was found.
				 */
				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
				*stat = 0;
				return 0;
			}

			/* Binary search the block. */
			while (low <= high) {
				union xfs_btree_key	key;
				union xfs_btree_key	*kp;

				XFS_BTREE_STATS_INC(cur, compare);

				/* keyno is average of low and high. */
				keyno = (low + high) >> 1;

				/* Get current search key */
				kp = xfs_lookup_get_search_key(cur, level,
						keyno, block, &key);

				/*
				 * Compute difference to get next direction:
				 *  - less than, move right
				 *  - greater than, move left
				 *  - equal, we're done
				 */
				diff = cur->bc_ops->key_diff(cur, kp);
				if (diff < 0)
					low = keyno + 1;
				else if (diff > 0)
					high = keyno - 1;
				else
					break;
			}
		}

		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			pp = xfs_btree_ptr_addr(cur, keyno, block);

#ifdef DEBUG
			error = xfs_btree_check_ptr(cur, pp, 0, level);
			if (error)
				goto error0;
#endif
			cur->bc_ptrs[level] = keyno;
		}
	}

	/*
	 * Done with the search. See if we need to adjust the results.
	 * At this point keyno/diff describe the last entry compared in
	 * the leaf block.
	 */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		/* The last entry compared was too small: step past it. */
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			/* Let the increment move us into the right sibling. */
			cur->bc_ptrs[0] = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_RETURN(i == 1);
			XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		/* The last entry compared was too big: step back before it. */
		keyno--;
	cur->bc_ptrs[0] = keyno;

	/*
	 * Return if we succeeded or not: the cursor must point inside the
	 * block, and an == lookup additionally needs an exact match.
	 */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1711
1712/*
1713 * Update keys at all levels from here to the root along the cursor's path.
1714 */
1715STATIC int
1716xfs_btree_updkey(
1717 struct xfs_btree_cur *cur,
1718 union xfs_btree_key *keyp,
1719 int level)
1720{
1721 struct xfs_btree_block *block;
1722 struct xfs_buf *bp;
1723 union xfs_btree_key *kp;
1724 int ptr;
1725
1726 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1727 XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1728
1729 ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1730
1731 /*
1732 * Go up the tree from this level toward the root.
1733 * At each level, update the key value to the value input.
1734 * Stop when we reach a level where the cursor isn't pointing
1735 * at the first entry in the block.
1736 */
1737 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1738#ifdef DEBUG
1739 int error;
1740#endif
1741 block = xfs_btree_get_block(cur, level, &bp);
1742#ifdef DEBUG
1743 error = xfs_btree_check_block(cur, block, level, bp);
1744 if (error) {
1745 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1746 return error;
1747 }
1748#endif
1749 ptr = cur->bc_ptrs[level];
1750 kp = xfs_btree_key_addr(cur, ptr, block);
1751 xfs_btree_copy_keys(cur, kp, keyp, 1);
1752 xfs_btree_log_keys(cur, bp, ptr, ptr);
1753 }
1754
1755 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1756 return 0;
1757}
1758
1759/*
1760 * Update the record referred to by cur to the value in the
1761 * given record. This either works (return 0) or gets an
1762 * EFSCORRUPTED error.
1763 */
1764int
1765xfs_btree_update(
1766 struct xfs_btree_cur *cur,
1767 union xfs_btree_rec *rec)
1768{
1769 struct xfs_btree_block *block;
1770 struct xfs_buf *bp;
1771 int error;
1772 int ptr;
1773 union xfs_btree_rec *rp;
1774
1775 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1776 XFS_BTREE_TRACE_ARGR(cur, rec);
1777
1778 /* Pick up the current block. */
1779 block = xfs_btree_get_block(cur, 0, &bp);
1780
1781#ifdef DEBUG
1782 error = xfs_btree_check_block(cur, block, 0, bp);
1783 if (error)
1784 goto error0;
1785#endif
1786 /* Get the address of the rec to be updated. */
1787 ptr = cur->bc_ptrs[0];
1788 rp = xfs_btree_rec_addr(cur, ptr, block);
1789
1790 /* Fill in the new contents and log them. */
1791 xfs_btree_copy_recs(cur, rp, rec, 1);
1792 xfs_btree_log_recs(cur, bp, ptr, ptr);
1793
1794 /*
1795 * If we are tracking the last record in the tree and
1796 * we are at the far right edge of the tree, update it.
1797 */
1798 if (xfs_btree_is_lastrec(cur, block, 0)) {
1799 cur->bc_ops->update_lastrec(cur, block, rec,
1800 ptr, LASTREC_UPDATE);
1801 }
1802
1803 /* Updating first rec in leaf. Pass new key value up to our parent. */
1804 if (ptr == 1) {
1805 union xfs_btree_key key;
1806
1807 cur->bc_ops->init_key_from_rec(&key, rec);
1808 error = xfs_btree_updkey(cur, &key, 1);
1809 if (error)
1810 goto error0;
1811 }
1812
1813 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1814 return 0;
1815
1816error0:
1817 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1818 return error;
1819}
1820
/*
 * Move 1 record left from cur/level if possible.
 * Update cur to reflect the new path.
 *
 * The block the cursor points at is treated as "right"; its first
 * entry is appended to its left sibling and the remaining entries are
 * slid down by one.
 *
 * Returns 0 with *stat == 1 if an entry was moved, 0 with *stat == 0
 * if no shift was possible (inode root, no left sibling, cursor on the
 * first entry, or left sibling full), or an error.
 */
STATIC int					/* error */
xfs_btree_lshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	union xfs_btree_key	key;		/* btree key */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	int			lrecs;		/* left record count */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	int			rrecs;		/* right record count */
	union xfs_btree_ptr	lptr;		/* left btree pointer */
	union xfs_btree_key	*rkp = NULL;	/* right btree key */
	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
	int			error;		/* error return value */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/* The root block of an inode-rooted btree is never shifted. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1)
		goto out0;

	/* Set up variables for this block as "right". */
	right = xfs_btree_get_block(cur, level, &rbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, right, level, rbp);
	if (error)
		goto error0;
#endif

	/* If we've got no left sibling then we can't shift an entry left. */
	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &lptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	if (cur->bc_ptrs[level] <= 1)
		goto out0;

	/* Set up the left neighbor as "left". */
	error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	lrecs = xfs_btree_get_numrecs(left);
	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	rrecs = xfs_btree_get_numrecs(right);

	/*
	 * We add one entry to the left side and remove one for the right side.
	 * Account for it here, the changes will be updated on disk and logged
	 * later.
	 */
	lrecs++;
	rrecs--;

	XFS_BTREE_STATS_INC(cur, lshift);
	XFS_BTREE_STATS_ADD(cur, moves, 1);

	/*
	 * If non-leaf, copy a key and a ptr to the left block.
	 * Log the changes to the left block.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */

		/* lrecs was already bumped, so it names the new last slot. */
		lkp = xfs_btree_key_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);

		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rpp = xfs_btree_ptr_addr(cur, 1, right);
#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, rpp, 0, level);
		if (error)
			goto error0;
#endif
		xfs_btree_copy_keys(cur, lkp, rkp, 1);
		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);

		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->keys_inorder(cur,
			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
	} else {
		/* It's a leaf.  Move records.  */
		union xfs_btree_rec	*lrp;	/* left record pointer */

		/* lrecs was already bumped, so it names the new last slot. */
		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, 1);
		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->recs_inorder(cur,
			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
	}

	/* Store and log the new record counts of both blocks. */
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Slide the contents of right down one entry.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
	if (level > 0) {
		/* It's a nonleaf. operate on keys and ptrs */
#ifdef DEBUG
		int			i;		/* loop index */

		for (i = 0; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
			if (error)
				goto error0;
		}
#endif
		/* Entries 2..rrecs+1 move down into slots 1..rrecs. */
		xfs_btree_shift_keys(cur,
				xfs_btree_key_addr(cur, 2, right),
				-1, rrecs);
		xfs_btree_shift_ptrs(cur,
				xfs_btree_ptr_addr(cur, 2, right),
				-1, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
	} else {
		/* It's a leaf. operate on records */
		xfs_btree_shift_recs(cur,
			xfs_btree_rec_addr(cur, 2, right),
			-1, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/*
		 * If it's the first record in the block, we'll need a key
		 * structure to pass up to the next level (updkey).
		 */
		cur->bc_ops->init_key_from_rec(&key,
			xfs_btree_rec_addr(cur, 1, right));
		rkp = &key;
	}

	/*
	 * Update the parent key values of right.  In the non-leaf case
	 * rkp still addresses slot 1 of right, which now holds the key
	 * that was slid down into it.
	 */
	error = xfs_btree_updkey(cur, rkp, level + 1);
	if (error)
		goto error0;

	/* Slide the cursor value left one. */
	cur->bc_ptrs[level]--;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2003
2004/*
2005 * Move 1 record right from cur/level if possible.
2006 * Update cur to reflect the new path.
2007 */
2008STATIC int /* error */
2009xfs_btree_rshift(
2010 struct xfs_btree_cur *cur,
2011 int level,
2012 int *stat) /* success/failure */
2013{
2014 union xfs_btree_key key; /* btree key */
2015 struct xfs_buf *lbp; /* left buffer pointer */
2016 struct xfs_btree_block *left; /* left btree block */
2017 struct xfs_buf *rbp; /* right buffer pointer */
2018 struct xfs_btree_block *right; /* right btree block */
2019 struct xfs_btree_cur *tcur; /* temporary btree cursor */
2020 union xfs_btree_ptr rptr; /* right block pointer */
2021 union xfs_btree_key *rkp; /* right btree key */
2022 int rrecs; /* right record count */
2023 int lrecs; /* left record count */
2024 int error; /* error return value */
2025 int i; /* loop counter */
2026
2027 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2028 XFS_BTREE_TRACE_ARGI(cur, level);
2029
2030 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2031 (level == cur->bc_nlevels - 1))
2032 goto out0;
2033
2034 /* Set up variables for this block as "left". */
2035 left = xfs_btree_get_block(cur, level, &lbp);
2036
2037#ifdef DEBUG
2038 error = xfs_btree_check_block(cur, left, level, lbp);
2039 if (error)
2040 goto error0;
2041#endif
2042
2043 /* If we've got no right sibling then we can't shift an entry right. */
2044 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2045 if (xfs_btree_ptr_is_null(cur, &rptr))
2046 goto out0;
2047
2048 /*
2049 * If the cursor entry is the one that would be moved, don't
2050 * do it... it's too complicated.
2051 */
2052 lrecs = xfs_btree_get_numrecs(left);
2053 if (cur->bc_ptrs[level] >= lrecs)
2054 goto out0;
2055
2056 /* Set up the right neighbor as "right". */
2057 error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2058 if (error)
2059 goto error0;
2060
2061 /* If it's full, it can't take another entry. */
2062 rrecs = xfs_btree_get_numrecs(right);
2063 if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2064 goto out0;
2065
2066 XFS_BTREE_STATS_INC(cur, rshift);
2067 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2068
2069 /*
2070 * Make a hole at the start of the right neighbor block, then
2071 * copy the last left block entry to the hole.
2072 */
2073 if (level > 0) {
2074 /* It's a nonleaf. make a hole in the keys and ptrs */
2075 union xfs_btree_key *lkp;
2076 union xfs_btree_ptr *lpp;
2077 union xfs_btree_ptr *rpp;
2078
2079 lkp = xfs_btree_key_addr(cur, lrecs, left);
2080 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2081 rkp = xfs_btree_key_addr(cur, 1, right);
2082 rpp = xfs_btree_ptr_addr(cur, 1, right);
2083
2084#ifdef DEBUG
2085 for (i = rrecs - 1; i >= 0; i--) {
2086 error = xfs_btree_check_ptr(cur, rpp, i, level);
2087 if (error)
2088 goto error0;
2089 }
2090#endif
2091
2092 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2093 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2094
2095#ifdef DEBUG
2096 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2097 if (error)
2098 goto error0;
2099#endif
2100
2101 /* Now put the new data in, and log it. */
2102 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2103 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2104
2105 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2106 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2107
2108 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2109 xfs_btree_key_addr(cur, 2, right)));
2110 } else {
2111 /* It's a leaf. make a hole in the records */
2112 union xfs_btree_rec *lrp;
2113 union xfs_btree_rec *rrp;
2114
2115 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2116 rrp = xfs_btree_rec_addr(cur, 1, right);
2117
2118 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2119
2120 /* Now put the new data in, and log it. */
2121 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2122 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2123
2124 cur->bc_ops->init_key_from_rec(&key, rrp);
2125 rkp = &key;
2126
2127 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2128 xfs_btree_rec_addr(cur, 2, right)));
2129 }
2130
2131 /*
2132 * Decrement and log left's numrecs, bump and log right's numrecs.
2133 */
2134 xfs_btree_set_numrecs(left, --lrecs);
2135 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2136
2137 xfs_btree_set_numrecs(right, ++rrecs);
2138 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2139
2140 /*
2141 * Using a temporary cursor, update the parent key values of the
2142 * block on the right.
2143 */
2144 error = xfs_btree_dup_cursor(cur, &tcur);
2145 if (error)
2146 goto error0;
2147 i = xfs_btree_lastrec(tcur, level);
2148 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2149
2150 error = xfs_btree_increment(tcur, level, &i);
2151 if (error)
2152 goto error1;
2153
2154 error = xfs_btree_updkey(tcur, rkp, level + 1);
2155 if (error)
2156 goto error1;
2157
2158 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2159
2160 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2161 *stat = 1;
2162 return 0;
2163
2164out0:
2165 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2166 *stat = 0;
2167 return 0;
2168
2169error0:
2170 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2171 return error;
2172
2173error1:
2174 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2175 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2176 return error;
2177}
2178
2179/*
2180 * Split cur/level block in half.
2181 * Return new block number and the key to its first
2182 * record (to be inserted into parent).
2183 */
2184STATIC int /* error */
2185xfs_btree_split(
2186 struct xfs_btree_cur *cur,
2187 int level,
2188 union xfs_btree_ptr *ptrp,
2189 union xfs_btree_key *key,
2190 struct xfs_btree_cur **curp,
2191 int *stat) /* success/failure */
2192{
2193 union xfs_btree_ptr lptr; /* left sibling block ptr */
2194 struct xfs_buf *lbp; /* left buffer pointer */
2195 struct xfs_btree_block *left; /* left btree block */
2196 union xfs_btree_ptr rptr; /* right sibling block ptr */
2197 struct xfs_buf *rbp; /* right buffer pointer */
2198 struct xfs_btree_block *right; /* right btree block */
2199 union xfs_btree_ptr rrptr; /* right-right sibling ptr */
2200 struct xfs_buf *rrbp; /* right-right buffer pointer */
2201 struct xfs_btree_block *rrblock; /* right-right btree block */
2202 int lrecs;
2203 int rrecs;
2204 int src_index;
2205 int error; /* error return value */
2206#ifdef DEBUG
2207 int i;
2208#endif
2209
2210 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2211 XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2212
2213 XFS_BTREE_STATS_INC(cur, split);
2214
2215 /* Set up left block (current one). */
2216 left = xfs_btree_get_block(cur, level, &lbp);
2217
2218#ifdef DEBUG
2219 error = xfs_btree_check_block(cur, left, level, lbp);
2220 if (error)
2221 goto error0;
2222#endif
2223
2224 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2225
2226 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2227 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2228 if (error)
2229 goto error0;
2230 if (*stat == 0)
2231 goto out0;
2232 XFS_BTREE_STATS_INC(cur, alloc);
2233
2234 /* Set up the new block as "right". */
2235 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2236 if (error)
2237 goto error0;
2238
2239 /* Fill in the btree header for the new right block. */
2240 xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2241
2242 /*
2243 * Split the entries between the old and the new block evenly.
2244 * Make sure that if there's an odd number of entries now, that
2245 * each new block will have the same number of entries.
2246 */
2247 lrecs = xfs_btree_get_numrecs(left);
2248 rrecs = lrecs / 2;
2249 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2250 rrecs++;
2251 src_index = (lrecs - rrecs + 1);
2252
2253 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2254
2255 /*
2256 * Copy btree block entries from the left block over to the
2257 * new block, the right. Update the right block and log the
2258 * changes.
2259 */
2260 if (level > 0) {
2261 /* It's a non-leaf. Move keys and pointers. */
2262 union xfs_btree_key *lkp; /* left btree key */
2263 union xfs_btree_ptr *lpp; /* left address pointer */
2264 union xfs_btree_key *rkp; /* right btree key */
2265 union xfs_btree_ptr *rpp; /* right address pointer */
2266
2267 lkp = xfs_btree_key_addr(cur, src_index, left);
2268 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2269 rkp = xfs_btree_key_addr(cur, 1, right);
2270 rpp = xfs_btree_ptr_addr(cur, 1, right);
2271
2272#ifdef DEBUG
2273 for (i = src_index; i < rrecs; i++) {
2274 error = xfs_btree_check_ptr(cur, lpp, i, level);
2275 if (error)
2276 goto error0;
2277 }
2278#endif
2279
2280 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2281 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2282
2283 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2284 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2285
2286 /* Grab the keys to the entries moved to the right block */
2287 xfs_btree_copy_keys(cur, key, rkp, 1);
2288 } else {
2289 /* It's a leaf. Move records. */
2290 union xfs_btree_rec *lrp; /* left record pointer */
2291 union xfs_btree_rec *rrp; /* right record pointer */
2292
2293 lrp = xfs_btree_rec_addr(cur, src_index, left);
2294 rrp = xfs_btree_rec_addr(cur, 1, right);
2295
2296 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2297 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2298
2299 cur->bc_ops->init_key_from_rec(key,
2300 xfs_btree_rec_addr(cur, 1, right));
2301 }
2302
2303
2304 /*
2305 * Find the left block number by looking in the buffer.
2306 * Adjust numrecs, sibling pointers.
2307 */
2308 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2309 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2310 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2311 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2312
2313 lrecs -= rrecs;
2314 xfs_btree_set_numrecs(left, lrecs);
2315 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2316
2317 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2318 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2319
2320 /*
2321 * If there's a block to the new block's right, make that block
2322 * point back to right instead of to left.
2323 */
2324 if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2325 error = xfs_btree_read_buf_block(cur, &rrptr, level,
2326 0, &rrblock, &rrbp);
2327 if (error)
2328 goto error0;
2329 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2330 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2331 }
2332 /*
2333 * If the cursor is really in the right block, move it there.
2334 * If it's just pointing past the last entry in left, then we'll
2335 * insert there, so don't change anything in that case.
2336 */
2337 if (cur->bc_ptrs[level] > lrecs + 1) {
2338 xfs_btree_setbuf(cur, level, rbp);
2339 cur->bc_ptrs[level] -= lrecs;
2340 }
2341 /*
2342 * If there are more levels, we'll need another cursor which refers
2343 * the right block, no matter where this cursor was.
2344 */
2345 if (level + 1 < cur->bc_nlevels) {
2346 error = xfs_btree_dup_cursor(cur, curp);
2347 if (error)
2348 goto error0;
2349 (*curp)->bc_ptrs[level + 1]++;
2350 }
2351 *ptrp = rptr;
2352 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2353 *stat = 1;
2354 return 0;
2355out0:
2356 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2357 *stat = 0;
2358 return 0;
2359
2360error0:
2361 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2362 return error;
2363}
2364
2365/*
2366 * Copy the old inode root contents into a real block and make the
2367 * broot point to it.
2368 */
2369int /* error */
2370xfs_btree_new_iroot(
2371 struct xfs_btree_cur *cur, /* btree cursor */
2372 int *logflags, /* logging flags for inode */
2373 int *stat) /* return status - 0 fail */
2374{
2375 struct xfs_buf *cbp; /* buffer for cblock */
2376 struct xfs_btree_block *block; /* btree block */
2377 struct xfs_btree_block *cblock; /* child btree block */
2378 union xfs_btree_key *ckp; /* child key pointer */
2379 union xfs_btree_ptr *cpp; /* child ptr pointer */
2380 union xfs_btree_key *kp; /* pointer to btree key */
2381 union xfs_btree_ptr *pp; /* pointer to block addr */
2382 union xfs_btree_ptr nptr; /* new block addr */
2383 int level; /* btree level */
2384 int error; /* error return code */
2385#ifdef DEBUG
2386 int i; /* loop counter */
2387#endif
2388
2389 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2390 XFS_BTREE_STATS_INC(cur, newroot);
2391
2392 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2393
2394 level = cur->bc_nlevels - 1;
2395
2396 block = xfs_btree_get_iroot(cur);
2397 pp = xfs_btree_ptr_addr(cur, 1, block);
2398
2399 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2400 error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2401 if (error)
2402 goto error0;
2403 if (*stat == 0) {
2404 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2405 return 0;
2406 }
2407 XFS_BTREE_STATS_INC(cur, alloc);
2408
2409 /* Copy the root into a real block. */
2410 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2411 if (error)
2412 goto error0;
2413
2414 memcpy(cblock, block, xfs_btree_block_len(cur));
2415
2416 be16_add_cpu(&block->bb_level, 1);
2417 xfs_btree_set_numrecs(block, 1);
2418 cur->bc_nlevels++;
2419 cur->bc_ptrs[level + 1] = 1;
2420
2421 kp = xfs_btree_key_addr(cur, 1, block);
2422 ckp = xfs_btree_key_addr(cur, 1, cblock);
2423 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2424
2425 cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2426#ifdef DEBUG
2427 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2428 error = xfs_btree_check_ptr(cur, pp, i, level);
2429 if (error)
2430 goto error0;
2431 }
2432#endif
2433 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2434
2435#ifdef DEBUG
2436 error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2437 if (error)
2438 goto error0;
2439#endif
2440 xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2441
2442 xfs_iroot_realloc(cur->bc_private.b.ip,
2443 1 - xfs_btree_get_numrecs(cblock),
2444 cur->bc_private.b.whichfork);
2445
2446 xfs_btree_setbuf(cur, level, cbp);
2447
2448 /*
2449 * Do all this logging at the end so that
2450 * the root is at the right level.
2451 */
2452 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2453 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2454 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2455
2456 *logflags |=
2457 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
2458 *stat = 1;
2459 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2460 return 0;
2461error0:
2462 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2463 return error;
2464}
2465
2466/*
2467 * Allocate a new root block, fill it in.
2468 */
2469STATIC int /* error */
2470xfs_btree_new_root(
2471 struct xfs_btree_cur *cur, /* btree cursor */
2472 int *stat) /* success/failure */
2473{
2474 struct xfs_btree_block *block; /* one half of the old root block */
2475 struct xfs_buf *bp; /* buffer containing block */
2476 int error; /* error return value */
2477 struct xfs_buf *lbp; /* left buffer pointer */
2478 struct xfs_btree_block *left; /* left btree block */
2479 struct xfs_buf *nbp; /* new (root) buffer */
2480 struct xfs_btree_block *new; /* new (root) btree block */
2481 int nptr; /* new value for key index, 1 or 2 */
2482 struct xfs_buf *rbp; /* right buffer pointer */
2483 struct xfs_btree_block *right; /* right btree block */
2484 union xfs_btree_ptr rptr;
2485 union xfs_btree_ptr lptr;
2486
2487 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2488 XFS_BTREE_STATS_INC(cur, newroot);
2489
2490 /* initialise our start point from the cursor */
2491 cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2492
2493 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2494 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2495 if (error)
2496 goto error0;
2497 if (*stat == 0)
2498 goto out0;
2499 XFS_BTREE_STATS_INC(cur, alloc);
2500
2501 /* Set up the new block. */
2502 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2503 if (error)
2504 goto error0;
2505
2506 /* Set the root in the holding structure increasing the level by 1. */
2507 cur->bc_ops->set_root(cur, &lptr, 1);
2508
2509 /*
2510 * At the previous root level there are now two blocks: the old root,
2511 * and the new block generated when it was split. We don't know which
2512 * one the cursor is pointing at, so we set up variables "left" and
2513 * "right" for each case.
2514 */
2515 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2516
2517#ifdef DEBUG
2518 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2519 if (error)
2520 goto error0;
2521#endif
2522
2523 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2524 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2525 /* Our block is left, pick up the right block. */
2526 lbp = bp;
2527 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2528 left = block;
2529 error = xfs_btree_read_buf_block(cur, &rptr,
2530 cur->bc_nlevels - 1, 0, &right, &rbp);
2531 if (error)
2532 goto error0;
2533 bp = rbp;
2534 nptr = 1;
2535 } else {
2536 /* Our block is right, pick up the left block. */
2537 rbp = bp;
2538 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2539 right = block;
2540 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2541 error = xfs_btree_read_buf_block(cur, &lptr,
2542 cur->bc_nlevels - 1, 0, &left, &lbp);
2543 if (error)
2544 goto error0;
2545 bp = lbp;
2546 nptr = 2;
2547 }
2548 /* Fill in the new block's btree header and log it. */
2549 xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2550 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2551 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2552 !xfs_btree_ptr_is_null(cur, &rptr));
2553
2554 /* Fill in the key data in the new root. */
2555 if (xfs_btree_get_level(left) > 0) {
2556 xfs_btree_copy_keys(cur,
2557 xfs_btree_key_addr(cur, 1, new),
2558 xfs_btree_key_addr(cur, 1, left), 1);
2559 xfs_btree_copy_keys(cur,
2560 xfs_btree_key_addr(cur, 2, new),
2561 xfs_btree_key_addr(cur, 1, right), 1);
2562 } else {
2563 cur->bc_ops->init_key_from_rec(
2564 xfs_btree_key_addr(cur, 1, new),
2565 xfs_btree_rec_addr(cur, 1, left));
2566 cur->bc_ops->init_key_from_rec(
2567 xfs_btree_key_addr(cur, 2, new),
2568 xfs_btree_rec_addr(cur, 1, right));
2569 }
2570 xfs_btree_log_keys(cur, nbp, 1, 2);
2571
2572 /* Fill in the pointer data in the new root. */
2573 xfs_btree_copy_ptrs(cur,
2574 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2575 xfs_btree_copy_ptrs(cur,
2576 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2577 xfs_btree_log_ptrs(cur, nbp, 1, 2);
2578
2579 /* Fix up the cursor. */
2580 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2581 cur->bc_ptrs[cur->bc_nlevels] = nptr;
2582 cur->bc_nlevels++;
2583 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2584 *stat = 1;
2585 return 0;
2586error0:
2587 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2588 return error;
2589out0:
2590 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2591 *stat = 0;
2592 return 0;
2593}
2594
2595STATIC int
2596xfs_btree_make_block_unfull(
2597 struct xfs_btree_cur *cur, /* btree cursor */
2598 int level, /* btree level */
2599 int numrecs,/* # of recs in block */
2600 int *oindex,/* old tree index */
2601 int *index, /* new tree index */
2602 union xfs_btree_ptr *nptr, /* new btree ptr */
2603 struct xfs_btree_cur **ncur, /* new btree cursor */
2604 union xfs_btree_rec *nrec, /* new record */
2605 int *stat)
2606{
2607 union xfs_btree_key key; /* new btree key value */
2608 int error = 0;
2609
2610 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2611 level == cur->bc_nlevels - 1) {
2612 struct xfs_inode *ip = cur->bc_private.b.ip;
2613
2614 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2615 /* A root block that can be made bigger. */
2616
2617 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2618 } else {
2619 /* A root block that needs replacing */
2620 int logflags = 0;
2621
2622 error = xfs_btree_new_iroot(cur, &logflags, stat);
2623 if (error || *stat == 0)
2624 return error;
2625
2626 xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2627 }
2628
2629 return 0;
2630 }
2631
2632 /* First, try shifting an entry to the right neighbor. */
2633 error = xfs_btree_rshift(cur, level, stat);
2634 if (error || *stat)
2635 return error;
2636
2637 /* Next, try shifting an entry to the left neighbor. */
2638 error = xfs_btree_lshift(cur, level, stat);
2639 if (error)
2640 return error;
2641
2642 if (*stat) {
2643 *oindex = *index = cur->bc_ptrs[level];
2644 return 0;
2645 }
2646
2647 /*
2648 * Next, try splitting the current block in half.
2649 *
2650 * If this works we have to re-set our variables because we
2651 * could be in a different block now.
2652 */
2653 error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2654 if (error || *stat == 0)
2655 return error;
2656
2657
2658 *index = cur->bc_ptrs[level];
2659 cur->bc_ops->init_rec_from_key(&key, nrec);
2660 return 0;
2661}
2662
2663/*
2664 * Insert one record/level. Return information to the caller
2665 * allowing the next level up to proceed if necessary.
2666 */
2667STATIC int
2668xfs_btree_insrec(
2669 struct xfs_btree_cur *cur, /* btree cursor */
2670 int level, /* level to insert record at */
2671 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
2672 union xfs_btree_rec *recp, /* i/o: record data inserted */
2673 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
2674 int *stat) /* success/failure */
2675{
2676 struct xfs_btree_block *block; /* btree block */
2677 struct xfs_buf *bp; /* buffer for block */
2678 union xfs_btree_key key; /* btree key */
2679 union xfs_btree_ptr nptr; /* new block ptr */
2680 struct xfs_btree_cur *ncur; /* new btree cursor */
2681 union xfs_btree_rec nrec; /* new record count */
2682 int optr; /* old key/record index */
2683 int ptr; /* key/record index */
2684 int numrecs;/* number of records */
2685 int error; /* error return value */
2686#ifdef DEBUG
2687 int i;
2688#endif
2689
2690 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2691 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2692
2693 ncur = NULL;
2694
2695 /*
2696 * If we have an external root pointer, and we've made it to the
2697 * root level, allocate a new root block and we're done.
2698 */
2699 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2700 (level >= cur->bc_nlevels)) {
2701 error = xfs_btree_new_root(cur, stat);
2702 xfs_btree_set_ptr_null(cur, ptrp);
2703
2704 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2705 return error;
2706 }
2707
2708 /* If we're off the left edge, return failure. */
2709 ptr = cur->bc_ptrs[level];
2710 if (ptr == 0) {
2711 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2712 *stat = 0;
2713 return 0;
2714 }
2715
2716 /* Make a key out of the record data to be inserted, and save it. */
2717 cur->bc_ops->init_key_from_rec(&key, recp);
2718
2719 optr = ptr;
2720
2721 XFS_BTREE_STATS_INC(cur, insrec);
2722
2723 /* Get pointers to the btree buffer and block. */
2724 block = xfs_btree_get_block(cur, level, &bp);
2725 numrecs = xfs_btree_get_numrecs(block);
2726
2727#ifdef DEBUG
2728 error = xfs_btree_check_block(cur, block, level, bp);
2729 if (error)
2730 goto error0;
2731
2732 /* Check that the new entry is being inserted in the right place. */
2733 if (ptr <= numrecs) {
2734 if (level == 0) {
2735 ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2736 xfs_btree_rec_addr(cur, ptr, block)));
2737 } else {
2738 ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2739 xfs_btree_key_addr(cur, ptr, block)));
2740 }
2741 }
2742#endif
2743
2744 /*
2745 * If the block is full, we can't insert the new entry until we
2746 * make the block un-full.
2747 */
2748 xfs_btree_set_ptr_null(cur, &nptr);
2749 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2750 error = xfs_btree_make_block_unfull(cur, level, numrecs,
2751 &optr, &ptr, &nptr, &ncur, &nrec, stat);
2752 if (error || *stat == 0)
2753 goto error0;
2754 }
2755
2756 /*
2757 * The current block may have changed if the block was
2758 * previously full and we have just made space in it.
2759 */
2760 block = xfs_btree_get_block(cur, level, &bp);
2761 numrecs = xfs_btree_get_numrecs(block);
2762
2763#ifdef DEBUG
2764 error = xfs_btree_check_block(cur, block, level, bp);
2765 if (error)
2766 return error;
2767#endif
2768
2769 /*
2770 * At this point we know there's room for our new entry in the block
2771 * we're pointing at.
2772 */
2773 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2774
2775 if (level > 0) {
2776 /* It's a nonleaf. make a hole in the keys and ptrs */
2777 union xfs_btree_key *kp;
2778 union xfs_btree_ptr *pp;
2779
2780 kp = xfs_btree_key_addr(cur, ptr, block);
2781 pp = xfs_btree_ptr_addr(cur, ptr, block);
2782
2783#ifdef DEBUG
2784 for (i = numrecs - ptr; i >= 0; i--) {
2785 error = xfs_btree_check_ptr(cur, pp, i, level);
2786 if (error)
2787 return error;
2788 }
2789#endif
2790
2791 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2792 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2793
2794#ifdef DEBUG
2795 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2796 if (error)
2797 goto error0;
2798#endif
2799
2800 /* Now put the new data in, bump numrecs and log it. */
2801 xfs_btree_copy_keys(cur, kp, &key, 1);
2802 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2803 numrecs++;
2804 xfs_btree_set_numrecs(block, numrecs);
2805 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2806 xfs_btree_log_keys(cur, bp, ptr, numrecs);
2807#ifdef DEBUG
2808 if (ptr < numrecs) {
2809 ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2810 xfs_btree_key_addr(cur, ptr + 1, block)));
2811 }
2812#endif
2813 } else {
2814 /* It's a leaf. make a hole in the records */
2815 union xfs_btree_rec *rp;
2816
2817 rp = xfs_btree_rec_addr(cur, ptr, block);
2818
2819 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2820
2821 /* Now put the new data in, bump numrecs and log it. */
2822 xfs_btree_copy_recs(cur, rp, recp, 1);
2823 xfs_btree_set_numrecs(block, ++numrecs);
2824 xfs_btree_log_recs(cur, bp, ptr, numrecs);
2825#ifdef DEBUG
2826 if (ptr < numrecs) {
2827 ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2828 xfs_btree_rec_addr(cur, ptr + 1, block)));
2829 }
2830#endif
2831 }
2832
2833 /* Log the new number of records in the btree header. */
2834 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2835
2836 /* If we inserted at the start of a block, update the parents' keys. */
2837 if (optr == 1) {
2838 error = xfs_btree_updkey(cur, &key, level + 1);
2839 if (error)
2840 goto error0;
2841 }
2842
2843 /*
2844 * If we are tracking the last record in the tree and
2845 * we are at the far right edge of the tree, update it.
2846 */
2847 if (xfs_btree_is_lastrec(cur, block, level)) {
2848 cur->bc_ops->update_lastrec(cur, block, recp,
2849 ptr, LASTREC_INSREC);
2850 }
2851
2852 /*
2853 * Return the new block number, if any.
2854 * If there is one, give back a record value and a cursor too.
2855 */
2856 *ptrp = nptr;
2857 if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2858 *recp = nrec;
2859 *curp = ncur;
2860 }
2861
2862 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2863 *stat = 1;
2864 return 0;
2865
2866error0:
2867 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2868 return error;
2869}
2870
2871/*
2872 * Insert the record at the point referenced by cur.
2873 *
2874 * A multi-level split of the tree on insert will invalidate the original
2875 * cursor. All callers of this function should assume that the cursor is
2876 * no longer valid and revalidate it.
2877 */
2878int
2879xfs_btree_insert(
2880 struct xfs_btree_cur *cur,
2881 int *stat)
2882{
2883 int error; /* error return value */
2884 int i; /* result value, 0 for failure */
2885 int level; /* current level number in btree */
2886 union xfs_btree_ptr nptr; /* new block number (split result) */
2887 struct xfs_btree_cur *ncur; /* new cursor (split result) */
2888 struct xfs_btree_cur *pcur; /* previous level's cursor */
2889 union xfs_btree_rec rec; /* record to insert */
2890
2891 level = 0;
2892 ncur = NULL;
2893 pcur = cur;
2894
2895 xfs_btree_set_ptr_null(cur, &nptr);
2896 cur->bc_ops->init_rec_from_cur(cur, &rec);
2897
2898 /*
2899 * Loop going up the tree, starting at the leaf level.
2900 * Stop when we don't get a split block, that must mean that
2901 * the insert is finished with this level.
2902 */
2903 do {
2904 /*
2905 * Insert nrec/nptr into this level of the tree.
2906 * Note if we fail, nptr will be null.
2907 */
2908 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2909 if (error) {
2910 if (pcur != cur)
2911 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2912 goto error0;
2913 }
2914
2915 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2916 level++;
2917
2918 /*
2919 * See if the cursor we just used is trash.
2920 * Can't trash the caller's cursor, but otherwise we should
2921 * if ncur is a new cursor or we're about to be done.
2922 */
2923 if (pcur != cur &&
2924 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2925 /* Save the state from the cursor before we trash it */
2926 if (cur->bc_ops->update_cursor)
2927 cur->bc_ops->update_cursor(pcur, cur);
2928 cur->bc_nlevels = pcur->bc_nlevels;
2929 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2930 }
2931 /* If we got a new cursor, switch to it. */
2932 if (ncur) {
2933 pcur = ncur;
2934 ncur = NULL;
2935 }
2936 } while (!xfs_btree_ptr_is_null(cur, &nptr));
2937
2938 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2939 *stat = i;
2940 return 0;
2941error0:
2942 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2943 return error;
2944}
2945
2946/*
2947 * Try to merge a non-leaf block back into the inode root.
2948 *
2949 * Note: the killroot names comes from the fact that we're effectively
2950 * killing the old root block. But because we can't just delete the
2951 * inode we have to copy the single block it was pointing to into the
2952 * inode.
2953 */
2954int
2955xfs_btree_kill_iroot(
2956 struct xfs_btree_cur *cur)
2957{
2958 int whichfork = cur->bc_private.b.whichfork;
2959 struct xfs_inode *ip = cur->bc_private.b.ip;
2960 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
2961 struct xfs_btree_block *block;
2962 struct xfs_btree_block *cblock;
2963 union xfs_btree_key *kp;
2964 union xfs_btree_key *ckp;
2965 union xfs_btree_ptr *pp;
2966 union xfs_btree_ptr *cpp;
2967 struct xfs_buf *cbp;
2968 int level;
2969 int index;
2970 int numrecs;
2971#ifdef DEBUG
2972 union xfs_btree_ptr ptr;
2973 int i;
2974#endif
2975
2976 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2977
2978 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2979 ASSERT(cur->bc_nlevels > 1);
2980
2981 /*
2982 * Don't deal with the root block needs to be a leaf case.
2983 * We're just going to turn the thing back into extents anyway.
2984 */
2985 level = cur->bc_nlevels - 1;
2986 if (level == 1)
2987 goto out0;
2988
2989 /*
2990 * Give up if the root has multiple children.
2991 */
2992 block = xfs_btree_get_iroot(cur);
2993 if (xfs_btree_get_numrecs(block) != 1)
2994 goto out0;
2995
2996 cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2997 numrecs = xfs_btree_get_numrecs(cblock);
2998
2999 /*
3000 * Only do this if the next level will fit.
3001 * Then the data must be copied up to the inode,
3002 * instead of freeing the root you free the next level.
3003 */
3004 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3005 goto out0;
3006
3007 XFS_BTREE_STATS_INC(cur, killroot);
3008
3009#ifdef DEBUG
3010 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3011 ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3012 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3013 ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3014#endif
3015
3016 index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3017 if (index) {
3018 xfs_iroot_realloc(cur->bc_private.b.ip, index,
3019 cur->bc_private.b.whichfork);
3020 block = ifp->if_broot;
3021 }
3022
3023 be16_add_cpu(&block->bb_numrecs, index);
3024 ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3025
3026 kp = xfs_btree_key_addr(cur, 1, block);
3027 ckp = xfs_btree_key_addr(cur, 1, cblock);
3028 xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3029
3030 pp = xfs_btree_ptr_addr(cur, 1, block);
3031 cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3032#ifdef DEBUG
3033 for (i = 0; i < numrecs; i++) {
3034 int error;
3035
3036 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3037 if (error) {
3038 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3039 return error;
3040 }
3041 }
3042#endif
3043 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3044
3045 cur->bc_ops->free_block(cur, cbp);
3046 XFS_BTREE_STATS_INC(cur, free);
3047
3048 cur->bc_bufs[level - 1] = NULL;
3049 be16_add_cpu(&block->bb_level, -1);
3050 xfs_trans_log_inode(cur->bc_tp, ip,
3051 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
3052 cur->bc_nlevels--;
3053out0:
3054 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3055 return 0;
3056}
3057
3058STATIC int
3059xfs_btree_dec_cursor(
3060 struct xfs_btree_cur *cur,
3061 int level,
3062 int *stat)
3063{
3064 int error;
3065 int i;
3066
3067 if (level > 0) {
3068 error = xfs_btree_decrement(cur, level, &i);
3069 if (error)
3070 return error;
3071 }
3072
3073 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3074 *stat = 1;
3075 return 0;
3076}
3077
3078/*
3079 * Single level of the btree record deletion routine.
3080 * Delete record pointed to by cur/level.
3081 * Remove the record from its block then rebalance the tree.
3082 * Return 0 for error, 1 for done, 2 to go on to the next level.
3083 */
3084STATIC int /* error */
3085xfs_btree_delrec(
3086 struct xfs_btree_cur *cur, /* btree cursor */
3087 int level, /* level removing record from */
3088 int *stat) /* fail/done/go-on */
3089{
3090 struct xfs_btree_block *block; /* btree block */
3091 union xfs_btree_ptr cptr; /* current block ptr */
3092 struct xfs_buf *bp; /* buffer for block */
3093 int error; /* error return value */
3094 int i; /* loop counter */
3095 union xfs_btree_key key; /* storage for keyp */
3096 union xfs_btree_key *keyp = &key; /* passed to the next level */
3097 union xfs_btree_ptr lptr; /* left sibling block ptr */
3098 struct xfs_buf *lbp; /* left buffer pointer */
3099 struct xfs_btree_block *left; /* left btree block */
3100 int lrecs = 0; /* left record count */
3101 int ptr; /* key/record index */
3102 union xfs_btree_ptr rptr; /* right sibling block ptr */
3103 struct xfs_buf *rbp; /* right buffer pointer */
3104 struct xfs_btree_block *right; /* right btree block */
3105 struct xfs_btree_block *rrblock; /* right-right btree block */
3106 struct xfs_buf *rrbp; /* right-right buffer pointer */
3107 int rrecs = 0; /* right record count */
3108 struct xfs_btree_cur *tcur; /* temporary btree cursor */
3109 int numrecs; /* temporary numrec count */
3110
3111 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3112 XFS_BTREE_TRACE_ARGI(cur, level);
3113
3114 tcur = NULL;
3115
3116 /* Get the index of the entry being deleted, check for nothing there. */
3117 ptr = cur->bc_ptrs[level];
3118 if (ptr == 0) {
3119 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3120 *stat = 0;
3121 return 0;
3122 }
3123
3124 /* Get the buffer & block containing the record or key/ptr. */
3125 block = xfs_btree_get_block(cur, level, &bp);
3126 numrecs = xfs_btree_get_numrecs(block);
3127
3128#ifdef DEBUG
3129 error = xfs_btree_check_block(cur, block, level, bp);
3130 if (error)
3131 goto error0;
3132#endif
3133
3134 /* Fail if we're off the end of the block. */
3135 if (ptr > numrecs) {
3136 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3137 *stat = 0;
3138 return 0;
3139 }
3140
3141 XFS_BTREE_STATS_INC(cur, delrec);
3142 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3143
3144 /* Excise the entries being deleted. */
3145 if (level > 0) {
3146 /* It's a nonleaf. operate on keys and ptrs */
3147 union xfs_btree_key *lkp;
3148 union xfs_btree_ptr *lpp;
3149
3150 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3151 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3152
3153#ifdef DEBUG
3154 for (i = 0; i < numrecs - ptr; i++) {
3155 error = xfs_btree_check_ptr(cur, lpp, i, level);
3156 if (error)
3157 goto error0;
3158 }
3159#endif
3160
3161 if (ptr < numrecs) {
3162 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3163 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3164 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3165 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3166 }
3167
3168 /*
3169 * If it's the first record in the block, we'll need to pass a
3170 * key up to the next level (updkey).
3171 */
3172 if (ptr == 1)
3173 keyp = xfs_btree_key_addr(cur, 1, block);
3174 } else {
3175 /* It's a leaf. operate on records */
3176 if (ptr < numrecs) {
3177 xfs_btree_shift_recs(cur,
3178 xfs_btree_rec_addr(cur, ptr + 1, block),
3179 -1, numrecs - ptr);
3180 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3181 }
3182
3183 /*
3184 * If it's the first record in the block, we'll need a key
3185 * structure to pass up to the next level (updkey).
3186 */
3187 if (ptr == 1) {
3188 cur->bc_ops->init_key_from_rec(&key,
3189 xfs_btree_rec_addr(cur, 1, block));
3190 keyp = &key;
3191 }
3192 }
3193
3194 /*
3195 * Decrement and log the number of entries in the block.
3196 */
3197 xfs_btree_set_numrecs(block, --numrecs);
3198 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3199
3200 /*
3201 * If we are tracking the last record in the tree and
3202 * we are at the far right edge of the tree, update it.
3203 */
3204 if (xfs_btree_is_lastrec(cur, block, level)) {
3205 cur->bc_ops->update_lastrec(cur, block, NULL,
3206 ptr, LASTREC_DELREC);
3207 }
3208
3209 /*
3210 * We're at the root level. First, shrink the root block in-memory.
3211 * Try to get rid of the next level down. If we can't then there's
3212 * nothing left to do.
3213 */
3214 if (level == cur->bc_nlevels - 1) {
3215 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3216 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3217 cur->bc_private.b.whichfork);
3218
3219 error = xfs_btree_kill_iroot(cur);
3220 if (error)
3221 goto error0;
3222
3223 error = xfs_btree_dec_cursor(cur, level, stat);
3224 if (error)
3225 goto error0;
3226 *stat = 1;
3227 return 0;
3228 }
3229
3230 /*
3231 * If this is the root level, and there's only one entry left,
3232 * and it's NOT the leaf level, then we can get rid of this
3233 * level.
3234 */
3235 if (numrecs == 1 && level > 0) {
3236 union xfs_btree_ptr *pp;
3237 /*
3238 * Point pp at the first pointer in the block.
3239 * Make it the new root of the btree.
3240 */
3241 pp = xfs_btree_ptr_addr(cur, 1, block);
3242 error = cur->bc_ops->kill_root(cur, bp, level, pp);
3243 if (error)
3244 goto error0;
3245 } else if (level > 0) {
3246 error = xfs_btree_dec_cursor(cur, level, stat);
3247 if (error)
3248 goto error0;
3249 }
3250 *stat = 1;
3251 return 0;
3252 }
3253
3254 /*
3255 * If we deleted the leftmost entry in the block, update the
3256 * key values above us in the tree.
3257 */
3258 if (ptr == 1) {
3259 error = xfs_btree_updkey(cur, keyp, level + 1);
3260 if (error)
3261 goto error0;
3262 }
3263
3264 /*
3265 * If the number of records remaining in the block is at least
3266 * the minimum, we're done.
3267 */
3268 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3269 error = xfs_btree_dec_cursor(cur, level, stat);
3270 if (error)
3271 goto error0;
3272 return 0;
3273 }
3274
3275 /*
3276 * Otherwise, we have to move some records around to keep the
3277 * tree balanced. Look at the left and right sibling blocks to
3278 * see if we can re-balance by moving only one record.
3279 */
3280 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3281 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3282
3283 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3284 /*
3285 * One child of root, need to get a chance to copy its contents
3286 * into the root and delete it. Can't go up to next level,
3287 * there's nothing to delete there.
3288 */
3289 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3290 xfs_btree_ptr_is_null(cur, &lptr) &&
3291 level == cur->bc_nlevels - 2) {
3292 error = xfs_btree_kill_iroot(cur);
3293 if (!error)
3294 error = xfs_btree_dec_cursor(cur, level, stat);
3295 if (error)
3296 goto error0;
3297 return 0;
3298 }
3299 }
3300
3301 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3302 !xfs_btree_ptr_is_null(cur, &lptr));
3303
3304 /*
3305 * Duplicate the cursor so our btree manipulations here won't
3306 * disrupt the next level up.
3307 */
3308 error = xfs_btree_dup_cursor(cur, &tcur);
3309 if (error)
3310 goto error0;
3311
3312 /*
3313 * If there's a right sibling, see if it's ok to shift an entry
3314 * out of it.
3315 */
3316 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3317 /*
3318 * Move the temp cursor to the last entry in the next block.
3319 * Actually any entry but the first would suffice.
3320 */
3321 i = xfs_btree_lastrec(tcur, level);
3322 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3323
3324 error = xfs_btree_increment(tcur, level, &i);
3325 if (error)
3326 goto error0;
3327 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3328
3329 i = xfs_btree_lastrec(tcur, level);
3330 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3331
3332 /* Grab a pointer to the block. */
3333 right = xfs_btree_get_block(tcur, level, &rbp);
3334#ifdef DEBUG
3335 error = xfs_btree_check_block(tcur, right, level, rbp);
3336 if (error)
3337 goto error0;
3338#endif
3339 /* Grab the current block number, for future use. */
3340 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3341
3342 /*
3343 * If right block is full enough so that removing one entry
3344 * won't make it too empty, and left-shifting an entry out
3345 * of right to us works, we're done.
3346 */
3347 if (xfs_btree_get_numrecs(right) - 1 >=
3348 cur->bc_ops->get_minrecs(tcur, level)) {
3349 error = xfs_btree_lshift(tcur, level, &i);
3350 if (error)
3351 goto error0;
3352 if (i) {
3353 ASSERT(xfs_btree_get_numrecs(block) >=
3354 cur->bc_ops->get_minrecs(tcur, level));
3355
3356 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3357 tcur = NULL;
3358
3359 error = xfs_btree_dec_cursor(cur, level, stat);
3360 if (error)
3361 goto error0;
3362 return 0;
3363 }
3364 }
3365
3366 /*
3367 * Otherwise, grab the number of records in right for
3368 * future reference, and fix up the temp cursor to point
3369 * to our block again (last record).
3370 */
3371 rrecs = xfs_btree_get_numrecs(right);
3372 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3373 i = xfs_btree_firstrec(tcur, level);
3374 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3375
3376 error = xfs_btree_decrement(tcur, level, &i);
3377 if (error)
3378 goto error0;
3379 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3380 }
3381 }
3382
3383 /*
3384 * If there's a left sibling, see if it's ok to shift an entry
3385 * out of it.
3386 */
3387 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3388 /*
3389 * Move the temp cursor to the first entry in the
3390 * previous block.
3391 */
3392 i = xfs_btree_firstrec(tcur, level);
3393 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3394
3395 error = xfs_btree_decrement(tcur, level, &i);
3396 if (error)
3397 goto error0;
3398 i = xfs_btree_firstrec(tcur, level);
3399 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3400
3401 /* Grab a pointer to the block. */
3402 left = xfs_btree_get_block(tcur, level, &lbp);
3403#ifdef DEBUG
3404 error = xfs_btree_check_block(cur, left, level, lbp);
3405 if (error)
3406 goto error0;
3407#endif
3408 /* Grab the current block number, for future use. */
3409 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3410
3411 /*
3412 * If left block is full enough so that removing one entry
3413 * won't make it too empty, and right-shifting an entry out
3414 * of left to us works, we're done.
3415 */
3416 if (xfs_btree_get_numrecs(left) - 1 >=
3417 cur->bc_ops->get_minrecs(tcur, level)) {
3418 error = xfs_btree_rshift(tcur, level, &i);
3419 if (error)
3420 goto error0;
3421 if (i) {
3422 ASSERT(xfs_btree_get_numrecs(block) >=
3423 cur->bc_ops->get_minrecs(tcur, level));
3424 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3425 tcur = NULL;
3426 if (level == 0)
3427 cur->bc_ptrs[0]++;
3428 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3429 *stat = 1;
3430 return 0;
3431 }
3432 }
3433
3434 /*
3435 * Otherwise, grab the number of records in left for
3436 * future reference.
3437 */
3438 lrecs = xfs_btree_get_numrecs(left);
3439 }
3440
3441 /* Delete the temp cursor, we're done with it. */
3442 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3443 tcur = NULL;
3444
3445 /* If here, we need to do a join to keep the tree balanced. */
3446 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3447
3448 if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3449 lrecs + xfs_btree_get_numrecs(block) <=
3450 cur->bc_ops->get_maxrecs(cur, level)) {
3451 /*
3452 * Set "right" to be the starting block,
3453 * "left" to be the left neighbor.
3454 */
3455 rptr = cptr;
3456 right = block;
3457 rbp = bp;
3458 error = xfs_btree_read_buf_block(cur, &lptr, level,
3459 0, &left, &lbp);
3460 if (error)
3461 goto error0;
3462
3463 /*
3464 * If that won't work, see if we can join with the right neighbor block.
3465 */
3466 } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3467 rrecs + xfs_btree_get_numrecs(block) <=
3468 cur->bc_ops->get_maxrecs(cur, level)) {
3469 /*
3470 * Set "left" to be the starting block,
3471 * "right" to be the right neighbor.
3472 */
3473 lptr = cptr;
3474 left = block;
3475 lbp = bp;
3476 error = xfs_btree_read_buf_block(cur, &rptr, level,
3477 0, &right, &rbp);
3478 if (error)
3479 goto error0;
3480
3481 /*
3482 * Otherwise, we can't fix the imbalance.
3483 * Just return. This is probably a logic error, but it's not fatal.
3484 */
3485 } else {
3486 error = xfs_btree_dec_cursor(cur, level, stat);
3487 if (error)
3488 goto error0;
3489 return 0;
3490 }
3491
3492 rrecs = xfs_btree_get_numrecs(right);
3493 lrecs = xfs_btree_get_numrecs(left);
3494
3495 /*
3496 * We're now going to join "left" and "right" by moving all the stuff
3497 * in "right" to "left" and deleting "right".
3498 */
3499 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3500 if (level > 0) {
3501 /* It's a non-leaf. Move keys and pointers. */
3502 union xfs_btree_key *lkp; /* left btree key */
3503 union xfs_btree_ptr *lpp; /* left address pointer */
3504 union xfs_btree_key *rkp; /* right btree key */
3505 union xfs_btree_ptr *rpp; /* right address pointer */
3506
3507 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3508 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3509 rkp = xfs_btree_key_addr(cur, 1, right);
3510 rpp = xfs_btree_ptr_addr(cur, 1, right);
3511#ifdef DEBUG
3512 for (i = 1; i < rrecs; i++) {
3513 error = xfs_btree_check_ptr(cur, rpp, i, level);
3514 if (error)
3515 goto error0;
3516 }
3517#endif
3518 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3519 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3520
3521 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3522 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3523 } else {
3524 /* It's a leaf. Move records. */
3525 union xfs_btree_rec *lrp; /* left record pointer */
3526 union xfs_btree_rec *rrp; /* right record pointer */
3527
3528 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3529 rrp = xfs_btree_rec_addr(cur, 1, right);
3530
3531 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3532 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3533 }
3534
3535 XFS_BTREE_STATS_INC(cur, join);
3536
3537 /*
3538 * Fix up the number of records and right block pointer in the
3539 * surviving block, and log it.
3540 */
3541 xfs_btree_set_numrecs(left, lrecs + rrecs);
3542 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3543 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3544 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3545
3546 /* If there is a right sibling, point it to the remaining block. */
3547 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3548 if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3549 error = xfs_btree_read_buf_block(cur, &cptr, level,
3550 0, &rrblock, &rrbp);
3551 if (error)
3552 goto error0;
3553 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3554 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3555 }
3556
3557 /* Free the deleted block. */
3558 error = cur->bc_ops->free_block(cur, rbp);
3559 if (error)
3560 goto error0;
3561 XFS_BTREE_STATS_INC(cur, free);
3562
3563 /*
3564 * If we joined with the left neighbor, set the buffer in the
3565 * cursor to the left block, and fix up the index.
3566 */
3567 if (bp != lbp) {
3568 cur->bc_bufs[level] = lbp;
3569 cur->bc_ptrs[level] += lrecs;
3570 cur->bc_ra[level] = 0;
3571 }
3572 /*
3573 * If we joined with the right neighbor and there's a level above
3574 * us, increment the cursor at that level.
3575 */
3576 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3577 (level + 1 < cur->bc_nlevels)) {
3578 error = xfs_btree_increment(cur, level + 1, &i);
3579 if (error)
3580 goto error0;
3581 }
3582
3583 /*
3584 * Readjust the ptr at this level if it's not a leaf, since it's
3585 * still pointing at the deletion point, which makes the cursor
3586 * inconsistent. If this makes the ptr 0, the caller fixes it up.
3587 * We can't use decrement because it would change the next level up.
3588 */
3589 if (level > 0)
3590 cur->bc_ptrs[level]--;
3591
3592 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3593 /* Return value means the next level up has something to do. */
3594 *stat = 2;
3595 return 0;
3596
3597error0:
3598 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3599 if (tcur)
3600 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3601 return error;
3602}
3603
3604/*
3605 * Delete the record pointed to by cur, moving up one level after each join.
3606 * The cursor refers to the place where the record was (could be inserted)
3607 * when the operation returns; *stat receives the last status value.
3608 */
3609int /* error */
3610xfs_btree_delete(
3611 struct xfs_btree_cur *cur,
3612 int *stat) /* success/failure */
3613{
3614 int error; /* error return value */
3615 int level; /* btree level being processed */
3616 int i; /* status written by delrec/decrement */
3617
3618 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3619
3620 /*
3621 * Go up the tree, starting at leaf level.
3622 *
3623 * If 2 is returned then a join was done; go to the next level.
3624 * Otherwise we are done.
3625 */
3626 for (level = 0, i = 2; i == 2; level++) {
3627 error = xfs_btree_delrec(cur, level, &i);
3628 if (error)
3629 goto error0;
3630 }
3631
3632 if (i == 0) { /* status 0: scan the non-leaf levels */
3633 for (level = 1; level < cur->bc_nlevels; level++) {
3634 if (cur->bc_ptrs[level] == 0) { /* first level whose index is 0 */
3635 error = xfs_btree_decrement(cur, level, &i);
3636 if (error)
3637 goto error0;
3638 break; /* only the first such level is adjusted */
3639 }
3640 }
3641 }
3642
3643 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3644 *stat = i;
3645 return 0;
3646error0:
3647 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3648 return error;
3649}
3650
3651/*
3652 * Return a pointer to the leaf (level 0) record at the cursor position.
3653 */
3654int /* error */
3655xfs_btree_get_rec(
3656 struct xfs_btree_cur *cur, /* btree cursor */
3657 union xfs_btree_rec **recp, /* output: pointer to the btree record */
3658 int *stat) /* output: 1=record returned, 0=off either end */
3659{
3660 struct xfs_btree_block *block; /* btree block */
3661 struct xfs_buf *bp; /* buffer pointer */
3662 int ptr; /* record number */
3663#ifdef DEBUG
3664 int error; /* error return value */
3665#endif
3666
3667 ptr = cur->bc_ptrs[0];
3668 block = xfs_btree_get_block(cur, 0, &bp);
3669
3670#ifdef DEBUG
3671 error = xfs_btree_check_block(cur, block, 0, bp);
3672 if (error)
3673 return error;
3674#endif
3675
3676 /*
3677 * Off the right end or left end, return failure (*recp is not written).
3678 */
3679 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3680 *stat = 0;
3681 return 0;
3682 }
3683
3684 /*
3685 * Hand back the address of the record; nothing is copied here.
3686 */
3687 *recp = xfs_btree_rec_addr(cur, ptr, block);
3688 *stat = 1;
3689 return 0;
3690}
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 1f528a2a3754..789fffdf8b2f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -39,39 +39,19 @@ extern kmem_zone_t *xfs_btree_cur_zone;
39#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 39#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
40 40
41/* 41/*
42 * Short form header: space allocation btrees. 42 * Generic btree header.
43 */ 43 *
44typedef struct xfs_btree_sblock { 44 * This is a combination of the actual format used on disk for short and long
45 __be32 bb_magic; /* magic number for block type */ 45 * format btrees. The first three fields are shared by both formats, but
46 __be16 bb_level; /* 0 is a leaf */ 46 * the pointers are different and should be used with care.
47 __be16 bb_numrecs; /* current # of data records */ 47 *
48 __be32 bb_leftsib; /* left sibling block or NULLAGBLOCK */ 48 * To get the size of the actual short or long form headers please use
49 __be32 bb_rightsib; /* right sibling block or NULLAGBLOCK */ 49 * the size macros below. Never use sizeof(xfs_btree_block).
50} xfs_btree_sblock_t;
51
52/*
53 * Long form header: bmap btrees.
54 */
55typedef struct xfs_btree_lblock {
56 __be32 bb_magic; /* magic number for block type */
57 __be16 bb_level; /* 0 is a leaf */
58 __be16 bb_numrecs; /* current # of data records */
59 __be64 bb_leftsib; /* left sibling block or NULLDFSBNO */
60 __be64 bb_rightsib; /* right sibling block or NULLDFSBNO */
61} xfs_btree_lblock_t;
62
63/*
64 * Combined header and structure, used by common code.
65 */ 50 */
66typedef struct xfs_btree_hdr 51struct xfs_btree_block {
67{
68 __be32 bb_magic; /* magic number for block type */ 52 __be32 bb_magic; /* magic number for block type */
69 __be16 bb_level; /* 0 is a leaf */ 53 __be16 bb_level; /* 0 is a leaf */
70 __be16 bb_numrecs; /* current # of data records */ 54 __be16 bb_numrecs; /* current # of data records */
71} xfs_btree_hdr_t;
72
73typedef struct xfs_btree_block {
74 xfs_btree_hdr_t bb_h; /* header */
75 union { 55 union {
76 struct { 56 struct {
77 __be32 bb_leftsib; 57 __be32 bb_leftsib;
@@ -82,7 +62,36 @@ typedef struct xfs_btree_block {
82 __be64 bb_rightsib; 62 __be64 bb_rightsib;
83 } l; /* long form pointers */ 63 } l; /* long form pointers */
84 } bb_u; /* rest */ 64 } bb_u; /* rest */
85} xfs_btree_block_t; 65};
66
67#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
68#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
69
70
71/*
72 * Generic key, ptr and record wrapper structures.
73 *
74 * These are disk format structures, and are converted where necessary
75 * by the btree specific code that needs to interpret them.
76 */
77union xfs_btree_ptr {
78 __be32 s; /* short form ptr */
79 __be64 l; /* long form ptr */
80};
81
82union xfs_btree_key {
83 xfs_bmbt_key_t bmbt;
84 xfs_bmdr_key_t bmbr; /* bmbt root block */
85 xfs_alloc_key_t alloc;
86 xfs_inobt_key_t inobt;
87};
88
89union xfs_btree_rec {
90 xfs_bmbt_rec_t bmbt;
91 xfs_bmdr_rec_t bmbr; /* bmbt root block */
92 xfs_alloc_rec_t alloc;
93 xfs_inobt_rec_t inobt;
94};
86 95
87/* 96/*
88 * For logging record fields. 97 * For logging record fields.
@@ -96,46 +105,131 @@ typedef struct xfs_btree_block {
96#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) 105#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
97 106
98/* 107/*
99 * Boolean to select which form of xfs_btree_block_t.bb_u to use.
100 */
101#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP)
102
103/*
104 * Magic numbers for btree blocks. 108 * Magic numbers for btree blocks.
105 */ 109 */
106extern const __uint32_t xfs_magics[]; 110extern const __uint32_t xfs_magics[];
107 111
108/* 112/*
109 * Maximum and minimum records in a btree block. 113 * Generic stats interface
110 * Given block size, type prefix, and leaf flag (0 or 1). 114 */
111 * The divisor below is equivalent to lf ? (e1) : (e2) but that produces 115#define __XFS_BTREE_STATS_INC(type, stat) \
112 * compiler warnings. 116 XFS_STATS_INC(xs_ ## type ## _2_ ## stat)
113 */ 117#define XFS_BTREE_STATS_INC(cur, stat) \
114#define XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) \ 118do { \
115 ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \ 119 switch (cur->bc_btnum) { \
116 (((lf) * (uint)sizeof(t ## _rec_t)) + \ 120 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \
117 ((1 - (lf)) * \ 121 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
118 ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t)))))) 122 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
119#define XFS_BTREE_BLOCK_MINRECS(bsz,t,lf) \ 123 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
120 (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2) 124 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
121 125 } \
122/* 126} while (0)
123 * Record, key, and pointer address calculation macros. 127
124 * Given block size, type prefix, block pointer, and index of requested entry 128#define __XFS_BTREE_STATS_ADD(type, stat, val) \
125 * (first entry numbered 1). 129 XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val)
126 */ 130#define XFS_BTREE_STATS_ADD(cur, stat, val) \
127#define XFS_BTREE_REC_ADDR(t,bb,i) \ 131do { \
128 ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 132 switch (cur->bc_btnum) { \
129 ((i) - 1) * sizeof(t ## _rec_t))) 133 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \
130#define XFS_BTREE_KEY_ADDR(t,bb,i) \ 134 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
131 ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 135 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
132 ((i) - 1) * sizeof(t ## _key_t))) 136 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
133#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \ 137 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
134 ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 138 } \
135 (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t))) 139} while (0)
136 140
137#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ 141#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */
138 142
143struct xfs_btree_ops {
144 /* size of the key and record structures */
145 size_t key_len;
146 size_t rec_len;
147
148 /* cursor operations */
149 struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *);
150 void (*update_cursor)(struct xfs_btree_cur *src,
151 struct xfs_btree_cur *dst);
152
153 /* update btree root pointer */
154 void (*set_root)(struct xfs_btree_cur *cur,
155 union xfs_btree_ptr *nptr, int level_change);
156 int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
157 int level, union xfs_btree_ptr *newroot);
158
159 /* block allocation / freeing */
160 int (*alloc_block)(struct xfs_btree_cur *cur,
161 union xfs_btree_ptr *start_bno,
162 union xfs_btree_ptr *new_bno,
163 int length, int *stat);
164 int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
165
166 /* update last record information */
167 void (*update_lastrec)(struct xfs_btree_cur *cur,
168 struct xfs_btree_block *block,
169 union xfs_btree_rec *rec,
170 int ptr, int reason);
171
172 /* records in block/level */
173 int (*get_minrecs)(struct xfs_btree_cur *cur, int level);
174 int (*get_maxrecs)(struct xfs_btree_cur *cur, int level);
175
176 /* records on disk. Matters for the root in inode case. */
177 int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level);
178
179 /* init values of btree structures */
180 void (*init_key_from_rec)(union xfs_btree_key *key,
181 union xfs_btree_rec *rec);
182 void (*init_rec_from_key)(union xfs_btree_key *key,
183 union xfs_btree_rec *rec);
184 void (*init_rec_from_cur)(struct xfs_btree_cur *cur,
185 union xfs_btree_rec *rec);
186 void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
187 union xfs_btree_ptr *ptr);
188
189 /* difference between key value and cursor value */
190 __int64_t (*key_diff)(struct xfs_btree_cur *cur,
191 union xfs_btree_key *key);
192
193#ifdef DEBUG
194 /* check that k1 is lower than k2 */
195 int (*keys_inorder)(struct xfs_btree_cur *cur,
196 union xfs_btree_key *k1,
197 union xfs_btree_key *k2);
198
199 /* check that r1 is lower than r2 */
200 int (*recs_inorder)(struct xfs_btree_cur *cur,
201 union xfs_btree_rec *r1,
202 union xfs_btree_rec *r2);
203#endif
204
205 /* btree tracing */
206#ifdef XFS_BTREE_TRACE
207 void (*trace_enter)(struct xfs_btree_cur *, const char *,
208 char *, int, int, __psunsigned_t,
209 __psunsigned_t, __psunsigned_t,
210 __psunsigned_t, __psunsigned_t,
211 __psunsigned_t, __psunsigned_t,
212 __psunsigned_t, __psunsigned_t,
213 __psunsigned_t, __psunsigned_t);
214 void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
215 __uint64_t *, __uint64_t *);
216 void (*trace_key)(struct xfs_btree_cur *,
217 union xfs_btree_key *, __uint64_t *,
218 __uint64_t *);
219 void (*trace_record)(struct xfs_btree_cur *,
220 union xfs_btree_rec *, __uint64_t *,
221 __uint64_t *, __uint64_t *);
222#endif
223};
224
225/*
226 * Reasons for the update_lastrec method to be called.
227 */
228#define LASTREC_UPDATE 0
229#define LASTREC_INSREC 1
230#define LASTREC_DELREC 2
231
232
139/* 233/*
140 * Btree cursor structure. 234 * Btree cursor structure.
141 * This collects all information needed by the btree code in one place. 235 * This collects all information needed by the btree code in one place.
@@ -144,6 +238,8 @@ typedef struct xfs_btree_cur
144{ 238{
145 struct xfs_trans *bc_tp; /* transaction we're in, if any */ 239 struct xfs_trans *bc_tp; /* transaction we're in, if any */
146 struct xfs_mount *bc_mp; /* file system mount struct */ 240 struct xfs_mount *bc_mp; /* file system mount struct */
241 const struct xfs_btree_ops *bc_ops;
242 uint bc_flags; /* btree features - below */
147 union { 243 union {
148 xfs_alloc_rec_incore_t a; 244 xfs_alloc_rec_incore_t a;
149 xfs_bmbt_irec_t b; 245 xfs_bmbt_irec_t b;
@@ -175,94 +271,40 @@ typedef struct xfs_btree_cur
175 } bc_private; /* per-btree type data */ 271 } bc_private; /* per-btree type data */
176} xfs_btree_cur_t; 272} xfs_btree_cur_t;
177 273
274/* cursor flags */
275#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
276#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
277#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
278
279
178#define XFS_BTREE_NOERROR 0 280#define XFS_BTREE_NOERROR 0
179#define XFS_BTREE_ERROR 1 281#define XFS_BTREE_ERROR 1
180 282
181/* 283/*
182 * Convert from buffer to btree block header. 284 * Convert from buffer to btree block header.
183 */ 285 */
184#define XFS_BUF_TO_BLOCK(bp) ((xfs_btree_block_t *)XFS_BUF_PTR(bp)) 286#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp))
185#define XFS_BUF_TO_LBLOCK(bp) ((xfs_btree_lblock_t *)XFS_BUF_PTR(bp))
186#define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp))
187 287
188 288
189#ifdef __KERNEL__
190
191#ifdef DEBUG
192/* 289/*
193 * Debug routine: check that block header is ok. 290 * Check that block header is ok.
194 */ 291 */
195void 292int
196xfs_btree_check_block( 293xfs_btree_check_block(
197 xfs_btree_cur_t *cur, /* btree cursor */ 294 struct xfs_btree_cur *cur, /* btree cursor */
198 xfs_btree_block_t *block, /* generic btree block pointer */ 295 struct xfs_btree_block *block, /* generic btree block pointer */
199 int level, /* level of the btree block */
200 struct xfs_buf *bp); /* buffer containing block, if any */
201
202/*
203 * Debug routine: check that keys are in the right order.
204 */
205void
206xfs_btree_check_key(
207 xfs_btnum_t btnum, /* btree identifier */
208 void *ak1, /* pointer to left (lower) key */
209 void *ak2); /* pointer to right (higher) key */
210
211/*
212 * Debug routine: check that records are in the right order.
213 */
214void
215xfs_btree_check_rec(
216 xfs_btnum_t btnum, /* btree identifier */
217 void *ar1, /* pointer to left (lower) record */
218 void *ar2); /* pointer to right (higher) record */
219#else
220#define xfs_btree_check_block(a,b,c,d)
221#define xfs_btree_check_key(a,b,c)
222#define xfs_btree_check_rec(a,b,c)
223#endif /* DEBUG */
224
225/*
226 * Checking routine: check that long form block header is ok.
227 */
228int /* error (0 or EFSCORRUPTED) */
229xfs_btree_check_lblock(
230 xfs_btree_cur_t *cur, /* btree cursor */
231 xfs_btree_lblock_t *block, /* btree long form block pointer */
232 int level, /* level of the btree block */ 296 int level, /* level of the btree block */
233 struct xfs_buf *bp); /* buffer containing block, if any */ 297 struct xfs_buf *bp); /* buffer containing block, if any */
234 298
235/* 299/*
236 * Checking routine: check that (long) pointer is ok. 300 * Check that (long) pointer is ok.
237 */ 301 */
238int /* error (0 or EFSCORRUPTED) */ 302int /* error (0 or EFSCORRUPTED) */
239xfs_btree_check_lptr( 303xfs_btree_check_lptr(
240 xfs_btree_cur_t *cur, /* btree cursor */ 304 struct xfs_btree_cur *cur, /* btree cursor */
241 xfs_dfsbno_t ptr, /* btree block disk address */ 305 xfs_dfsbno_t ptr, /* btree block disk address */
242 int level); /* btree block level */ 306 int level); /* btree block level */
243 307
244#define xfs_btree_check_lptr_disk(cur, ptr, level) \
245 xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level)
246
247/*
248 * Checking routine: check that short form block header is ok.
249 */
250int /* error (0 or EFSCORRUPTED) */
251xfs_btree_check_sblock(
252 xfs_btree_cur_t *cur, /* btree cursor */
253 xfs_btree_sblock_t *block, /* btree short form block pointer */
254 int level, /* level of the btree block */
255 struct xfs_buf *bp); /* buffer containing block */
256
257/*
258 * Checking routine: check that (short) pointer is ok.
259 */
260int /* error (0 or EFSCORRUPTED) */
261xfs_btree_check_sptr(
262 xfs_btree_cur_t *cur, /* btree cursor */
263 xfs_agblock_t ptr, /* btree block disk address */
264 int level); /* btree block level */
265
266/* 308/*
267 * Delete the btree cursor. 309 * Delete the btree cursor.
268 */ 310 */
@@ -281,15 +323,6 @@ xfs_btree_dup_cursor(
281 xfs_btree_cur_t **ncur);/* output cursor */ 323 xfs_btree_cur_t **ncur);/* output cursor */
282 324
283/* 325/*
284 * Change the cursor to point to the first record in the current block
285 * at the given level. Other levels are unaffected.
286 */
287int /* success=1, failure=0 */
288xfs_btree_firstrec(
289 xfs_btree_cur_t *cur, /* btree cursor */
290 int level); /* level to change */
291
292/*
293 * Get a buffer for the block, return it with no data read. 326 * Get a buffer for the block, return it with no data read.
294 * Long-form addressing. 327 * Long-form addressing.
295 */ 328 */
@@ -313,20 +346,6 @@ xfs_btree_get_bufs(
313 uint lock); /* lock flags for get_buf */ 346 uint lock); /* lock flags for get_buf */
314 347
315/* 348/*
316 * Allocate a new btree cursor.
317 * The cursor is either for allocation (A) or bmap (B).
318 */
319xfs_btree_cur_t * /* new btree cursor */
320xfs_btree_init_cursor(
321 struct xfs_mount *mp, /* file system mount point */
322 struct xfs_trans *tp, /* transaction pointer */
323 struct xfs_buf *agbp, /* (A only) buffer for agf structure */
324 xfs_agnumber_t agno, /* (A only) allocation group number */
325 xfs_btnum_t btnum, /* btree identifier */
326 struct xfs_inode *ip, /* (B only) inode owning the btree */
327 int whichfork); /* (B only) data/attr fork */
328
329/*
330 * Check for the cursor referring to the last block at the given level. 349 * Check for the cursor referring to the last block at the given level.
331 */ 350 */
332int /* 1=is last block, 0=not last block */ 351int /* 1=is last block, 0=not last block */
@@ -335,15 +354,6 @@ xfs_btree_islastblock(
335 int level); /* level to check */ 354 int level); /* level to check */
336 355
337/* 356/*
338 * Change the cursor to point to the last record in the current block
339 * at the given level. Other levels are unaffected.
340 */
341int /* success=1, failure=0 */
342xfs_btree_lastrec(
343 xfs_btree_cur_t *cur, /* btree cursor */
344 int level); /* level to change */
345
346/*
347 * Compute first and last byte offsets for the fields given. 357 * Compute first and last byte offsets for the fields given.
348 * Interprets the offsets table, which contains struct field offsets. 358 * Interprets the offsets table, which contains struct field offsets.
349 */ 359 */
@@ -404,39 +414,53 @@ xfs_btree_reada_bufs(
404 xfs_extlen_t count); /* count of filesystem blocks */ 414 xfs_extlen_t count); /* count of filesystem blocks */
405 415
406/* 416/*
407 * Read-ahead btree blocks, at the given level. 417 * Set the buffer for level "lev" in the cursor to bp, releasing
408 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 418 * any previous buffer.
409 */ 419 */
410int /* readahead block count */ 420void
411xfs_btree_readahead_core( 421xfs_btree_setbuf(
412 xfs_btree_cur_t *cur, /* btree cursor */ 422 xfs_btree_cur_t *cur, /* btree cursor */
413 int lev, /* level in btree */ 423 int lev, /* level in btree */
414 int lr); /* left/right bits */ 424 struct xfs_buf *bp); /* new buffer to set */
415 425
416static inline int /* readahead block count */
417xfs_btree_readahead(
418 xfs_btree_cur_t *cur, /* btree cursor */
419 int lev, /* level in btree */
420 int lr) /* left/right bits */
421{
422 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
423 return 0;
424 426
425 return xfs_btree_readahead_core(cur, lev, lr); 427/*
426} 428 * Common btree core entry points.
429 */
430int xfs_btree_increment(struct xfs_btree_cur *, int, int *);
431int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
432int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
433int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
434int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
435int xfs_btree_kill_iroot(struct xfs_btree_cur *);
436int xfs_btree_insert(struct xfs_btree_cur *, int *);
437int xfs_btree_delete(struct xfs_btree_cur *, int *);
438int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
427 439
440/*
441 * Internal btree helpers also used by xfs_bmap.c.
442 */
443void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
444void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int);
428 445
429/* 446/*
430 * Set the buffer for level "lev" in the cursor to bp, releasing 447 * Helpers.
431 * any previous buffer.
432 */ 448 */
433void 449static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
434xfs_btree_setbuf( 450{
435 xfs_btree_cur_t *cur, /* btree cursor */ 451 return be16_to_cpu(block->bb_numrecs);
436 int lev, /* level in btree */ 452}
437 struct xfs_buf *bp); /* new buffer to set */ 453
454static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
455 __uint16_t numrecs)
456{
457 block->bb_numrecs = cpu_to_be16(numrecs);
458}
438 459
439#endif /* __KERNEL__ */ 460static inline int xfs_btree_get_level(struct xfs_btree_block *block)
461{
462 return be16_to_cpu(block->bb_level);
463}
440 464
441 465
442/* 466/*
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
new file mode 100644
index 000000000000..44ff942a0fda
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.c
@@ -0,0 +1,249 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_types.h"
20#include "xfs_inum.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_alloc_btree.h"
23#include "xfs_ialloc_btree.h"
24#include "xfs_inode.h"
25#include "xfs_btree.h"
26#include "xfs_btree_trace.h"
27
28STATIC void
29xfs_btree_trace_ptr(
30 struct xfs_btree_cur *cur,
31 union xfs_btree_ptr ptr,
32 __psunsigned_t *high,
33 __psunsigned_t *low)
34{
35 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
36 __u64 val = be64_to_cpu(ptr.l);
37 *high = val >> 32;
38 *low = (int)val;
39 } else {
40 *high = 0;
41 *low = be32_to_cpu(ptr.s);
42 }
43}
44
45/*
46 * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
47 */
48void
49xfs_btree_trace_argbi(
50 const char *func,
51 struct xfs_btree_cur *cur,
52 struct xfs_buf *b,
53 int i,
54 int line)
55{
56 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
57 line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
58 0, 0, 0, 0);
59}
60
61/*
62 * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
63 */
64void
65xfs_btree_trace_argbii(
66 const char *func,
67 struct xfs_btree_cur *cur,
68 struct xfs_buf *b,
69 int i0,
70 int i1,
71 int line)
72{
73 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
74 line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
75 0, 0, 0, 0);
76}
77
78/*
79 * Add a trace buffer entry for arguments, for 3 block-length args
80 * and an integer arg.
81 */
82void
83xfs_btree_trace_argfffi(
84 const char *func,
85 struct xfs_btree_cur *cur,
86 xfs_dfiloff_t o,
87 xfs_dfsbno_t b,
88 xfs_dfilblks_t i,
89 int j,
90 int line)
91{
92 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
93 line,
94 o >> 32, (int)o,
95 b >> 32, (int)b,
96 i >> 32, (int)i,
97 (int)j, 0, 0, 0, 0);
98}
99
100/*
101 * Add a trace buffer entry for arguments, for one integer arg.
102 */
103void
104xfs_btree_trace_argi(
105 const char *func,
106 struct xfs_btree_cur *cur,
107 int i,
108 int line)
109{
110 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
111 line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
112}
113
114/*
115 * Add a trace buffer entry for arguments, for int, fsblock, key.
116 */
117void
118xfs_btree_trace_argipk(
119 const char *func,
120 struct xfs_btree_cur *cur,
121 int i,
122 union xfs_btree_ptr ptr,
123 union xfs_btree_key *key,
124 int line)
125{
126 __psunsigned_t high, low;
127 __uint64_t l0, l1;
128
129 xfs_btree_trace_ptr(cur, ptr, &high, &low);
130 cur->bc_ops->trace_key(cur, key, &l0, &l1);
131 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
132 line, i, high, low,
133 l0 >> 32, (int)l0,
134 l1 >> 32, (int)l1,
135 0, 0, 0, 0);
136}
137
138/*
139 * Add a trace buffer entry for arguments, for int, fsblock, rec.
140 */
141void
142xfs_btree_trace_argipr(
143 const char *func,
144 struct xfs_btree_cur *cur,
145 int i,
146 union xfs_btree_ptr ptr,
147 union xfs_btree_rec *rec,
148 int line)
149{
150 __psunsigned_t high, low;
151 __uint64_t l0, l1, l2;
152
153 xfs_btree_trace_ptr(cur, ptr, &high, &low);
154 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
155 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
156 line, i,
157 high, low,
158 l0 >> 32, (int)l0,
159 l1 >> 32, (int)l1,
160 l2 >> 32, (int)l2,
161 0, 0);
162}
163
164/*
165 * Add a trace buffer entry for arguments, for int, key.
166 */
167void
168xfs_btree_trace_argik(
169 const char *func,
170 struct xfs_btree_cur *cur,
171 int i,
172 union xfs_btree_key *key,
173 int line)
174{
175 __uint64_t l0, l1;
176
177 cur->bc_ops->trace_key(cur, key, &l0, &l1);
178 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
179 line, i,
180 l0 >> 32, (int)l0,
181 l1 >> 32, (int)l1,
182 0, 0, 0, 0, 0, 0);
183}
184
185/*
186 * Add a trace buffer entry for arguments, for record.
187 */
188void
189xfs_btree_trace_argr(
190 const char *func,
191 struct xfs_btree_cur *cur,
192 union xfs_btree_rec *rec,
193 int line)
194{
195 __uint64_t l0, l1, l2;
196
197 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
198 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
199 line,
200 l0 >> 32, (int)l0,
201 l1 >> 32, (int)l1,
202 l2 >> 32, (int)l2,
203 0, 0, 0, 0, 0);
204}
205
206/*
207 * Add a trace buffer entry for the cursor/operation.
208 */
209void
210xfs_btree_trace_cursor(
211 const char *func,
212 struct xfs_btree_cur *cur,
213 int type,
214 int line)
215{
216 __uint32_t s0;
217 __uint64_t l0, l1;
218 char *s;
219
220 switch (type) {
221 case XBT_ARGS:
222 s = "args";
223 break;
224 case XBT_ENTRY:
225 s = "entry";
226 break;
227 case XBT_ERROR:
228 s = "error";
229 break;
230 case XBT_EXIT:
231 s = "exit";
232 break;
233 default:
234 s = "unknown";
235 break;
236 }
237
238 cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
239 cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
240 s0,
241 l0 >> 32, (int)l0,
242 l1 >> 32, (int)l1,
243 (__psunsigned_t)cur->bc_bufs[0],
244 (__psunsigned_t)cur->bc_bufs[1],
245 (__psunsigned_t)cur->bc_bufs[2],
246 (__psunsigned_t)cur->bc_bufs[3],
247 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
248 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
249}
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
new file mode 100644
index 000000000000..b3f5eb3c3c6c
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.h
@@ -0,0 +1,116 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
#ifndef __XFS_BTREE_TRACE_H__
#define __XFS_BTREE_TRACE_H__

/*
 * Forward declarations for every struct/union tag named in the prototypes
 * below.  A tag first seen inside a parameter list has prototype scope only,
 * so without these the declarations would refer to types distinct from the
 * real ones whenever this header is included ahead of their definitions.
 */
struct xfs_btree_cur;
struct xfs_buf;
union xfs_btree_ptr;
union xfs_btree_key;
union xfs_btree_rec;


/*
 * Trace hooks.
 *	i,j = integer (32 bit)
 *	b   = btree block buffer (xfs_buf_t)
 *	f   = file offset, fs block number or block count
 *	      (xfs_dfiloff_t / xfs_dfsbno_t / xfs_dfilblks_t)
 *	p   = btree ptr
 *	r   = btree record
 *	k   = btree key
 *
 * The XFS_BTREE_TRACE_* macros expand to calls that add __func__ and
 * __LINE__ when XFS_BTREE_TRACE is defined, and to nothing otherwise (their
 * arguments are then not evaluated).
 */

#ifdef XFS_BTREE_TRACE

/*
 * Trace buffer entry types.
 */
#define XFS_BTREE_KTRACE_ARGBI		1
#define XFS_BTREE_KTRACE_ARGBII		2
#define XFS_BTREE_KTRACE_ARGFFFI	3
#define XFS_BTREE_KTRACE_ARGI		4
#define XFS_BTREE_KTRACE_ARGIPK		5
#define XFS_BTREE_KTRACE_ARGIPR		6
#define XFS_BTREE_KTRACE_ARGIK		7
#define XFS_BTREE_KTRACE_ARGR		8
#define XFS_BTREE_KTRACE_CUR		9

/*
 * Sub-types for cursor traces.
 */
#define XBT_ARGS	0
#define XBT_ENTRY	1
#define XBT_ERROR	2
#define XBT_EXIT	3

/* In every prototype the final int is the caller's source line number. */
void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
		struct xfs_buf *, int, int);
void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
		struct xfs_buf *, int, int, int);
void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *,
		xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int);
void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
		union xfs_btree_ptr, union xfs_btree_key *, int);
void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
		union xfs_btree_ptr, union xfs_btree_rec *, int);
void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
		union xfs_btree_key *, int);
void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
		union xfs_btree_rec *, int);
void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);


#define XFS_ALLOCBT_TRACE_SIZE	4096	/* size of global trace buffer */
extern ktrace_t	*xfs_allocbt_trace_buf;

#define XFS_INOBT_TRACE_SIZE	4096	/* size of global trace buffer */
extern ktrace_t	*xfs_inobt_trace_buf;

#define XFS_BMBT_TRACE_SIZE	4096	/* size of global trace buffer */
#define XFS_BMBT_KTRACE_SIZE	32	/* size of per-inode trace buffer */
extern ktrace_t	*xfs_bmbt_trace_buf;


#define	XFS_BTREE_TRACE_ARGBI(c, b, i)	\
	xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
#define	XFS_BTREE_TRACE_ARGBII(c, b, i, j)	\
	xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
#define	XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)	\
	xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
#define	XFS_BTREE_TRACE_ARGI(c, i)	\
	xfs_btree_trace_argi(__func__, c, i, __LINE__)
#define	XFS_BTREE_TRACE_ARGIPK(c, i, p, k)	\
	xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
#define	XFS_BTREE_TRACE_ARGIPR(c, i, p, r)	\
	xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
#define	XFS_BTREE_TRACE_ARGIK(c, i, k)	\
	xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
#define	XFS_BTREE_TRACE_ARGR(c, r)	\
	xfs_btree_trace_argr(__func__, c, r, __LINE__)
#define	XFS_BTREE_TRACE_CURSOR(c, t)	\
	xfs_btree_trace_cursor(__func__, c, t, __LINE__)
#else
/* Tracing compiled out: same parameter lists as above, empty expansions. */
#define	XFS_BTREE_TRACE_ARGBI(c, b, i)
#define	XFS_BTREE_TRACE_ARGBII(c, b, i, j)
#define	XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)
#define	XFS_BTREE_TRACE_ARGI(c, i)
#define	XFS_BTREE_TRACE_ARGIPK(c, i, p, k)
#define	XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
#define	XFS_BTREE_TRACE_ARGIK(c, i, k)
#define	XFS_BTREE_TRACE_ARGR(c, r)
#define	XFS_BTREE_TRACE_CURSOR(c, t)
#endif	/* XFS_BTREE_TRACE */

#endif	/* __XFS_BTREE_TRACE_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 002fc2617c8e..d245d04e10ca 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -375,7 +375,7 @@ xfs_buf_item_unpin(
375 xfs_buf_log_item_t *bip, 375 xfs_buf_log_item_t *bip,
376 int stale) 376 int stale)
377{ 377{
378 xfs_mount_t *mp; 378 struct xfs_ail *ailp;
379 xfs_buf_t *bp; 379 xfs_buf_t *bp;
380 int freed; 380 int freed;
381 381
@@ -387,7 +387,7 @@ xfs_buf_item_unpin(
387 xfs_buftrace("XFS_UNPIN", bp); 387 xfs_buftrace("XFS_UNPIN", bp);
388 388
389 freed = atomic_dec_and_test(&bip->bli_refcount); 389 freed = atomic_dec_and_test(&bip->bli_refcount);
390 mp = bip->bli_item.li_mountp; 390 ailp = bip->bli_item.li_ailp;
391 xfs_bunpin(bp); 391 xfs_bunpin(bp);
392 if (freed && stale) { 392 if (freed && stale) {
393 ASSERT(bip->bli_flags & XFS_BLI_STALE); 393 ASSERT(bip->bli_flags & XFS_BLI_STALE);
@@ -399,17 +399,17 @@ xfs_buf_item_unpin(
399 xfs_buftrace("XFS_UNPIN STALE", bp); 399 xfs_buftrace("XFS_UNPIN STALE", bp);
400 /* 400 /*
401 * If we get called here because of an IO error, we may 401 * If we get called here because of an IO error, we may
402 * or may not have the item on the AIL. xfs_trans_delete_ail() 402 * or may not have the item on the AIL. xfs_trans_ail_delete()
403 * will take care of that situation. 403 * will take care of that situation.
404 * xfs_trans_delete_ail() drops the AIL lock. 404 * xfs_trans_ail_delete() drops the AIL lock.
405 */ 405 */
406 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 406 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
407 xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); 407 xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
408 XFS_BUF_SET_FSPRIVATE(bp, NULL); 408 XFS_BUF_SET_FSPRIVATE(bp, NULL);
409 XFS_BUF_CLR_IODONE_FUNC(bp); 409 XFS_BUF_CLR_IODONE_FUNC(bp);
410 } else { 410 } else {
411 spin_lock(&mp->m_ail_lock); 411 spin_lock(&ailp->xa_lock);
412 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 412 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
413 xfs_buf_item_relse(bp); 413 xfs_buf_item_relse(bp);
414 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); 414 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
415 } 415 }
@@ -731,6 +731,7 @@ xfs_buf_item_init(
731 bip->bli_item.li_type = XFS_LI_BUF; 731 bip->bli_item.li_type = XFS_LI_BUF;
732 bip->bli_item.li_ops = &xfs_buf_item_ops; 732 bip->bli_item.li_ops = &xfs_buf_item_ops;
733 bip->bli_item.li_mountp = mp; 733 bip->bli_item.li_mountp = mp;
734 bip->bli_item.li_ailp = mp->m_ail;
734 bip->bli_buf = bp; 735 bip->bli_buf = bp;
735 xfs_buf_hold(bp); 736 xfs_buf_hold(bp);
736 bip->bli_format.blf_type = XFS_LI_BUF; 737 bip->bli_format.blf_type = XFS_LI_BUF;
@@ -1122,27 +1123,23 @@ xfs_buf_iodone(
1122 xfs_buf_t *bp, 1123 xfs_buf_t *bp,
1123 xfs_buf_log_item_t *bip) 1124 xfs_buf_log_item_t *bip)
1124{ 1125{
1125 struct xfs_mount *mp; 1126 struct xfs_ail *ailp = bip->bli_item.li_ailp;
1126 1127
1127 ASSERT(bip->bli_buf == bp); 1128 ASSERT(bip->bli_buf == bp);
1128 1129
1129 xfs_buf_rele(bp); 1130 xfs_buf_rele(bp);
1130 mp = bip->bli_item.li_mountp;
1131 1131
1132 /* 1132 /*
1133 * If we are forcibly shutting down, this may well be 1133 * If we are forcibly shutting down, this may well be
1134 * off the AIL already. That's because we simulate the 1134 * off the AIL already. That's because we simulate the
1135 * log-committed callbacks to unpin these buffers. Or we may never 1135 * log-committed callbacks to unpin these buffers. Or we may never
1136 * have put this item on AIL because of the transaction was 1136 * have put this item on AIL because of the transaction was
1137 * aborted forcibly. xfs_trans_delete_ail() takes care of these. 1137 * aborted forcibly. xfs_trans_ail_delete() takes care of these.
1138 * 1138 *
1139 * Either way, AIL is useless if we're forcing a shutdown. 1139 * Either way, AIL is useless if we're forcing a shutdown.
1140 */ 1140 */
1141 spin_lock(&mp->m_ail_lock); 1141 spin_lock(&ailp->xa_lock);
1142 /* 1142 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
1143 * xfs_trans_delete_ail() drops the AIL lock.
1144 */
1145 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1146 xfs_buf_item_free(bip); 1143 xfs_buf_item_free(bip);
1147} 1144}
1148 1145
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
deleted file mode 100644
index d2ce5dd70d87..000000000000
--- a/fs/xfs/xfs_clnt.h
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_CLNT_H__
19#define __XFS_CLNT_H__
20
21/*
22 * XFS arguments structure, constructed from the arguments we
23 * are passed via the mount system call.
24 *
25 * NOTE: The mount system call is handled differently between
26 * Linux and IRIX. In IRIX we worked with a binary data
27 * structure coming in across the syscall interface from user
28 * space (the mount userspace knows about each filesystem type
29 * and the set of valid options for it, and converts the users
30 * argument string into a binary structure _before_ making the
31 * system call), and the ABI issues that this implies.
32 *
33 * In Linux, we are passed a comma separated set of options;
34 * ie. a NULL terminated string of characters. Userspace mount
35 * code does not have any knowledge of mount options expected by
36 * each filesystem type and so each filesystem parses its mount
37 * options in kernel space.
38 *
39 * For the Linux port, we kept this structure pretty much intact
40 * and use it internally (because the existing code groks it).
41 */
42struct xfs_mount_args {
43 int flags; /* flags -> see XFSMNT_... macros below */
44 int flags2; /* flags -> see XFSMNT2_... macros below */
45 int logbufs; /* Number of log buffers, -1 to default */
46 int logbufsize; /* Size of log buffers, -1 to default */
47 char fsname[MAXNAMELEN+1]; /* data device name */
48 char rtname[MAXNAMELEN+1]; /* realtime device filename */
49 char logname[MAXNAMELEN+1]; /* journal device filename */
50 char mtpt[MAXNAMELEN+1]; /* filesystem mount point */
51 int sunit; /* stripe unit (BBs) */
52 int swidth; /* stripe width (BBs), multiple of sunit */
53 uchar_t iosizelog; /* log2 of the preferred I/O size */
54 int ihashsize; /* inode hash table size (buckets) */
55};
56
57/*
58 * XFS mount option flags -- args->flags
59 */
60#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */
61#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount
62 * compatible */
63#define XFSMNT_INO64 0x00000004 /* move inode numbers up
64 * past 2^32 */
65#define XFSMNT_UQUOTA 0x00000008 /* user quota accounting */
66#define XFSMNT_PQUOTA 0x00000010 /* IRIX prj quota accounting */
67#define XFSMNT_UQUOTAENF 0x00000020 /* user quota limit
68 * enforcement */
69#define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit
70 * enforcement */
71#define XFSMNT_QUIET 0x00000080 /* don't report mount errors */
72#define XFSMNT_NOALIGN 0x00000200 /* don't allocate at
73 * stripe boundaries*/
74#define XFSMNT_RETERR 0x00000400 /* return error to user */
75#define XFSMNT_NORECOVERY 0x00000800 /* no recovery, implies
76 * read-only mount */
77#define XFSMNT_SHARED 0x00001000 /* shared XFS mount */
78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
80 /* (osyncisdsync is default) */
81#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
82#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
83 * bits of address space */
84#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
85#define XFSMNT_GQUOTAENF 0x00800000 /* group quota limit
86 * enforcement */
87#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
88#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
89#define XFSMNT_BARRIER 0x04000000 /* use write barriers */
90#define XFSMNT_IKEEP 0x08000000 /* inode cluster delete */
91#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
92 * allocation */
93#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
94 * symlink,mkdir,rmdir,mknod */
95#define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */
96
97/*
98 * XFS mount option flags -- args->flags2
99 */
100#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred
101 * I/O size in stat(2) */
102#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams
103 * allocator */
104
105#endif /* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9e561a9cefca..a11a8390bf6c 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1566,11 +1566,14 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1566 int nmap, error, w, count, c, got, i, mapi; 1566 int nmap, error, w, count, c, got, i, mapi;
1567 xfs_trans_t *tp; 1567 xfs_trans_t *tp;
1568 xfs_mount_t *mp; 1568 xfs_mount_t *mp;
1569 xfs_drfsbno_t nblks;
1569 1570
1570 dp = args->dp; 1571 dp = args->dp;
1571 mp = dp->i_mount; 1572 mp = dp->i_mount;
1572 w = args->whichfork; 1573 w = args->whichfork;
1573 tp = args->trans; 1574 tp = args->trans;
1575 nblks = dp->i_d.di_nblocks;
1576
1574 /* 1577 /*
1575 * For new directories adjust the file offset and block count. 1578 * For new directories adjust the file offset and block count.
1576 */ 1579 */
@@ -1647,6 +1650,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1647 } 1650 }
1648 if (mapp != &map) 1651 if (mapp != &map)
1649 kmem_free(mapp); 1652 kmem_free(mapp);
1653 /* account for newly allocated blocks in reserved blocks total */
1654 args->total -= dp->i_d.di_nblocks - nblks;
1650 *new_blkno = (xfs_dablk_t)bno; 1655 *new_blkno = (xfs_dablk_t)bno;
1651 return 0; 1656 return 0;
1652} 1657}
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 8be0b00ede9a..70b710c1792d 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -72,27 +72,7 @@ typedef struct xfs_da_intnode {
72typedef struct xfs_da_node_hdr xfs_da_node_hdr_t; 72typedef struct xfs_da_node_hdr xfs_da_node_hdr_t;
73typedef struct xfs_da_node_entry xfs_da_node_entry_t; 73typedef struct xfs_da_node_entry xfs_da_node_entry_t;
74 74
75#define XFS_DA_MAXHASH ((xfs_dahash_t)-1) /* largest valid hash value */
76
77#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize 75#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
78#define XFS_LBLOG(mp) (mp)->m_sb.sb_blocklog
79
80#define XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \
81 (((bno) << (mp)->m_dircook_elog) | (entry))
82#define XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \
83 (((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash))
84#define XFS_DA_COOKIE_HASH(mp,cookie) ((xfs_dahash_t)cookie)
85#define XFS_DA_COOKIE_BNO(mp,cookie) \
86 ((((xfs_off_t)(cookie) >> 31) == -1LL ? \
87 (xfs_dablk_t)0 : \
88 (xfs_dablk_t)((xfs_off_t)(cookie) >> \
89 ((mp)->m_dircook_elog + 32))))
90#define XFS_DA_COOKIE_ENTRY(mp,cookie) \
91 ((((xfs_off_t)(cookie) >> 31) == -1LL ? \
92 (xfs_dablk_t)0 : \
93 (xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \
94 ((1 << (mp)->m_dircook_elog) - 1))))
95
96 76
97/*======================================================================== 77/*========================================================================
98 * Btree searching and modification structure definitions. 78 * Btree searching and modification structure definitions.
@@ -226,9 +206,8 @@ struct xfs_nameops {
226}; 206};
227 207
228 208
229#ifdef __KERNEL__
230/*======================================================================== 209/*========================================================================
231 * Function prototypes for the kernel. 210 * Function prototypes.
232 *========================================================================*/ 211 *========================================================================*/
233 212
234/* 213/*
@@ -289,6 +268,5 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
289 268
290extern struct kmem_zone *xfs_da_state_zone; 269extern struct kmem_zone *xfs_da_state_zone;
291extern struct kmem_zone *xfs_dabuf_zone; 270extern struct kmem_zone *xfs_dabuf_zone;
292#endif /* __KERNEL__ */
293 271
294#endif /* __XFS_DA_BTREE_H__ */ 272#endif /* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c9065eaf2a4d..d7cf392cc852 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -78,8 +78,7 @@ typedef struct xfs_dinode
78 xfs_dinode_core_t di_core; 78 xfs_dinode_core_t di_core;
79 /* 79 /*
80 * In adding anything between the core and the union, be 80 * In adding anything between the core and the union, be
81 * sure to update the macros like XFS_LITINO below and 81 * sure to update the macros like XFS_LITINO below.
82 * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h.
83 */ 82 */
84 __be32 di_next_unlinked;/* agi unlinked list ptr */ 83 __be32 di_next_unlinked;/* agi unlinked list ptr */
85 union { 84 union {
@@ -166,7 +165,7 @@ typedef enum xfs_dinode_fmt
166 */ 165 */
167#define XFS_LITINO(mp) ((mp)->m_litino) 166#define XFS_LITINO(mp) ((mp)->m_litino)
168#define XFS_BROOT_SIZE_ADJ \ 167#define XFS_BROOT_SIZE_ADJ \
169 (sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t)) 168 (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
170 169
171/* 170/*
172 * Inode data & attribute fork sizes, per inode. 171 * Inode data & attribute fork sizes, per inode.
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 80e0dc51361c..1afb12278b8d 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -525,11 +525,13 @@ xfs_dir2_grow_inode(
525 xfs_mount_t *mp; 525 xfs_mount_t *mp;
526 int nmap; /* number of bmap entries */ 526 int nmap; /* number of bmap entries */
527 xfs_trans_t *tp; 527 xfs_trans_t *tp;
528 xfs_drfsbno_t nblks;
528 529
529 xfs_dir2_trace_args_s("grow_inode", args, space); 530 xfs_dir2_trace_args_s("grow_inode", args, space);
530 dp = args->dp; 531 dp = args->dp;
531 tp = args->trans; 532 tp = args->trans;
532 mp = dp->i_mount; 533 mp = dp->i_mount;
534 nblks = dp->i_d.di_nblocks;
533 /* 535 /*
534 * Set lowest possible block in the space requested. 536 * Set lowest possible block in the space requested.
535 */ 537 */
@@ -622,7 +624,11 @@ xfs_dir2_grow_inode(
622 */ 624 */
623 if (mapp != &map) 625 if (mapp != &map)
624 kmem_free(mapp); 626 kmem_free(mapp);
627
628 /* account for newly allocated blocks in reserved blocks total */
629 args->total -= dp->i_d.di_nblocks - nblks;
625 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); 630 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
631
626 /* 632 /*
627 * Update file's size if this is the data space and it grew. 633 * Update file's size if this is the data space and it grew.
628 */ 634 */
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index a1e55fb9d5dd..e71e2581c0c3 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -25,7 +25,6 @@
25#include "xfs_inum.h" 25#include "xfs_inum.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_mount.h" 27#include "xfs_mount.h"
28#include "xfs_clnt.h"
29 28
30 29
31static struct xfs_dmops xfs_dmcore_stub = { 30static struct xfs_dmops xfs_dmcore_stub = {
@@ -38,9 +37,9 @@ static struct xfs_dmops xfs_dmcore_stub = {
38}; 37};
39 38
40int 39int
41xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 40xfs_dmops_get(struct xfs_mount *mp)
42{ 41{
43 if (args->flags & XFSMNT_DMAPI) { 42 if (mp->m_flags & XFS_MOUNT_DMAPI) {
44 cmn_err(CE_WARN, 43 cmn_err(CE_WARN,
45 "XFS: dmapi support not available in this kernel."); 44 "XFS: dmapi support not available in this kernel.");
46 return EINVAL; 45 return EINVAL;
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 8aa28f751b2a..05a4bdd4be39 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -108,19 +108,16 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
108STATIC void 108STATIC void
109xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) 109xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
110{ 110{
111 xfs_mount_t *mp; 111 struct xfs_ail *ailp = efip->efi_item.li_ailp;
112 112
113 mp = efip->efi_item.li_mountp; 113 spin_lock(&ailp->xa_lock);
114 spin_lock(&mp->m_ail_lock);
115 if (efip->efi_flags & XFS_EFI_CANCELED) { 114 if (efip->efi_flags & XFS_EFI_CANCELED) {
116 /* 115 /* xfs_trans_ail_delete() drops the AIL lock. */
117 * xfs_trans_delete_ail() drops the AIL lock. 116 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
118 */
119 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
120 xfs_efi_item_free(efip); 117 xfs_efi_item_free(efip);
121 } else { 118 } else {
122 efip->efi_flags |= XFS_EFI_COMMITTED; 119 efip->efi_flags |= XFS_EFI_COMMITTED;
123 spin_unlock(&mp->m_ail_lock); 120 spin_unlock(&ailp->xa_lock);
124 } 121 }
125} 122}
126 123
@@ -134,26 +131,23 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
134STATIC void 131STATIC void
135xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) 132xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
136{ 133{
137 xfs_mount_t *mp; 134 struct xfs_ail *ailp = efip->efi_item.li_ailp;
138 xfs_log_item_desc_t *lidp; 135 xfs_log_item_desc_t *lidp;
139 136
140 mp = efip->efi_item.li_mountp; 137 spin_lock(&ailp->xa_lock);
141 spin_lock(&mp->m_ail_lock);
142 if (efip->efi_flags & XFS_EFI_CANCELED) { 138 if (efip->efi_flags & XFS_EFI_CANCELED) {
143 /* 139 /*
144 * free the xaction descriptor pointing to this item 140 * free the xaction descriptor pointing to this item
145 */ 141 */
146 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip); 142 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
147 xfs_trans_free_item(tp, lidp); 143 xfs_trans_free_item(tp, lidp);
148 /* 144
149 * pull the item off the AIL. 145 /* xfs_trans_ail_delete() drops the AIL lock. */
150 * xfs_trans_delete_ail() drops the AIL lock. 146 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
151 */
152 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
153 xfs_efi_item_free(efip); 147 xfs_efi_item_free(efip);
154 } else { 148 } else {
155 efip->efi_flags |= XFS_EFI_COMMITTED; 149 efip->efi_flags |= XFS_EFI_COMMITTED;
156 spin_unlock(&mp->m_ail_lock); 150 spin_unlock(&ailp->xa_lock);
157 } 151 }
158} 152}
159 153
@@ -268,6 +262,7 @@ xfs_efi_init(xfs_mount_t *mp,
268 efip->efi_item.li_type = XFS_LI_EFI; 262 efip->efi_item.li_type = XFS_LI_EFI;
269 efip->efi_item.li_ops = &xfs_efi_item_ops; 263 efip->efi_item.li_ops = &xfs_efi_item_ops;
270 efip->efi_item.li_mountp = mp; 264 efip->efi_item.li_mountp = mp;
265 efip->efi_item.li_ailp = mp->m_ail;
271 efip->efi_format.efi_nextents = nextents; 266 efip->efi_format.efi_nextents = nextents;
272 efip->efi_format.efi_id = (__psint_t)(void*)efip; 267 efip->efi_format.efi_id = (__psint_t)(void*)efip;
273 268
@@ -345,25 +340,22 @@ void
345xfs_efi_release(xfs_efi_log_item_t *efip, 340xfs_efi_release(xfs_efi_log_item_t *efip,
346 uint nextents) 341 uint nextents)
347{ 342{
348 xfs_mount_t *mp; 343 struct xfs_ail *ailp = efip->efi_item.li_ailp;
349 int extents_left; 344 int extents_left;
350 345
351 mp = efip->efi_item.li_mountp;
352 ASSERT(efip->efi_next_extent > 0); 346 ASSERT(efip->efi_next_extent > 0);
353 ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); 347 ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
354 348
355 spin_lock(&mp->m_ail_lock); 349 spin_lock(&ailp->xa_lock);
356 ASSERT(efip->efi_next_extent >= nextents); 350 ASSERT(efip->efi_next_extent >= nextents);
357 efip->efi_next_extent -= nextents; 351 efip->efi_next_extent -= nextents;
358 extents_left = efip->efi_next_extent; 352 extents_left = efip->efi_next_extent;
359 if (extents_left == 0) { 353 if (extents_left == 0) {
360 /* 354 /* xfs_trans_ail_delete() drops the AIL lock. */
361 * xfs_trans_delete_ail() drops the AIL lock. 355 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
362 */
363 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
364 xfs_efi_item_free(efip); 356 xfs_efi_item_free(efip);
365 } else { 357 } else {
366 spin_unlock(&mp->m_ail_lock); 358 spin_unlock(&ailp->xa_lock);
367 } 359 }
368} 360}
369 361
@@ -565,6 +557,7 @@ xfs_efd_init(xfs_mount_t *mp,
565 efdp->efd_item.li_type = XFS_LI_EFD; 557 efdp->efd_item.li_type = XFS_LI_EFD;
566 efdp->efd_item.li_ops = &xfs_efd_item_ops; 558 efdp->efd_item.li_ops = &xfs_efd_item_ops;
567 efdp->efd_item.li_mountp = mp; 559 efdp->efd_item.li_mountp = mp;
560 efdp->efd_item.li_ailp = mp->m_ail;
568 efdp->efd_efip = efip; 561 efdp->efd_efip = efip;
569 efdp->efd_format.efd_nextents = nextents; 562 efdp->efd_format.efd_nextents = nextents;
570 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 563 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 84583cf73db3..f1d0585041b9 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -126,7 +126,7 @@ xfs_growfs_data_private(
126 xfs_extlen_t agsize; 126 xfs_extlen_t agsize;
127 xfs_extlen_t tmpsize; 127 xfs_extlen_t tmpsize;
128 xfs_alloc_rec_t *arec; 128 xfs_alloc_rec_t *arec;
129 xfs_btree_sblock_t *block; 129 struct xfs_btree_block *block;
130 xfs_buf_t *bp; 130 xfs_buf_t *bp;
131 int bucket; 131 int bucket;
132 int dpct; 132 int dpct;
@@ -251,14 +251,14 @@ xfs_growfs_data_private(
251 bp = xfs_buf_get(mp->m_ddev_targp, 251 bp = xfs_buf_get(mp->m_ddev_targp,
252 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), 252 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
253 BTOBB(mp->m_sb.sb_blocksize), 0); 253 BTOBB(mp->m_sb.sb_blocksize), 0);
254 block = XFS_BUF_TO_SBLOCK(bp); 254 block = XFS_BUF_TO_BLOCK(bp);
255 memset(block, 0, mp->m_sb.sb_blocksize); 255 memset(block, 0, mp->m_sb.sb_blocksize);
256 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); 256 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
257 block->bb_level = 0; 257 block->bb_level = 0;
258 block->bb_numrecs = cpu_to_be16(1); 258 block->bb_numrecs = cpu_to_be16(1);
259 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 259 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
260 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 260 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
261 arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); 261 arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
262 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 262 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
263 arec->ar_blockcount = cpu_to_be32( 263 arec->ar_blockcount = cpu_to_be32(
264 agsize - be32_to_cpu(arec->ar_startblock)); 264 agsize - be32_to_cpu(arec->ar_startblock));
@@ -272,14 +272,14 @@ xfs_growfs_data_private(
272 bp = xfs_buf_get(mp->m_ddev_targp, 272 bp = xfs_buf_get(mp->m_ddev_targp,
273 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), 273 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
274 BTOBB(mp->m_sb.sb_blocksize), 0); 274 BTOBB(mp->m_sb.sb_blocksize), 0);
275 block = XFS_BUF_TO_SBLOCK(bp); 275 block = XFS_BUF_TO_BLOCK(bp);
276 memset(block, 0, mp->m_sb.sb_blocksize); 276 memset(block, 0, mp->m_sb.sb_blocksize);
277 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); 277 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
278 block->bb_level = 0; 278 block->bb_level = 0;
279 block->bb_numrecs = cpu_to_be16(1); 279 block->bb_numrecs = cpu_to_be16(1);
280 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 280 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
281 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 281 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
282 arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); 282 arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
283 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 283 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
284 arec->ar_blockcount = cpu_to_be32( 284 arec->ar_blockcount = cpu_to_be32(
285 agsize - be32_to_cpu(arec->ar_startblock)); 285 agsize - be32_to_cpu(arec->ar_startblock));
@@ -294,13 +294,13 @@ xfs_growfs_data_private(
294 bp = xfs_buf_get(mp->m_ddev_targp, 294 bp = xfs_buf_get(mp->m_ddev_targp,
295 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), 295 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
296 BTOBB(mp->m_sb.sb_blocksize), 0); 296 BTOBB(mp->m_sb.sb_blocksize), 0);
297 block = XFS_BUF_TO_SBLOCK(bp); 297 block = XFS_BUF_TO_BLOCK(bp);
298 memset(block, 0, mp->m_sb.sb_blocksize); 298 memset(block, 0, mp->m_sb.sb_blocksize);
299 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); 299 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
300 block->bb_level = 0; 300 block->bb_level = 0;
301 block->bb_numrecs = 0; 301 block->bb_numrecs = 0;
302 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 302 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
303 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 303 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
304 error = xfs_bwrite(mp, bp); 304 error = xfs_bwrite(mp, bp);
305 if (error) { 305 if (error) {
306 goto error0; 306 goto error0;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index aad8c5da38af..c8a56c529642 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -119,6 +119,102 @@ xfs_ialloc_cluster_alignment(
119} 119}
120 120
121/* 121/*
122 * Lookup the record equal to ino in the btree given by cur.
123 */
124STATIC int /* error */
125xfs_inobt_lookup_eq(
126 struct xfs_btree_cur *cur, /* btree cursor */
127 xfs_agino_t ino, /* starting inode of chunk */
128 __int32_t fcnt, /* free inode count */
129 xfs_inofree_t free, /* free inode mask */
130 int *stat) /* success/failure */
131{
132 cur->bc_rec.i.ir_startino = ino;
133 cur->bc_rec.i.ir_freecount = fcnt;
134 cur->bc_rec.i.ir_free = free;
135 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
136}
137
138/*
139 * Lookup the first record greater than or equal to ino
140 * in the btree given by cur.
141 */
142int /* error */
143xfs_inobt_lookup_ge(
144 struct xfs_btree_cur *cur, /* btree cursor */
145 xfs_agino_t ino, /* starting inode of chunk */
146 __int32_t fcnt, /* free inode count */
147 xfs_inofree_t free, /* free inode mask */
148 int *stat) /* success/failure */
149{
150 cur->bc_rec.i.ir_startino = ino;
151 cur->bc_rec.i.ir_freecount = fcnt;
152 cur->bc_rec.i.ir_free = free;
153 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
154}
155
156/*
157 * Lookup the first record less than or equal to ino
158 * in the btree given by cur.
159 */
160int /* error */
161xfs_inobt_lookup_le(
162 struct xfs_btree_cur *cur, /* btree cursor */
163 xfs_agino_t ino, /* starting inode of chunk */
164 __int32_t fcnt, /* free inode count */
165 xfs_inofree_t free, /* free inode mask */
166 int *stat) /* success/failure */
167{
168 cur->bc_rec.i.ir_startino = ino;
169 cur->bc_rec.i.ir_freecount = fcnt;
170 cur->bc_rec.i.ir_free = free;
171 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
172}
173
174/*
175 * Update the record referred to by cur to the value given
176 * by [ino, fcnt, free].
177 * This either works (return 0) or gets an EFSCORRUPTED error.
178 */
179STATIC int /* error */
180xfs_inobt_update(
181 struct xfs_btree_cur *cur, /* btree cursor */
182 xfs_agino_t ino, /* starting inode of chunk */
183 __int32_t fcnt, /* free inode count */
184 xfs_inofree_t free) /* free inode mask */
185{
186 union xfs_btree_rec rec;
187
188 rec.inobt.ir_startino = cpu_to_be32(ino);
189 rec.inobt.ir_freecount = cpu_to_be32(fcnt);
190 rec.inobt.ir_free = cpu_to_be64(free);
191 return xfs_btree_update(cur, &rec);
192}
193
194/*
195 * Get the data from the pointed-to record.
196 */
197int /* error */
198xfs_inobt_get_rec(
199 struct xfs_btree_cur *cur, /* btree cursor */
200 xfs_agino_t *ino, /* output: starting inode of chunk */
201 __int32_t *fcnt, /* output: number of free inodes */
202 xfs_inofree_t *free, /* output: free inode mask */
203 int *stat) /* output: success/failure */
204{
205 union xfs_btree_rec *rec;
206 int error;
207
208 error = xfs_btree_get_rec(cur, &rec, stat);
209 if (!error && *stat == 1) {
210 *ino = be32_to_cpu(rec->inobt.ir_startino);
211 *fcnt = be32_to_cpu(rec->inobt.ir_freecount);
212 *free = be64_to_cpu(rec->inobt.ir_free);
213 }
214 return error;
215}
216
217/*
122 * Allocate new inodes in the allocation group specified by agbp. 218 * Allocate new inodes in the allocation group specified by agbp.
123 * Return 0 for success, else error code. 219 * Return 0 for success, else error code.
124 */ 220 */
@@ -335,8 +431,7 @@ xfs_ialloc_ag_alloc(
335 /* 431 /*
336 * Insert records describing the new inode chunk into the btree. 432 * Insert records describing the new inode chunk into the btree.
337 */ 433 */
338 cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno, 434 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
339 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
340 for (thisino = newino; 435 for (thisino = newino;
341 thisino < newino + newlen; 436 thisino < newino + newlen;
342 thisino += XFS_INODES_PER_CHUNK) { 437 thisino += XFS_INODES_PER_CHUNK) {
@@ -346,7 +441,7 @@ xfs_ialloc_ag_alloc(
346 return error; 441 return error;
347 } 442 }
348 ASSERT(i == 0); 443 ASSERT(i == 0);
349 if ((error = xfs_inobt_insert(cur, &i))) { 444 if ((error = xfs_btree_insert(cur, &i))) {
350 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 445 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
351 return error; 446 return error;
352 } 447 }
@@ -676,8 +771,7 @@ nextag:
676 */ 771 */
677 agno = tagno; 772 agno = tagno;
678 *IO_agbp = NULL; 773 *IO_agbp = NULL;
679 cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno), 774 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
680 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
681 /* 775 /*
682 * If pagino is 0 (this is the root inode allocation) use newino. 776 * If pagino is 0 (this is the root inode allocation) use newino.
683 * This must work because we've just allocated some. 777 * This must work because we've just allocated some.
@@ -697,7 +791,7 @@ nextag:
697 goto error0; 791 goto error0;
698 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 792 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
699 freecount += rec.ir_freecount; 793 freecount += rec.ir_freecount;
700 if ((error = xfs_inobt_increment(cur, 0, &i))) 794 if ((error = xfs_btree_increment(cur, 0, &i)))
701 goto error0; 795 goto error0;
702 } while (i == 1); 796 } while (i == 1);
703 797
@@ -741,7 +835,7 @@ nextag:
741 /* 835 /*
742 * Search left with tcur, back up 1 record. 836 * Search left with tcur, back up 1 record.
743 */ 837 */
744 if ((error = xfs_inobt_decrement(tcur, 0, &i))) 838 if ((error = xfs_btree_decrement(tcur, 0, &i)))
745 goto error1; 839 goto error1;
746 doneleft = !i; 840 doneleft = !i;
747 if (!doneleft) { 841 if (!doneleft) {
@@ -755,7 +849,7 @@ nextag:
755 /* 849 /*
756 * Search right with cur, go forward 1 record. 850 * Search right with cur, go forward 1 record.
757 */ 851 */
758 if ((error = xfs_inobt_increment(cur, 0, &i))) 852 if ((error = xfs_btree_increment(cur, 0, &i)))
759 goto error1; 853 goto error1;
760 doneright = !i; 854 doneright = !i;
761 if (!doneright) { 855 if (!doneright) {
@@ -817,7 +911,7 @@ nextag:
817 * further left. 911 * further left.
818 */ 912 */
819 if (useleft) { 913 if (useleft) {
820 if ((error = xfs_inobt_decrement(tcur, 0, 914 if ((error = xfs_btree_decrement(tcur, 0,
821 &i))) 915 &i)))
822 goto error1; 916 goto error1;
823 doneleft = !i; 917 doneleft = !i;
@@ -837,7 +931,7 @@ nextag:
837 * further right. 931 * further right.
838 */ 932 */
839 else { 933 else {
840 if ((error = xfs_inobt_increment(cur, 0, 934 if ((error = xfs_btree_increment(cur, 0,
841 &i))) 935 &i)))
842 goto error1; 936 goto error1;
843 doneright = !i; 937 doneright = !i;
@@ -892,7 +986,7 @@ nextag:
892 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 986 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
893 if (rec.ir_freecount > 0) 987 if (rec.ir_freecount > 0)
894 break; 988 break;
895 if ((error = xfs_inobt_increment(cur, 0, &i))) 989 if ((error = xfs_btree_increment(cur, 0, &i)))
896 goto error0; 990 goto error0;
897 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 991 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
898 } 992 }
@@ -926,7 +1020,7 @@ nextag:
926 goto error0; 1020 goto error0;
927 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1021 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
928 freecount += rec.ir_freecount; 1022 freecount += rec.ir_freecount;
929 if ((error = xfs_inobt_increment(cur, 0, &i))) 1023 if ((error = xfs_btree_increment(cur, 0, &i)))
930 goto error0; 1024 goto error0;
931 } while (i == 1); 1025 } while (i == 1);
932 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || 1026 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
@@ -1022,8 +1116,7 @@ xfs_difree(
1022 /* 1116 /*
1023 * Initialize the cursor. 1117 * Initialize the cursor.
1024 */ 1118 */
1025 cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, 1119 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1026 (xfs_inode_t *)0, 0);
1027#ifdef DEBUG 1120#ifdef DEBUG
1028 if (cur->bc_nlevels == 1) { 1121 if (cur->bc_nlevels == 1) {
1029 int freecount = 0; 1122 int freecount = 0;
@@ -1036,7 +1129,7 @@ xfs_difree(
1036 goto error0; 1129 goto error0;
1037 if (i) { 1130 if (i) {
1038 freecount += rec.ir_freecount; 1131 freecount += rec.ir_freecount;
1039 if ((error = xfs_inobt_increment(cur, 0, &i))) 1132 if ((error = xfs_btree_increment(cur, 0, &i)))
1040 goto error0; 1133 goto error0;
1041 } 1134 }
1042 } while (i == 1); 1135 } while (i == 1);
@@ -1098,8 +1191,8 @@ xfs_difree(
1098 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1191 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1099 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1192 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1100 1193
1101 if ((error = xfs_inobt_delete(cur, &i))) { 1194 if ((error = xfs_btree_delete(cur, &i))) {
1102 cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n", 1195 cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
1103 error, mp->m_fsname); 1196 error, mp->m_fsname);
1104 goto error0; 1197 goto error0;
1105 } 1198 }
@@ -1141,7 +1234,7 @@ xfs_difree(
1141 goto error0; 1234 goto error0;
1142 if (i) { 1235 if (i) {
1143 freecount += rec.ir_freecount; 1236 freecount += rec.ir_freecount;
1144 if ((error = xfs_inobt_increment(cur, 0, &i))) 1237 if ((error = xfs_btree_increment(cur, 0, &i)))
1145 goto error0; 1238 goto error0;
1146 } 1239 }
1147 } while (i == 1); 1240 } while (i == 1);
@@ -1259,8 +1352,7 @@ xfs_dilocate(
1259#endif /* DEBUG */ 1352#endif /* DEBUG */
1260 return error; 1353 return error;
1261 } 1354 }
1262 cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, 1355 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1263 (xfs_inode_t *)0, 0);
1264 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { 1356 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
1265#ifdef DEBUG 1357#ifdef DEBUG
1266 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " 1358 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 4e30ec1d13bc..ccf554a6e0a1 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -56,7 +56,6 @@ static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
56} 56}
57 57
58 58
59#ifdef __KERNEL__
60/* 59/*
61 * Allocate an inode on disk. 60 * Allocate an inode on disk.
62 * Mode is used to tell whether the new inode will need space, and whether 61 * Mode is used to tell whether the new inode will need space, and whether
@@ -154,6 +153,24 @@ xfs_ialloc_pagi_init(
154 struct xfs_trans *tp, /* transaction pointer */ 153 struct xfs_trans *tp, /* transaction pointer */
155 xfs_agnumber_t agno); /* allocation group number */ 154 xfs_agnumber_t agno); /* allocation group number */
156 155
157#endif /* __KERNEL__ */ 156/*
157 * Lookup the first record greater than or equal to ino
158 * in the btree given by cur.
159 */
160int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
161 __int32_t fcnt, xfs_inofree_t free, int *stat);
162
163/*
164 * Lookup the first record less than or equal to ino
165 * in the btree given by cur.
166 */
167int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
168 __int32_t fcnt, xfs_inofree_t free, int *stat);
169
170/*
171 * Get the data from the pointed-to record.
172 */
173extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
174 __int32_t *fcnt, xfs_inofree_t *free, int *stat);
158 175
159#endif /* __XFS_IALLOC_H__ */ 176#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 83502f3edef0..99f2408e8d8e 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -35,2044 +35,349 @@
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 40#include "xfs_alloc.h"
40#include "xfs_error.h" 41#include "xfs_error.h"
41 42
42STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
43STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
44STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
45STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
47STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
48STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
49STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
50 xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
51STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
52 43
53/* 44STATIC int
54 * Single level of the xfs_inobt_delete record deletion routine. 45xfs_inobt_get_minrecs(
55 * Delete record pointed to by cur/level. 46 struct xfs_btree_cur *cur,
56 * Remove the record from its block then rebalance the tree. 47 int level)
57 * Return 0 for error, 1 for done, 2 to go on to the next level.
58 */
59STATIC int /* error */
60xfs_inobt_delrec(
61 xfs_btree_cur_t *cur, /* btree cursor */
62 int level, /* level removing record from */
63 int *stat) /* fail/done/go-on */
64{ 48{
65 xfs_buf_t *agbp; /* buffer for a.g. inode header */ 49 return cur->bc_mp->m_inobt_mnr[level != 0];
66 xfs_mount_t *mp; /* mount structure */ 50}
67 xfs_agi_t *agi; /* allocation group inode header */
68 xfs_inobt_block_t *block; /* btree block record/key lives in */
69 xfs_agblock_t bno; /* btree block number */
70 xfs_buf_t *bp; /* buffer for block */
71 int error; /* error return value */
72 int i; /* loop index */
73 xfs_inobt_key_t key; /* kp points here if block is level 0 */
74 xfs_inobt_key_t *kp = NULL; /* pointer to btree keys */
75 xfs_agblock_t lbno; /* left block's block number */
76 xfs_buf_t *lbp; /* left block's buffer pointer */
77 xfs_inobt_block_t *left; /* left btree block */
78 xfs_inobt_key_t *lkp; /* left block key pointer */
79 xfs_inobt_ptr_t *lpp; /* left block address pointer */
80 int lrecs = 0; /* number of records in left block */
81 xfs_inobt_rec_t *lrp; /* left block record pointer */
82 xfs_inobt_ptr_t *pp = NULL; /* pointer to btree addresses */
83 int ptr; /* index in btree block for this rec */
84 xfs_agblock_t rbno; /* right block's block number */
85 xfs_buf_t *rbp; /* right block's buffer pointer */
86 xfs_inobt_block_t *right; /* right btree block */
87 xfs_inobt_key_t *rkp; /* right block key pointer */
88 xfs_inobt_rec_t *rp; /* pointer to btree records */
89 xfs_inobt_ptr_t *rpp; /* right block address pointer */
90 int rrecs = 0; /* number of records in right block */
91 int numrecs;
92 xfs_inobt_rec_t *rrp; /* right block record pointer */
93 xfs_btree_cur_t *tcur; /* temporary btree cursor */
94
95 mp = cur->bc_mp;
96
97 /*
98 * Get the index of the entry being deleted, check for nothing there.
99 */
100 ptr = cur->bc_ptrs[level];
101 if (ptr == 0) {
102 *stat = 0;
103 return 0;
104 }
105
106 /*
107 * Get the buffer & block containing the record or key/ptr.
108 */
109 bp = cur->bc_bufs[level];
110 block = XFS_BUF_TO_INOBT_BLOCK(bp);
111#ifdef DEBUG
112 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
113 return error;
114#endif
115 /*
116 * Fail if we're off the end of the block.
117 */
118 51
119 numrecs = be16_to_cpu(block->bb_numrecs); 52STATIC struct xfs_btree_cur *
120 if (ptr > numrecs) { 53xfs_inobt_dup_cursor(
121 *stat = 0; 54 struct xfs_btree_cur *cur)
122 return 0; 55{
123 } 56 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
124 /* 57 cur->bc_private.a.agbp, cur->bc_private.a.agno);
125 * It's a nonleaf. Excise the key and ptr being deleted, by 58}
126 * sliding the entries past them down one.
127 * Log the changed areas of the block.
128 */
129 if (level > 0) {
130 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
131 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
132#ifdef DEBUG
133 for (i = ptr; i < numrecs; i++) {
134 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
135 return error;
136 }
137#endif
138 if (ptr < numrecs) {
139 memmove(&kp[ptr - 1], &kp[ptr],
140 (numrecs - ptr) * sizeof(*kp));
141 memmove(&pp[ptr - 1], &pp[ptr],
142 (numrecs - ptr) * sizeof(*kp));
143 xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
144 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
145 }
146 }
147 /*
148 * It's a leaf. Excise the record being deleted, by sliding the
149 * entries past it down one. Log the changed areas of the block.
150 */
151 else {
152 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
153 if (ptr < numrecs) {
154 memmove(&rp[ptr - 1], &rp[ptr],
155 (numrecs - ptr) * sizeof(*rp));
156 xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
157 }
158 /*
159 * If it's the first record in the block, we'll need a key
160 * structure to pass up to the next level (updkey).
161 */
162 if (ptr == 1) {
163 key.ir_startino = rp->ir_startino;
164 kp = &key;
165 }
166 }
167 /*
168 * Decrement and log the number of entries in the block.
169 */
170 numrecs--;
171 block->bb_numrecs = cpu_to_be16(numrecs);
172 xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
173 /*
174 * Is this the root level? If so, we're almost done.
175 */
176 if (level == cur->bc_nlevels - 1) {
177 /*
178 * If this is the root level,
179 * and there's only one entry left,
180 * and it's NOT the leaf level,
181 * then we can get rid of this level.
182 */
183 if (numrecs == 1 && level > 0) {
184 agbp = cur->bc_private.a.agbp;
185 agi = XFS_BUF_TO_AGI(agbp);
186 /*
187 * pp is still set to the first pointer in the block.
188 * Make it the new root of the btree.
189 */
190 bno = be32_to_cpu(agi->agi_root);
191 agi->agi_root = *pp;
192 be32_add_cpu(&agi->agi_level, -1);
193 /*
194 * Free the block.
195 */
196 if ((error = xfs_free_extent(cur->bc_tp,
197 XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
198 return error;
199 xfs_trans_binval(cur->bc_tp, bp);
200 xfs_ialloc_log_agi(cur->bc_tp, agbp,
201 XFS_AGI_ROOT | XFS_AGI_LEVEL);
202 /*
203 * Update the cursor so there's one fewer level.
204 */
205 cur->bc_bufs[level] = NULL;
206 cur->bc_nlevels--;
207 } else if (level > 0 &&
208 (error = xfs_inobt_decrement(cur, level, &i)))
209 return error;
210 *stat = 1;
211 return 0;
212 }
213 /*
214 * If we deleted the leftmost entry in the block, update the
215 * key values above us in the tree.
216 */
217 if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
218 return error;
219 /*
220 * If the number of records remaining in the block is at least
221 * the minimum, we're done.
222 */
223 if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
224 if (level > 0 &&
225 (error = xfs_inobt_decrement(cur, level, &i)))
226 return error;
227 *stat = 1;
228 return 0;
229 }
230 /*
231 * Otherwise, we have to move some records around to keep the
232 * tree balanced. Look at the left and right sibling blocks to
233 * see if we can re-balance by moving only one record.
234 */
235 rbno = be32_to_cpu(block->bb_rightsib);
236 lbno = be32_to_cpu(block->bb_leftsib);
237 bno = NULLAGBLOCK;
238 ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
239 /*
240 * Duplicate the cursor so our btree manipulations here won't
241 * disrupt the next level up.
242 */
243 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
244 return error;
245 /*
246 * If there's a right sibling, see if it's ok to shift an entry
247 * out of it.
248 */
249 if (rbno != NULLAGBLOCK) {
250 /*
251 * Move the temp cursor to the last entry in the next block.
252 * Actually any entry but the first would suffice.
253 */
254 i = xfs_btree_lastrec(tcur, level);
255 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
256 if ((error = xfs_inobt_increment(tcur, level, &i)))
257 goto error0;
258 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
259 i = xfs_btree_lastrec(tcur, level);
260 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
261 /*
262 * Grab a pointer to the block.
263 */
264 rbp = tcur->bc_bufs[level];
265 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
266#ifdef DEBUG
267 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
268 goto error0;
269#endif
270 /*
271 * Grab the current block number, for future use.
272 */
273 bno = be32_to_cpu(right->bb_leftsib);
274 /*
275 * If right block is full enough so that removing one entry
276 * won't make it too empty, and left-shifting an entry out
277 * of right to us works, we're done.
278 */
279 if (be16_to_cpu(right->bb_numrecs) - 1 >=
280 XFS_INOBT_BLOCK_MINRECS(level, cur)) {
281 if ((error = xfs_inobt_lshift(tcur, level, &i)))
282 goto error0;
283 if (i) {
284 ASSERT(be16_to_cpu(block->bb_numrecs) >=
285 XFS_INOBT_BLOCK_MINRECS(level, cur));
286 xfs_btree_del_cursor(tcur,
287 XFS_BTREE_NOERROR);
288 if (level > 0 &&
289 (error = xfs_inobt_decrement(cur, level,
290 &i)))
291 return error;
292 *stat = 1;
293 return 0;
294 }
295 }
296 /*
297 * Otherwise, grab the number of records in right for
298 * future reference, and fix up the temp cursor to point
299 * to our block again (last record).
300 */
301 rrecs = be16_to_cpu(right->bb_numrecs);
302 if (lbno != NULLAGBLOCK) {
303 xfs_btree_firstrec(tcur, level);
304 if ((error = xfs_inobt_decrement(tcur, level, &i)))
305 goto error0;
306 }
307 }
308 /*
309 * If there's a left sibling, see if it's ok to shift an entry
310 * out of it.
311 */
312 if (lbno != NULLAGBLOCK) {
313 /*
314 * Move the temp cursor to the first entry in the
315 * previous block.
316 */
317 xfs_btree_firstrec(tcur, level);
318 if ((error = xfs_inobt_decrement(tcur, level, &i)))
319 goto error0;
320 xfs_btree_firstrec(tcur, level);
321 /*
322 * Grab a pointer to the block.
323 */
324 lbp = tcur->bc_bufs[level];
325 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
326#ifdef DEBUG
327 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
328 goto error0;
329#endif
330 /*
331 * Grab the current block number, for future use.
332 */
333 bno = be32_to_cpu(left->bb_rightsib);
334 /*
335 * If left block is full enough so that removing one entry
336 * won't make it too empty, and right-shifting an entry out
337 * of left to us works, we're done.
338 */
339 if (be16_to_cpu(left->bb_numrecs) - 1 >=
340 XFS_INOBT_BLOCK_MINRECS(level, cur)) {
341 if ((error = xfs_inobt_rshift(tcur, level, &i)))
342 goto error0;
343 if (i) {
344 ASSERT(be16_to_cpu(block->bb_numrecs) >=
345 XFS_INOBT_BLOCK_MINRECS(level, cur));
346 xfs_btree_del_cursor(tcur,
347 XFS_BTREE_NOERROR);
348 if (level == 0)
349 cur->bc_ptrs[0]++;
350 *stat = 1;
351 return 0;
352 }
353 }
354 /*
355 * Otherwise, grab the number of records in right for
356 * future reference.
357 */
358 lrecs = be16_to_cpu(left->bb_numrecs);
359 }
360 /*
361 * Delete the temp cursor, we're done with it.
362 */
363 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
364 /*
365 * If here, we need to do a join to keep the tree balanced.
366 */
367 ASSERT(bno != NULLAGBLOCK);
368 /*
369 * See if we can join with the left neighbor block.
370 */
371 if (lbno != NULLAGBLOCK &&
372 lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
373 /*
374 * Set "right" to be the starting block,
375 * "left" to be the left neighbor.
376 */
377 rbno = bno;
378 right = block;
379 rrecs = be16_to_cpu(right->bb_numrecs);
380 rbp = bp;
381 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
382 cur->bc_private.a.agno, lbno, 0, &lbp,
383 XFS_INO_BTREE_REF)))
384 return error;
385 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
386 lrecs = be16_to_cpu(left->bb_numrecs);
387 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
388 return error;
389 }
390 /*
391 * If that won't work, see if we can join with the right neighbor block.
392 */
393 else if (rbno != NULLAGBLOCK &&
394 rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
395 /*
396 * Set "left" to be the starting block,
397 * "right" to be the right neighbor.
398 */
399 lbno = bno;
400 left = block;
401 lrecs = be16_to_cpu(left->bb_numrecs);
402 lbp = bp;
403 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
404 cur->bc_private.a.agno, rbno, 0, &rbp,
405 XFS_INO_BTREE_REF)))
406 return error;
407 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
408 rrecs = be16_to_cpu(right->bb_numrecs);
409 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
410 return error;
411 }
412 /*
413 * Otherwise, we can't fix the imbalance.
414 * Just return. This is probably a logic error, but it's not fatal.
415 */
416 else {
417 if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
418 return error;
419 *stat = 1;
420 return 0;
421 }
422 /*
423 * We're now going to join "left" and "right" by moving all the stuff
424 * in "right" to "left" and deleting "right".
425 */
426 if (level > 0) {
427 /*
428 * It's a non-leaf. Move keys and pointers.
429 */
430 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
431 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
432 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
433 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
434#ifdef DEBUG
435 for (i = 0; i < rrecs; i++) {
436 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
437 return error;
438 }
439#endif
440 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
441 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
442 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
443 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
444 } else {
445 /*
446 * It's a leaf. Move records.
447 */
448 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
449 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
450 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
451 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
452 }
453 /*
454 * If we joined with the left neighbor, set the buffer in the
455 * cursor to the left block, and fix up the index.
456 */
457 if (bp != lbp) {
458 xfs_btree_setbuf(cur, level, lbp);
459 cur->bc_ptrs[level] += lrecs;
460 }
461 /*
462 * If we joined with the right neighbor and there's a level above
463 * us, increment the cursor at that level.
464 */
465 else if (level + 1 < cur->bc_nlevels &&
466 (error = xfs_alloc_increment(cur, level + 1, &i)))
467 return error;
468 /*
469 * Fix up the number of records in the surviving block.
470 */
471 lrecs += rrecs;
472 left->bb_numrecs = cpu_to_be16(lrecs);
473 /*
474 * Fix up the right block pointer in the surviving block, and log it.
475 */
476 left->bb_rightsib = right->bb_rightsib;
477 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
478 /*
479 * If there is a right sibling now, make it point to the
480 * remaining block.
481 */
482 if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
483 xfs_inobt_block_t *rrblock;
484 xfs_buf_t *rrbp;
485 59
486 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 60STATIC void
487 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, 61xfs_inobt_set_root(
488 &rrbp, XFS_INO_BTREE_REF))) 62 struct xfs_btree_cur *cur,
489 return error; 63 union xfs_btree_ptr *nptr,
490 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); 64 int inc) /* level change */
491 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) 65{
492 return error; 66 struct xfs_buf *agbp = cur->bc_private.a.agbp;
493 rrblock->bb_leftsib = cpu_to_be32(lbno); 67 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
494 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
495 }
496 /*
497 * Free the deleting block.
498 */
499 if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
500 cur->bc_private.a.agno, rbno), 1)))
501 return error;
502 xfs_trans_binval(cur->bc_tp, rbp);
503 /*
504 * Readjust the ptr at this level if it's not a leaf, since it's
505 * still pointing at the deletion point, which makes the cursor
506 * inconsistent. If this makes the ptr 0, the caller fixes it up.
507 * We can't use decrement because it would change the next level up.
508 */
509 if (level > 0)
510 cur->bc_ptrs[level]--;
511 /*
512 * Return value means the next level up has something to do.
513 */
514 *stat = 2;
515 return 0;
516 68
517error0: 69 agi->agi_root = nptr->s;
518 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 70 be32_add_cpu(&agi->agi_level, inc);
519 return error; 71 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
520} 72}
521 73
522/* 74STATIC int
523 * Insert one record/level. Return information to the caller 75xfs_inobt_alloc_block(
524 * allowing the next level up to proceed if necessary. 76 struct xfs_btree_cur *cur,
525 */ 77 union xfs_btree_ptr *start,
526STATIC int /* error */ 78 union xfs_btree_ptr *new,
527xfs_inobt_insrec( 79 int length,
528 xfs_btree_cur_t *cur, /* btree cursor */ 80 int *stat)
529 int level, /* level to insert record at */
530 xfs_agblock_t *bnop, /* i/o: block number inserted */
531 xfs_inobt_rec_t *recp, /* i/o: record data inserted */
532 xfs_btree_cur_t **curp, /* output: new cursor replacing cur */
533 int *stat) /* success/failure */
534{ 81{
535 xfs_inobt_block_t *block; /* btree block record/key lives in */ 82 xfs_alloc_arg_t args; /* block allocation args */
536 xfs_buf_t *bp; /* buffer for block */ 83 int error; /* error return value */
537 int error; /* error return value */ 84 xfs_agblock_t sbno = be32_to_cpu(start->s);
538 int i; /* loop index */
539 xfs_inobt_key_t key; /* key value being inserted */
540 xfs_inobt_key_t *kp=NULL; /* pointer to btree keys */
541 xfs_agblock_t nbno; /* block number of allocated block */
542 xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
543 xfs_inobt_key_t nkey; /* new key value, from split */
544 xfs_inobt_rec_t nrec; /* new record value, for caller */
545 int numrecs;
546 int optr; /* old ptr value */
547 xfs_inobt_ptr_t *pp; /* pointer to btree addresses */
548 int ptr; /* index in btree block for this rec */
549 xfs_inobt_rec_t *rp=NULL; /* pointer to btree records */
550 85
551 /* 86 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
552 * GCC doesn't understand the (arguably complex) control flow in
553 * this function and complains about uninitialized structure fields
554 * without this.
555 */
556 memset(&nrec, 0, sizeof(nrec));
557 87
558 /* 88 memset(&args, 0, sizeof(args));
559 * If we made it to the root level, allocate a new root block 89 args.tp = cur->bc_tp;
560 * and we're done. 90 args.mp = cur->bc_mp;
561 */ 91 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
562 if (level >= cur->bc_nlevels) { 92 args.minlen = 1;
563 error = xfs_inobt_newroot(cur, &i); 93 args.maxlen = 1;
564 *bnop = NULLAGBLOCK; 94 args.prod = 1;
565 *stat = i; 95 args.type = XFS_ALLOCTYPE_NEAR_BNO;
96
97 error = xfs_alloc_vextent(&args);
98 if (error) {
99 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
566 return error; 100 return error;
567 } 101 }
568 /* 102 if (args.fsbno == NULLFSBLOCK) {
569 * Make a key out of the record data to be inserted, and save it. 103 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
570 */
571 key.ir_startino = recp->ir_startino;
572 optr = ptr = cur->bc_ptrs[level];
573 /*
574 * If we're off the left edge, return failure.
575 */
576 if (ptr == 0) {
577 *stat = 0; 104 *stat = 0;
578 return 0; 105 return 0;
579 } 106 }
580 /* 107 ASSERT(args.len == 1);
581 * Get pointers to the btree buffer and block. 108 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
582 */ 109
583 bp = cur->bc_bufs[level]; 110 new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
584 block = XFS_BUF_TO_INOBT_BLOCK(bp);
585 numrecs = be16_to_cpu(block->bb_numrecs);
586#ifdef DEBUG
587 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
588 return error;
589 /*
590 * Check that the new entry is being inserted in the right place.
591 */
592 if (ptr <= numrecs) {
593 if (level == 0) {
594 rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
595 xfs_btree_check_rec(cur->bc_btnum, recp, rp);
596 } else {
597 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
598 xfs_btree_check_key(cur->bc_btnum, &key, kp);
599 }
600 }
601#endif
602 nbno = NULLAGBLOCK;
603 ncur = NULL;
604 /*
605 * If the block is full, we can't insert the new entry until we
606 * make the block un-full.
607 */
608 if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
609 /*
610 * First, try shifting an entry to the right neighbor.
611 */
612 if ((error = xfs_inobt_rshift(cur, level, &i)))
613 return error;
614 if (i) {
615 /* nothing */
616 }
617 /*
618 * Next, try shifting an entry to the left neighbor.
619 */
620 else {
621 if ((error = xfs_inobt_lshift(cur, level, &i)))
622 return error;
623 if (i) {
624 optr = ptr = cur->bc_ptrs[level];
625 } else {
626 /*
627 * Next, try splitting the current block
628 * in half. If this works we have to
629 * re-set our variables because
630 * we could be in a different block now.
631 */
632 if ((error = xfs_inobt_split(cur, level, &nbno,
633 &nkey, &ncur, &i)))
634 return error;
635 if (i) {
636 bp = cur->bc_bufs[level];
637 block = XFS_BUF_TO_INOBT_BLOCK(bp);
638#ifdef DEBUG
639 if ((error = xfs_btree_check_sblock(cur,
640 block, level, bp)))
641 return error;
642#endif
643 ptr = cur->bc_ptrs[level];
644 nrec.ir_startino = nkey.ir_startino;
645 } else {
646 /*
647 * Otherwise the insert fails.
648 */
649 *stat = 0;
650 return 0;
651 }
652 }
653 }
654 }
655 /*
656 * At this point we know there's room for our new entry in the block
657 * we're pointing at.
658 */
659 numrecs = be16_to_cpu(block->bb_numrecs);
660 if (level > 0) {
661 /*
662 * It's a non-leaf entry. Make a hole for the new data
663 * in the key and ptr regions of the block.
664 */
665 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
666 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
667#ifdef DEBUG
668 for (i = numrecs; i >= ptr; i--) {
669 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
670 return error;
671 }
672#endif
673 memmove(&kp[ptr], &kp[ptr - 1],
674 (numrecs - ptr + 1) * sizeof(*kp));
675 memmove(&pp[ptr], &pp[ptr - 1],
676 (numrecs - ptr + 1) * sizeof(*pp));
677 /*
678 * Now stuff the new data in, bump numrecs and log the new data.
679 */
680#ifdef DEBUG
681 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
682 return error;
683#endif
684 kp[ptr - 1] = key;
685 pp[ptr - 1] = cpu_to_be32(*bnop);
686 numrecs++;
687 block->bb_numrecs = cpu_to_be16(numrecs);
688 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
689 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
690 } else {
691 /*
692 * It's a leaf entry. Make a hole for the new record.
693 */
694 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
695 memmove(&rp[ptr], &rp[ptr - 1],
696 (numrecs - ptr + 1) * sizeof(*rp));
697 /*
698 * Now stuff the new record in, bump numrecs
699 * and log the new data.
700 */
701 rp[ptr - 1] = *recp;
702 numrecs++;
703 block->bb_numrecs = cpu_to_be16(numrecs);
704 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
705 }
706 /*
707 * Log the new number of records in the btree header.
708 */
709 xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
710#ifdef DEBUG
711 /*
712 * Check that the key/record is in the right place, now.
713 */
714 if (ptr < numrecs) {
715 if (level == 0)
716 xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
717 rp + ptr);
718 else
719 xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
720 kp + ptr);
721 }
722#endif
723 /*
724 * If we inserted at the start of a block, update the parents' keys.
725 */
726 if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
727 return error;
728 /*
729 * Return the new block number, if any.
730 * If there is one, give back a record value and a cursor too.
731 */
732 *bnop = nbno;
733 if (nbno != NULLAGBLOCK) {
734 *recp = nrec;
735 *curp = ncur;
736 }
737 *stat = 1; 111 *stat = 1;
738 return 0; 112 return 0;
739} 113}
740 114
741/* 115STATIC int
742 * Log header fields from a btree block. 116xfs_inobt_free_block(
743 */ 117 struct xfs_btree_cur *cur,
744STATIC void 118 struct xfs_buf *bp)
745xfs_inobt_log_block(
746 xfs_trans_t *tp, /* transaction pointer */
747 xfs_buf_t *bp, /* buffer containing btree block */
748 int fields) /* mask of fields: XFS_BB_... */
749{ 119{
750 int first; /* first byte offset logged */ 120 xfs_fsblock_t fsbno;
751 int last; /* last byte offset logged */ 121 int error;
752 static const short offsets[] = { /* table of offsets */
753 offsetof(xfs_inobt_block_t, bb_magic),
754 offsetof(xfs_inobt_block_t, bb_level),
755 offsetof(xfs_inobt_block_t, bb_numrecs),
756 offsetof(xfs_inobt_block_t, bb_leftsib),
757 offsetof(xfs_inobt_block_t, bb_rightsib),
758 sizeof(xfs_inobt_block_t)
759 };
760 122
761 xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); 123 fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
762 xfs_trans_log_buf(tp, bp, first, last); 124 error = xfs_free_extent(cur->bc_tp, fsbno, 1);
125 if (error)
126 return error;
127
128 xfs_trans_binval(cur->bc_tp, bp);
129 return error;
763} 130}
764 131
765/* 132STATIC int
766 * Log keys from a btree block (nonleaf). 133xfs_inobt_get_maxrecs(
767 */ 134 struct xfs_btree_cur *cur,
768STATIC void 135 int level)
769xfs_inobt_log_keys(
770 xfs_btree_cur_t *cur, /* btree cursor */
771 xfs_buf_t *bp, /* buffer containing btree block */
772 int kfirst, /* index of first key to log */
773 int klast) /* index of last key to log */
774{ 136{
775 xfs_inobt_block_t *block; /* btree block to log from */ 137 return cur->bc_mp->m_inobt_mxr[level != 0];
776 int first; /* first byte offset logged */
777 xfs_inobt_key_t *kp; /* key pointer in btree block */
778 int last; /* last byte offset logged */
779
780 block = XFS_BUF_TO_INOBT_BLOCK(bp);
781 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
782 first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
783 last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
784 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
785} 138}
786 139
787/*
788 * Log block pointer fields from a btree block (nonleaf).
789 */
790STATIC void 140STATIC void
791xfs_inobt_log_ptrs( 141xfs_inobt_init_key_from_rec(
792 xfs_btree_cur_t *cur, /* btree cursor */ 142 union xfs_btree_key *key,
793 xfs_buf_t *bp, /* buffer containing btree block */ 143 union xfs_btree_rec *rec)
794 int pfirst, /* index of first pointer to log */
795 int plast) /* index of last pointer to log */
796{ 144{
797 xfs_inobt_block_t *block; /* btree block to log from */ 145 key->inobt.ir_startino = rec->inobt.ir_startino;
798 int first; /* first byte offset logged */
799 int last; /* last byte offset logged */
800 xfs_inobt_ptr_t *pp; /* block-pointer pointer in btree blk */
801
802 block = XFS_BUF_TO_INOBT_BLOCK(bp);
803 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
804 first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
805 last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
806 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
807} 146}
808 147
809/*
810 * Log records from a btree block (leaf).
811 */
812STATIC void 148STATIC void
813xfs_inobt_log_recs( 149xfs_inobt_init_rec_from_key(
814 xfs_btree_cur_t *cur, /* btree cursor */ 150 union xfs_btree_key *key,
815 xfs_buf_t *bp, /* buffer containing btree block */ 151 union xfs_btree_rec *rec)
816 int rfirst, /* index of first record to log */
817 int rlast) /* index of last record to log */
818{ 152{
819 xfs_inobt_block_t *block; /* btree block to log from */ 153 rec->inobt.ir_startino = key->inobt.ir_startino;
820 int first; /* first byte offset logged */ 154}
821 int last; /* last byte offset logged */
822 xfs_inobt_rec_t *rp; /* record pointer for btree block */
823 155
824 block = XFS_BUF_TO_INOBT_BLOCK(bp); 156STATIC void
825 rp = XFS_INOBT_REC_ADDR(block, 1, cur); 157xfs_inobt_init_rec_from_cur(
826 first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); 158 struct xfs_btree_cur *cur,
827 last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); 159 union xfs_btree_rec *rec)
828 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 160{
161 rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
162 rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
163 rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
829} 164}
830 165
831/* 166/*
832 * Lookup the record. The cursor is made to point to it, based on dir. 167 * intial value of ptr for lookup
833 * Return 0 if can't find any such record, 1 for success.
834 */ 168 */
835STATIC int /* error */ 169STATIC void
836xfs_inobt_lookup( 170xfs_inobt_init_ptr_from_cur(
837 xfs_btree_cur_t *cur, /* btree cursor */ 171 struct xfs_btree_cur *cur,
838 xfs_lookup_t dir, /* <=, ==, or >= */ 172 union xfs_btree_ptr *ptr)
839 int *stat) /* success/failure */
840{ 173{
841 xfs_agblock_t agbno; /* a.g. relative btree block number */ 174 struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
842 xfs_agnumber_t agno; /* allocation group number */
843 xfs_inobt_block_t *block=NULL; /* current btree block */
844 __int64_t diff; /* difference for the current key */
845 int error; /* error return value */
846 int keyno=0; /* current key number */
847 int level; /* level in the btree */
848 xfs_mount_t *mp; /* file system mount point */
849
850 /*
851 * Get the allocation group header, and the root block number.
852 */
853 mp = cur->bc_mp;
854 {
855 xfs_agi_t *agi; /* a.g. inode header */
856
857 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
858 agno = be32_to_cpu(agi->agi_seqno);
859 agbno = be32_to_cpu(agi->agi_root);
860 }
861 /*
862 * Iterate over each level in the btree, starting at the root.
863 * For each level above the leaves, find the key we need, based
864 * on the lookup record, then follow the corresponding block
865 * pointer down to the next level.
866 */
867 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
868 xfs_buf_t *bp; /* buffer pointer for btree block */
869 xfs_daddr_t d; /* disk address of btree block */
870
871 /*
872 * Get the disk address we're looking for.
873 */
874 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
875 /*
876 * If the old buffer at this level is for a different block,
877 * throw it away, otherwise just use it.
878 */
879 bp = cur->bc_bufs[level];
880 if (bp && XFS_BUF_ADDR(bp) != d)
881 bp = NULL;
882 if (!bp) {
883 /*
884 * Need to get a new buffer. Read it, then
885 * set it in the cursor, releasing the old one.
886 */
887 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
888 agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
889 return error;
890 xfs_btree_setbuf(cur, level, bp);
891 /*
892 * Point to the btree block, now that we have the buffer
893 */
894 block = XFS_BUF_TO_INOBT_BLOCK(bp);
895 if ((error = xfs_btree_check_sblock(cur, block, level,
896 bp)))
897 return error;
898 } else
899 block = XFS_BUF_TO_INOBT_BLOCK(bp);
900 /*
901 * If we already had a key match at a higher level, we know
902 * we need to use the first entry in this block.
903 */
904 if (diff == 0)
905 keyno = 1;
906 /*
907 * Otherwise we need to search this block. Do a binary search.
908 */
909 else {
910 int high; /* high entry number */
911 xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */
912 xfs_inobt_rec_t *krbase=NULL;/* base of records in block */
913 int low; /* low entry number */
914 175
915 /* 176 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
916 * Get a pointer to keys or records.
917 */
918 if (level > 0)
919 kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
920 else
921 krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
922 /*
923 * Set low and high entry numbers, 1-based.
924 */
925 low = 1;
926 if (!(high = be16_to_cpu(block->bb_numrecs))) {
927 /*
928 * If the block is empty, the tree must
929 * be an empty leaf.
930 */
931 ASSERT(level == 0 && cur->bc_nlevels == 1);
932 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
933 *stat = 0;
934 return 0;
935 }
936 /*
937 * Binary search the block.
938 */
939 while (low <= high) {
940 xfs_agino_t startino; /* key value */
941
942 /*
943 * keyno is average of low and high.
944 */
945 keyno = (low + high) >> 1;
946 /*
947 * Get startino.
948 */
949 if (level > 0) {
950 xfs_inobt_key_t *kkp;
951
952 kkp = kkbase + keyno - 1;
953 startino = be32_to_cpu(kkp->ir_startino);
954 } else {
955 xfs_inobt_rec_t *krp;
956
957 krp = krbase + keyno - 1;
958 startino = be32_to_cpu(krp->ir_startino);
959 }
960 /*
961 * Compute difference to get next direction.
962 */
963 diff = (__int64_t)
964 startino - cur->bc_rec.i.ir_startino;
965 /*
966 * Less than, move right.
967 */
968 if (diff < 0)
969 low = keyno + 1;
970 /*
971 * Greater than, move left.
972 */
973 else if (diff > 0)
974 high = keyno - 1;
975 /*
976 * Equal, we're done.
977 */
978 else
979 break;
980 }
981 }
982 /*
983 * If there are more levels, set up for the next level
984 * by getting the block number and filling in the cursor.
985 */
986 if (level > 0) {
987 /*
988 * If we moved left, need the previous key number,
989 * unless there isn't one.
990 */
991 if (diff > 0 && --keyno < 1)
992 keyno = 1;
993 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
994#ifdef DEBUG
995 if ((error = xfs_btree_check_sptr(cur, agbno, level)))
996 return error;
997#endif
998 cur->bc_ptrs[level] = keyno;
999 }
1000 }
1001 /*
1002 * Done with the search.
1003 * See if we need to adjust the results.
1004 */
1005 if (dir != XFS_LOOKUP_LE && diff < 0) {
1006 keyno++;
1007 /*
1008 * If ge search and we went off the end of the block, but it's
1009 * not the last block, we're in the wrong block.
1010 */
1011 if (dir == XFS_LOOKUP_GE &&
1012 keyno > be16_to_cpu(block->bb_numrecs) &&
1013 be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1014 int i;
1015 177
1016 cur->bc_ptrs[0] = keyno; 178 ptr->s = agi->agi_root;
1017 if ((error = xfs_inobt_increment(cur, 0, &i)))
1018 return error;
1019 ASSERT(i == 1);
1020 *stat = 1;
1021 return 0;
1022 }
1023 }
1024 else if (dir == XFS_LOOKUP_LE && diff > 0)
1025 keyno--;
1026 cur->bc_ptrs[0] = keyno;
1027 /*
1028 * Return if we succeeded or not.
1029 */
1030 if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1031 *stat = 0;
1032 else
1033 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1034 return 0;
1035} 179}
1036 180
1037/* 181STATIC __int64_t
1038 * Move 1 record left from cur/level if possible. 182xfs_inobt_key_diff(
1039 * Update cur to reflect the new path. 183 struct xfs_btree_cur *cur,
1040 */ 184 union xfs_btree_key *key)
1041STATIC int /* error */
1042xfs_inobt_lshift(
1043 xfs_btree_cur_t *cur, /* btree cursor */
1044 int level, /* level to shift record on */
1045 int *stat) /* success/failure */
1046{ 185{
1047 int error; /* error return value */ 186 return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
1048#ifdef DEBUG 187 cur->bc_rec.i.ir_startino;
1049 int i; /* loop index */
1050#endif
1051 xfs_inobt_key_t key; /* key value for leaf level upward */
1052 xfs_buf_t *lbp; /* buffer for left neighbor block */
1053 xfs_inobt_block_t *left; /* left neighbor btree block */
1054 xfs_inobt_key_t *lkp=NULL; /* key pointer for left block */
1055 xfs_inobt_ptr_t *lpp; /* address pointer for left block */
1056 xfs_inobt_rec_t *lrp=NULL; /* record pointer for left block */
1057 int nrec; /* new number of left block entries */
1058 xfs_buf_t *rbp; /* buffer for right (current) block */
1059 xfs_inobt_block_t *right; /* right (current) btree block */
1060 xfs_inobt_key_t *rkp=NULL; /* key pointer for right block */
1061 xfs_inobt_ptr_t *rpp=NULL; /* address pointer for right block */
1062 xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */
1063
1064 /*
1065 * Set up variables for this block as "right".
1066 */
1067 rbp = cur->bc_bufs[level];
1068 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1069#ifdef DEBUG
1070 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1071 return error;
1072#endif
1073 /*
1074 * If we've got no left sibling then we can't shift an entry left.
1075 */
1076 if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1077 *stat = 0;
1078 return 0;
1079 }
1080 /*
1081 * If the cursor entry is the one that would be moved, don't
1082 * do it... it's too complicated.
1083 */
1084 if (cur->bc_ptrs[level] <= 1) {
1085 *stat = 0;
1086 return 0;
1087 }
1088 /*
1089 * Set up the left neighbor as "left".
1090 */
1091 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1092 cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1093 0, &lbp, XFS_INO_BTREE_REF)))
1094 return error;
1095 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1096 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1097 return error;
1098 /*
1099 * If it's full, it can't take another entry.
1100 */
1101 if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1102 *stat = 0;
1103 return 0;
1104 }
1105 nrec = be16_to_cpu(left->bb_numrecs) + 1;
1106 /*
1107 * If non-leaf, copy a key and a ptr to the left block.
1108 */
1109 if (level > 0) {
1110 lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1111 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1112 *lkp = *rkp;
1113 xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1114 lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1115 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1116#ifdef DEBUG
1117 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1118 return error;
1119#endif
1120 *lpp = *rpp;
1121 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1122 }
1123 /*
1124 * If leaf, copy a record to the left block.
1125 */
1126 else {
1127 lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1128 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1129 *lrp = *rrp;
1130 xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1131 }
1132 /*
1133 * Bump and log left's numrecs, decrement and log right's numrecs.
1134 */
1135 be16_add_cpu(&left->bb_numrecs, 1);
1136 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1137#ifdef DEBUG
1138 if (level > 0)
1139 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1140 else
1141 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1142#endif
1143 be16_add_cpu(&right->bb_numrecs, -1);
1144 xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1145 /*
1146 * Slide the contents of right down one entry.
1147 */
1148 if (level > 0) {
1149#ifdef DEBUG
1150 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1151 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1152 level)))
1153 return error;
1154 }
1155#endif
1156 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1157 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1158 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1159 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1160 } else {
1161 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1162 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1163 key.ir_startino = rrp->ir_startino;
1164 rkp = &key;
1165 }
1166 /*
1167 * Update the parent key values of right.
1168 */
1169 if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1170 return error;
1171 /*
1172 * Slide the cursor value left one.
1173 */
1174 cur->bc_ptrs[level]--;
1175 *stat = 1;
1176 return 0;
1177} 188}
1178 189
1179/* 190STATIC int
1180 * Allocate a new root block, fill it in. 191xfs_inobt_kill_root(
1181 */ 192 struct xfs_btree_cur *cur,
1182STATIC int /* error */ 193 struct xfs_buf *bp,
1183xfs_inobt_newroot( 194 int level,
1184 xfs_btree_cur_t *cur, /* btree cursor */ 195 union xfs_btree_ptr *newroot)
1185 int *stat) /* success/failure */
1186{ 196{
1187 xfs_agi_t *agi; /* a.g. inode header */ 197 int error;
1188 xfs_alloc_arg_t args; /* allocation argument structure */
1189 xfs_inobt_block_t *block; /* one half of the old root block */
1190 xfs_buf_t *bp; /* buffer containing block */
1191 int error; /* error return value */
1192 xfs_inobt_key_t *kp; /* btree key pointer */
1193 xfs_agblock_t lbno; /* left block number */
1194 xfs_buf_t *lbp; /* left buffer pointer */
1195 xfs_inobt_block_t *left; /* left btree block */
1196 xfs_buf_t *nbp; /* new (root) buffer */
1197 xfs_inobt_block_t *new; /* new (root) btree block */
1198 int nptr; /* new value for key index, 1 or 2 */
1199 xfs_inobt_ptr_t *pp; /* btree address pointer */
1200 xfs_agblock_t rbno; /* right block number */
1201 xfs_buf_t *rbp; /* right buffer pointer */
1202 xfs_inobt_block_t *right; /* right btree block */
1203 xfs_inobt_rec_t *rp; /* btree record pointer */
1204 198
1205 ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp)); 199 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
200 XFS_BTREE_STATS_INC(cur, killroot);
1206 201
1207 /* 202 /*
1208 * Get a block & a buffer. 203 * Update the root pointer, decreasing the level by 1 and then
204 * free the old root.
1209 */ 205 */
1210 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); 206 xfs_inobt_set_root(cur, newroot, -1);
1211 args.tp = cur->bc_tp; 207 error = xfs_inobt_free_block(cur, bp);
1212 args.mp = cur->bc_mp; 208 if (error) {
1213 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, 209 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1214 be32_to_cpu(agi->agi_root));
1215 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1216 args.isfl = args.userdata = args.minalignslop = 0;
1217 args.minlen = args.maxlen = args.prod = 1;
1218 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1219 if ((error = xfs_alloc_vextent(&args)))
1220 return error; 210 return error;
1221 /*
1222 * None available, we fail.
1223 */
1224 if (args.fsbno == NULLFSBLOCK) {
1225 *stat = 0;
1226 return 0;
1227 }
1228 ASSERT(args.len == 1);
1229 nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1230 new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1231 /*
1232 * Set the root data in the a.g. inode structure.
1233 */
1234 agi->agi_root = cpu_to_be32(args.agbno);
1235 be32_add_cpu(&agi->agi_level, 1);
1236 xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
1237 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1238 /*
1239 * At the previous root level there are now two blocks: the old
1240 * root, and the new block generated when it was split.
1241 * We don't know which one the cursor is pointing at, so we
1242 * set up variables "left" and "right" for each case.
1243 */
1244 bp = cur->bc_bufs[cur->bc_nlevels - 1];
1245 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1246#ifdef DEBUG
1247 if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1248 return error;
1249#endif
1250 if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1251 /*
1252 * Our block is left, pick up the right block.
1253 */
1254 lbp = bp;
1255 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1256 left = block;
1257 rbno = be32_to_cpu(left->bb_rightsib);
1258 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1259 rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1260 return error;
1261 bp = rbp;
1262 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1263 if ((error = xfs_btree_check_sblock(cur, right,
1264 cur->bc_nlevels - 1, rbp)))
1265 return error;
1266 nptr = 1;
1267 } else {
1268 /*
1269 * Our block is right, pick up the left block.
1270 */
1271 rbp = bp;
1272 rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1273 right = block;
1274 lbno = be32_to_cpu(right->bb_leftsib);
1275 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1276 lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1277 return error;
1278 bp = lbp;
1279 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1280 if ((error = xfs_btree_check_sblock(cur, left,
1281 cur->bc_nlevels - 1, lbp)))
1282 return error;
1283 nptr = 2;
1284 }
1285 /*
1286 * Fill in the new block's btree header and log it.
1287 */
1288 new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1289 new->bb_level = cpu_to_be16(cur->bc_nlevels);
1290 new->bb_numrecs = cpu_to_be16(2);
1291 new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1292 new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1293 xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1294 ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1295 /*
1296 * Fill in the key data in the new root.
1297 */
1298 kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1299 if (be16_to_cpu(left->bb_level) > 0) {
1300 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur);
1301 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur);
1302 } else {
1303 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1304 kp[0].ir_startino = rp->ir_startino;
1305 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1306 kp[1].ir_startino = rp->ir_startino;
1307 } 211 }
1308 xfs_inobt_log_keys(cur, nbp, 1, 2);
1309 /*
1310 * Fill in the pointer data in the new root.
1311 */
1312 pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1313 pp[0] = cpu_to_be32(lbno);
1314 pp[1] = cpu_to_be32(rbno);
1315 xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1316 /*
1317 * Fix up the cursor.
1318 */
1319 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1320 cur->bc_ptrs[cur->bc_nlevels] = nptr;
1321 cur->bc_nlevels++;
1322 *stat = 1;
1323 return 0;
1324}
1325 212
1326/* 213 XFS_BTREE_STATS_INC(cur, free);
1327 * Move 1 record right from cur/level if possible.
1328 * Update cur to reflect the new path.
1329 */
1330STATIC int /* error */
1331xfs_inobt_rshift(
1332 xfs_btree_cur_t *cur, /* btree cursor */
1333 int level, /* level to shift record on */
1334 int *stat) /* success/failure */
1335{
1336 int error; /* error return value */
1337 int i; /* loop index */
1338 xfs_inobt_key_t key; /* key value for leaf level upward */
1339 xfs_buf_t *lbp; /* buffer for left (current) block */
1340 xfs_inobt_block_t *left; /* left (current) btree block */
1341 xfs_inobt_key_t *lkp; /* key pointer for left block */
1342 xfs_inobt_ptr_t *lpp; /* address pointer for left block */
1343 xfs_inobt_rec_t *lrp; /* record pointer for left block */
1344 xfs_buf_t *rbp; /* buffer for right neighbor block */
1345 xfs_inobt_block_t *right; /* right neighbor btree block */
1346 xfs_inobt_key_t *rkp; /* key pointer for right block */
1347 xfs_inobt_ptr_t *rpp; /* address pointer for right block */
1348 xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */
1349 xfs_btree_cur_t *tcur; /* temporary cursor */
1350 214
1351 /* 215 cur->bc_bufs[level] = NULL;
1352 * Set up variables for this block as "left". 216 cur->bc_nlevels--;
1353 */ 217
1354 lbp = cur->bc_bufs[level]; 218 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1355 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1356#ifdef DEBUG
1357 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1358 return error;
1359#endif
1360 /*
1361 * If we've got no right sibling then we can't shift an entry right.
1362 */
1363 if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1364 *stat = 0;
1365 return 0;
1366 }
1367 /*
1368 * If the cursor entry is the one that would be moved, don't
1369 * do it... it's too complicated.
1370 */
1371 if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1372 *stat = 0;
1373 return 0;
1374 }
1375 /*
1376 * Set up the right neighbor as "right".
1377 */
1378 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1379 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1380 0, &rbp, XFS_INO_BTREE_REF)))
1381 return error;
1382 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1383 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1384 return error;
1385 /*
1386 * If it's full, it can't take another entry.
1387 */
1388 if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1389 *stat = 0;
1390 return 0;
1391 }
1392 /*
1393 * Make a hole at the start of the right neighbor block, then
1394 * copy the last left block entry to the hole.
1395 */
1396 if (level > 0) {
1397 lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1398 lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1399 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1400 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1401#ifdef DEBUG
1402 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1403 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1404 return error;
1405 }
1406#endif
1407 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1408 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1409#ifdef DEBUG
1410 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1411 return error;
1412#endif
1413 *rkp = *lkp;
1414 *rpp = *lpp;
1415 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1416 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1417 } else {
1418 lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1419 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1420 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1421 *rrp = *lrp;
1422 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1423 key.ir_startino = rrp->ir_startino;
1424 rkp = &key;
1425 }
1426 /*
1427 * Decrement and log left's numrecs, bump and log right's numrecs.
1428 */
1429 be16_add_cpu(&left->bb_numrecs, -1);
1430 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1431 be16_add_cpu(&right->bb_numrecs, 1);
1432#ifdef DEBUG
1433 if (level > 0)
1434 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1435 else
1436 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1437#endif
1438 xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1439 /*
1440 * Using a temporary cursor, update the parent key values of the
1441 * block on the right.
1442 */
1443 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1444 return error;
1445 xfs_btree_lastrec(tcur, level);
1446 if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1447 (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1448 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1449 return error;
1450 }
1451 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1452 *stat = 1;
1453 return 0; 219 return 0;
1454} 220}
1455 221
1456/*
1457 * Split cur/level block in half.
1458 * Return new block number and its first record (to be inserted into parent).
1459 */
1460STATIC int /* error */
1461xfs_inobt_split(
1462 xfs_btree_cur_t *cur, /* btree cursor */
1463 int level, /* level to split */
1464 xfs_agblock_t *bnop, /* output: block number allocated */
1465 xfs_inobt_key_t *keyp, /* output: first key of new block */
1466 xfs_btree_cur_t **curp, /* output: new cursor */
1467 int *stat) /* success/failure */
1468{
1469 xfs_alloc_arg_t args; /* allocation argument structure */
1470 int error; /* error return value */
1471 int i; /* loop index/record number */
1472 xfs_agblock_t lbno; /* left (current) block number */
1473 xfs_buf_t *lbp; /* buffer for left block */
1474 xfs_inobt_block_t *left; /* left (current) btree block */
1475 xfs_inobt_key_t *lkp; /* left btree key pointer */
1476 xfs_inobt_ptr_t *lpp; /* left btree address pointer */
1477 xfs_inobt_rec_t *lrp; /* left btree record pointer */
1478 xfs_buf_t *rbp; /* buffer for right block */
1479 xfs_inobt_block_t *right; /* right (new) btree block */
1480 xfs_inobt_key_t *rkp; /* right btree key pointer */
1481 xfs_inobt_ptr_t *rpp; /* right btree address pointer */
1482 xfs_inobt_rec_t *rrp; /* right btree record pointer */
1483
1484 /*
1485 * Set up left block (current one).
1486 */
1487 lbp = cur->bc_bufs[level];
1488 args.tp = cur->bc_tp;
1489 args.mp = cur->bc_mp;
1490 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1491 /*
1492 * Allocate the new block.
1493 * If we can't do it, we're toast. Give up.
1494 */
1495 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
1496 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1497 args.isfl = args.userdata = args.minalignslop = 0;
1498 args.minlen = args.maxlen = args.prod = 1;
1499 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1500 if ((error = xfs_alloc_vextent(&args)))
1501 return error;
1502 if (args.fsbno == NULLFSBLOCK) {
1503 *stat = 0;
1504 return 0;
1505 }
1506 ASSERT(args.len == 1);
1507 rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1508 /*
1509 * Set up the new block as "right".
1510 */
1511 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1512 /*
1513 * "Left" is the current (according to the cursor) block.
1514 */
1515 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1516#ifdef DEBUG 222#ifdef DEBUG
1517 if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) 223STATIC int
1518 return error; 224xfs_inobt_keys_inorder(
1519#endif 225 struct xfs_btree_cur *cur,
1520 /* 226 union xfs_btree_key *k1,
1521 * Fill in the btree header for the new block. 227 union xfs_btree_key *k2)
1522 */ 228{
1523 right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); 229 return be32_to_cpu(k1->inobt.ir_startino) <
1524 right->bb_level = left->bb_level; 230 be32_to_cpu(k2->inobt.ir_startino);
1525 right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1526 /*
1527 * Make sure that if there's an odd number of entries now, that
1528 * each new block will have the same number of entries.
1529 */
1530 if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1531 cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1532 be16_add_cpu(&right->bb_numrecs, 1);
1533 i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1534 /*
1535 * For non-leaf blocks, copy keys and addresses over to the new block.
1536 */
1537 if (level > 0) {
1538 lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1539 lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1540 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1541 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1542#ifdef DEBUG
1543 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1544 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1545 return error;
1546 }
1547#endif
1548 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1549 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1550 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1551 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1552 *keyp = *rkp;
1553 }
1554 /*
1555 * For leaf blocks, copy records over to the new block.
1556 */
1557 else {
1558 lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1559 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1560 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1561 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1562 keyp->ir_startino = rrp->ir_startino;
1563 }
1564 /*
1565 * Find the left block number by looking in the buffer.
1566 * Adjust numrecs, sibling pointers.
1567 */
1568 be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1569 right->bb_rightsib = left->bb_rightsib;
1570 left->bb_rightsib = cpu_to_be32(args.agbno);
1571 right->bb_leftsib = cpu_to_be32(lbno);
1572 xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1573 xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1574 /*
1575 * If there's a block to the new block's right, make that block
1576 * point back to right instead of to left.
1577 */
1578 if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1579 xfs_inobt_block_t *rrblock; /* rr btree block */
1580 xfs_buf_t *rrbp; /* buffer for rrblock */
1581
1582 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1583 be32_to_cpu(right->bb_rightsib), 0, &rrbp,
1584 XFS_INO_BTREE_REF)))
1585 return error;
1586 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1587 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1588 return error;
1589 rrblock->bb_leftsib = cpu_to_be32(args.agbno);
1590 xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1591 }
1592 /*
1593 * If the cursor is really in the right block, move it there.
1594 * If it's just pointing past the last entry in left, then we'll
1595 * insert there, so don't change anything in that case.
1596 */
1597 if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1598 xfs_btree_setbuf(cur, level, rbp);
1599 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1600 }
1601 /*
1602 * If there are more levels, we'll need another cursor which refers
1603 * the right block, no matter where this cursor was.
1604 */
1605 if (level + 1 < cur->bc_nlevels) {
1606 if ((error = xfs_btree_dup_cursor(cur, curp)))
1607 return error;
1608 (*curp)->bc_ptrs[level + 1]++;
1609 }
1610 *bnop = args.agbno;
1611 *stat = 1;
1612 return 0;
1613} 231}
1614 232
1615/* 233STATIC int
1616 * Update keys at all levels from here to the root along the cursor's path. 234xfs_inobt_recs_inorder(
1617 */ 235 struct xfs_btree_cur *cur,
1618STATIC int /* error */ 236 union xfs_btree_rec *r1,
1619xfs_inobt_updkey( 237 union xfs_btree_rec *r2)
1620 xfs_btree_cur_t *cur, /* btree cursor */
1621 xfs_inobt_key_t *keyp, /* new key value to update to */
1622 int level) /* starting level for update */
1623{ 238{
1624 int ptr; /* index of key in block */ 239 return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
1625 240 be32_to_cpu(r2->inobt.ir_startino);
1626 /*
1627 * Go up the tree from this level toward the root.
1628 * At each level, update the key value to the value input.
1629 * Stop when we reach a level where the cursor isn't pointing
1630 * at the first entry in the block.
1631 */
1632 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1633 xfs_buf_t *bp; /* buffer for block */
1634 xfs_inobt_block_t *block; /* btree block */
1635#ifdef DEBUG
1636 int error; /* error return value */
1637#endif
1638 xfs_inobt_key_t *kp; /* ptr to btree block keys */
1639
1640 bp = cur->bc_bufs[level];
1641 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1642#ifdef DEBUG
1643 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1644 return error;
1645#endif
1646 ptr = cur->bc_ptrs[level];
1647 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1648 *kp = *keyp;
1649 xfs_inobt_log_keys(cur, bp, ptr, ptr);
1650 }
1651 return 0;
1652} 241}
242#endif /* DEBUG */
1653 243
1654/* 244#ifdef XFS_BTREE_TRACE
1655 * Externally visible routines. 245ktrace_t *xfs_inobt_trace_buf;
1656 */
1657 246
1658/* 247STATIC void
1659 * Decrement cursor by one record at the level. 248xfs_inobt_trace_enter(
1660 * For nonzero levels the leaf-ward information is untouched. 249 struct xfs_btree_cur *cur,
1661 */ 250 const char *func,
1662int /* error */ 251 char *s,
1663xfs_inobt_decrement( 252 int type,
1664 xfs_btree_cur_t *cur, /* btree cursor */ 253 int line,
1665 int level, /* level in btree, 0 is leaf */ 254 __psunsigned_t a0,
1666 int *stat) /* success/failure */ 255 __psunsigned_t a1,
256 __psunsigned_t a2,
257 __psunsigned_t a3,
258 __psunsigned_t a4,
259 __psunsigned_t a5,
260 __psunsigned_t a6,
261 __psunsigned_t a7,
262 __psunsigned_t a8,
263 __psunsigned_t a9,
264 __psunsigned_t a10)
1667{ 265{
1668 xfs_inobt_block_t *block; /* btree block */ 266 ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
1669 int error; 267 (void *)func, (void *)s, NULL, (void *)cur,
1670 int lev; /* btree level */ 268 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
1671 269 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
1672 ASSERT(level < cur->bc_nlevels); 270 (void *)a8, (void *)a9, (void *)a10);
1673 /*
1674 * Read-ahead to the left at this level.
1675 */
1676 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1677 /*
1678 * Decrement the ptr at this level. If we're still in the block
1679 * then we're done.
1680 */
1681 if (--cur->bc_ptrs[level] > 0) {
1682 *stat = 1;
1683 return 0;
1684 }
1685 /*
1686 * Get a pointer to the btree block.
1687 */
1688 block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1689#ifdef DEBUG
1690 if ((error = xfs_btree_check_sblock(cur, block, level,
1691 cur->bc_bufs[level])))
1692 return error;
1693#endif
1694 /*
1695 * If we just went off the left edge of the tree, return failure.
1696 */
1697 if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1698 *stat = 0;
1699 return 0;
1700 }
1701 /*
1702 * March up the tree decrementing pointers.
1703 * Stop when we don't go off the left edge of a block.
1704 */
1705 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1706 if (--cur->bc_ptrs[lev] > 0)
1707 break;
1708 /*
1709 * Read-ahead the left block, we're going to read it
1710 * in the next loop.
1711 */
1712 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1713 }
1714 /*
1715 * If we went off the root then we are seriously confused.
1716 */
1717 ASSERT(lev < cur->bc_nlevels);
1718 /*
1719 * Now walk back down the tree, fixing up the cursor's buffer
1720 * pointers and key numbers.
1721 */
1722 for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1723 xfs_agblock_t agbno; /* block number of btree block */
1724 xfs_buf_t *bp; /* buffer containing btree block */
1725
1726 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1727 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1728 cur->bc_private.a.agno, agbno, 0, &bp,
1729 XFS_INO_BTREE_REF)))
1730 return error;
1731 lev--;
1732 xfs_btree_setbuf(cur, lev, bp);
1733 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1734 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1735 return error;
1736 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1737 }
1738 *stat = 1;
1739 return 0;
1740} 271}
1741 272
1742/* 273STATIC void
1743 * Delete the record pointed to by cur. 274xfs_inobt_trace_cursor(
1744 * The cursor refers to the place where the record was (could be inserted) 275 struct xfs_btree_cur *cur,
1745 * when the operation returns. 276 __uint32_t *s0,
1746 */ 277 __uint64_t *l0,
1747int /* error */ 278 __uint64_t *l1)
1748xfs_inobt_delete(
1749 xfs_btree_cur_t *cur, /* btree cursor */
1750 int *stat) /* success/failure */
1751{ 279{
1752 int error; 280 *s0 = cur->bc_private.a.agno;
1753 int i; /* result code */ 281 *l0 = cur->bc_rec.i.ir_startino;
1754 int level; /* btree level */ 282 *l1 = cur->bc_rec.i.ir_free;
1755
1756 /*
1757 * Go up the tree, starting at leaf level.
1758 * If 2 is returned then a join was done; go to the next level.
1759 * Otherwise we are done.
1760 */
1761 for (level = 0, i = 2; i == 2; level++) {
1762 if ((error = xfs_inobt_delrec(cur, level, &i)))
1763 return error;
1764 }
1765 if (i == 0) {
1766 for (level = 1; level < cur->bc_nlevels; level++) {
1767 if (cur->bc_ptrs[level] == 0) {
1768 if ((error = xfs_inobt_decrement(cur, level, &i)))
1769 return error;
1770 break;
1771 }
1772 }
1773 }
1774 *stat = i;
1775 return 0;
1776} 283}
1777 284
1778 285STATIC void
1779/* 286xfs_inobt_trace_key(
1780 * Get the data from the pointed-to record. 287 struct xfs_btree_cur *cur,
1781 */ 288 union xfs_btree_key *key,
1782int /* error */ 289 __uint64_t *l0,
1783xfs_inobt_get_rec( 290 __uint64_t *l1)
1784 xfs_btree_cur_t *cur, /* btree cursor */
1785 xfs_agino_t *ino, /* output: starting inode of chunk */
1786 __int32_t *fcnt, /* output: number of free inodes */
1787 xfs_inofree_t *free, /* output: free inode mask */
1788 int *stat) /* output: success/failure */
1789{ 291{
1790 xfs_inobt_block_t *block; /* btree block */ 292 *l0 = be32_to_cpu(key->inobt.ir_startino);
1791 xfs_buf_t *bp; /* buffer containing btree block */ 293 *l1 = 0;
1792#ifdef DEBUG
1793 int error; /* error return value */
1794#endif
1795 int ptr; /* record number */
1796 xfs_inobt_rec_t *rec; /* record data */
1797
1798 bp = cur->bc_bufs[0];
1799 ptr = cur->bc_ptrs[0];
1800 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1801#ifdef DEBUG
1802 if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1803 return error;
1804#endif
1805 /*
1806 * Off the right end or left end, return failure.
1807 */
1808 if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1809 *stat = 0;
1810 return 0;
1811 }
1812 /*
1813 * Point to the record and extract its data.
1814 */
1815 rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1816 *ino = be32_to_cpu(rec->ir_startino);
1817 *fcnt = be32_to_cpu(rec->ir_freecount);
1818 *free = be64_to_cpu(rec->ir_free);
1819 *stat = 1;
1820 return 0;
1821} 294}
1822 295
1823/* 296STATIC void
1824 * Increment cursor by one record at the level. 297xfs_inobt_trace_record(
1825 * For nonzero levels the leaf-ward information is untouched. 298 struct xfs_btree_cur *cur,
1826 */ 299 union xfs_btree_rec *rec,
1827int /* error */ 300 __uint64_t *l0,
1828xfs_inobt_increment( 301 __uint64_t *l1,
1829 xfs_btree_cur_t *cur, /* btree cursor */ 302 __uint64_t *l2)
1830 int level, /* level in btree, 0 is leaf */
1831 int *stat) /* success/failure */
1832{ 303{
1833 xfs_inobt_block_t *block; /* btree block */ 304 *l0 = be32_to_cpu(rec->inobt.ir_startino);
1834 xfs_buf_t *bp; /* buffer containing btree block */ 305 *l1 = be32_to_cpu(rec->inobt.ir_freecount);
1835 int error; /* error return value */ 306 *l2 = be64_to_cpu(rec->inobt.ir_free);
1836 int lev; /* btree level */ 307}
308#endif /* XFS_BTREE_TRACE */
309
310static const struct xfs_btree_ops xfs_inobt_ops = {
311 .rec_len = sizeof(xfs_inobt_rec_t),
312 .key_len = sizeof(xfs_inobt_key_t),
313
314 .dup_cursor = xfs_inobt_dup_cursor,
315 .set_root = xfs_inobt_set_root,
316 .kill_root = xfs_inobt_kill_root,
317 .alloc_block = xfs_inobt_alloc_block,
318 .free_block = xfs_inobt_free_block,
319 .get_minrecs = xfs_inobt_get_minrecs,
320 .get_maxrecs = xfs_inobt_get_maxrecs,
321 .init_key_from_rec = xfs_inobt_init_key_from_rec,
322 .init_rec_from_key = xfs_inobt_init_rec_from_key,
323 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
324 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
325 .key_diff = xfs_inobt_key_diff,
1837 326
1838 ASSERT(level < cur->bc_nlevels);
1839 /*
1840 * Read-ahead to the right at this level.
1841 */
1842 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1843 /*
1844 * Get a pointer to the btree block.
1845 */
1846 bp = cur->bc_bufs[level];
1847 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1848#ifdef DEBUG
1849 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1850 return error;
1851#endif
1852 /*
1853 * Increment the ptr at this level. If we're still in the block
1854 * then we're done.
1855 */
1856 if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1857 *stat = 1;
1858 return 0;
1859 }
1860 /*
1861 * If we just went off the right edge of the tree, return failure.
1862 */
1863 if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1864 *stat = 0;
1865 return 0;
1866 }
1867 /*
1868 * March up the tree incrementing pointers.
1869 * Stop when we don't go off the right edge of a block.
1870 */
1871 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1872 bp = cur->bc_bufs[lev];
1873 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1874#ifdef DEBUG 327#ifdef DEBUG
1875 if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) 328 .keys_inorder = xfs_inobt_keys_inorder,
1876 return error; 329 .recs_inorder = xfs_inobt_recs_inorder,
1877#endif 330#endif
1878 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1879 break;
1880 /*
1881 * Read-ahead the right block, we're going to read it
1882 * in the next loop.
1883 */
1884 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1885 }
1886 /*
1887 * If we went off the root then we are seriously confused.
1888 */
1889 ASSERT(lev < cur->bc_nlevels);
1890 /*
1891 * Now walk back down the tree, fixing up the cursor's buffer
1892 * pointers and key numbers.
1893 */
1894 for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1895 lev > level; ) {
1896 xfs_agblock_t agbno; /* block number of btree block */
1897 331
1898 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); 332#ifdef XFS_BTREE_TRACE
1899 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 333 .trace_enter = xfs_inobt_trace_enter,
1900 cur->bc_private.a.agno, agbno, 0, &bp, 334 .trace_cursor = xfs_inobt_trace_cursor,
1901 XFS_INO_BTREE_REF))) 335 .trace_key = xfs_inobt_trace_key,
1902 return error; 336 .trace_record = xfs_inobt_trace_record,
1903 lev--; 337#endif
1904 xfs_btree_setbuf(cur, lev, bp); 338};
1905 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1906 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1907 return error;
1908 cur->bc_ptrs[lev] = 1;
1909 }
1910 *stat = 1;
1911 return 0;
1912}
1913 339
1914/* 340/*
1915 * Insert the current record at the point referenced by cur. 341 * Allocate a new inode btree cursor.
1916 * The cursor may be inconsistent on return if splits have been done.
1917 */ 342 */
1918int /* error */ 343struct xfs_btree_cur * /* new inode btree cursor */
1919xfs_inobt_insert( 344xfs_inobt_init_cursor(
1920 xfs_btree_cur_t *cur, /* btree cursor */ 345 struct xfs_mount *mp, /* file system mount point */
1921 int *stat) /* success/failure */ 346 struct xfs_trans *tp, /* transaction pointer */
347 struct xfs_buf *agbp, /* buffer for agi structure */
348 xfs_agnumber_t agno) /* allocation group number */
1922{ 349{
1923 int error; /* error return value */ 350 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
1924 int i; /* result value, 0 for failure */ 351 struct xfs_btree_cur *cur;
1925 int level; /* current level number in btree */
1926 xfs_agblock_t nbno; /* new block number (split result) */
1927 xfs_btree_cur_t *ncur; /* new cursor (split result) */
1928 xfs_inobt_rec_t nrec; /* record being inserted this level */
1929 xfs_btree_cur_t *pcur; /* previous level's cursor */
1930 352
1931 level = 0; 353 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
1932 nbno = NULLAGBLOCK;
1933 nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
1934 nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
1935 nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
1936 ncur = NULL;
1937 pcur = cur;
1938 /*
1939 * Loop going up the tree, starting at the leaf level.
1940 * Stop when we don't get a split block, that must mean that
1941 * the insert is finished with this level.
1942 */
1943 do {
1944 /*
1945 * Insert nrec/nbno into this level of the tree.
1946 * Note if we fail, nbno will be null.
1947 */
1948 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1949 &i))) {
1950 if (pcur != cur)
1951 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1952 return error;
1953 }
1954 /*
1955 * See if the cursor we just used is trash.
1956 * Can't trash the caller's cursor, but otherwise we should
1957 * if ncur is a new cursor or we're about to be done.
1958 */
1959 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1960 cur->bc_nlevels = pcur->bc_nlevels;
1961 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1962 }
1963 /*
1964 * If we got a new cursor, switch to it.
1965 */
1966 if (ncur) {
1967 pcur = ncur;
1968 ncur = NULL;
1969 }
1970 } while (nbno != NULLAGBLOCK);
1971 *stat = i;
1972 return 0;
1973}
1974 354
1975/* 355 cur->bc_tp = tp;
1976 * Lookup the record equal to ino in the btree given by cur. 356 cur->bc_mp = mp;
1977 */ 357 cur->bc_nlevels = be32_to_cpu(agi->agi_level);
1978int /* error */ 358 cur->bc_btnum = XFS_BTNUM_INO;
1979xfs_inobt_lookup_eq( 359 cur->bc_blocklog = mp->m_sb.sb_blocklog;
1980 xfs_btree_cur_t *cur, /* btree cursor */
1981 xfs_agino_t ino, /* starting inode of chunk */
1982 __int32_t fcnt, /* free inode count */
1983 xfs_inofree_t free, /* free inode mask */
1984 int *stat) /* success/failure */
1985{
1986 cur->bc_rec.i.ir_startino = ino;
1987 cur->bc_rec.i.ir_freecount = fcnt;
1988 cur->bc_rec.i.ir_free = free;
1989 return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
1990}
1991 360
1992/* 361 cur->bc_ops = &xfs_inobt_ops;
1993 * Lookup the first record greater than or equal to ino
1994 * in the btree given by cur.
1995 */
1996int /* error */
1997xfs_inobt_lookup_ge(
1998 xfs_btree_cur_t *cur, /* btree cursor */
1999 xfs_agino_t ino, /* starting inode of chunk */
2000 __int32_t fcnt, /* free inode count */
2001 xfs_inofree_t free, /* free inode mask */
2002 int *stat) /* success/failure */
2003{
2004 cur->bc_rec.i.ir_startino = ino;
2005 cur->bc_rec.i.ir_freecount = fcnt;
2006 cur->bc_rec.i.ir_free = free;
2007 return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2008}
2009 362
2010/* 363 cur->bc_private.a.agbp = agbp;
2011 * Lookup the first record less than or equal to ino 364 cur->bc_private.a.agno = agno;
2012 * in the btree given by cur. 365
2013 */ 366 return cur;
2014int /* error */
2015xfs_inobt_lookup_le(
2016 xfs_btree_cur_t *cur, /* btree cursor */
2017 xfs_agino_t ino, /* starting inode of chunk */
2018 __int32_t fcnt, /* free inode count */
2019 xfs_inofree_t free, /* free inode mask */
2020 int *stat) /* success/failure */
2021{
2022 cur->bc_rec.i.ir_startino = ino;
2023 cur->bc_rec.i.ir_freecount = fcnt;
2024 cur->bc_rec.i.ir_free = free;
2025 return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2026} 367}
2027 368
2028/* 369/*
2029 * Update the record referred to by cur, to the value given 370 * Calculate number of records in an inobt btree block.
2030 * by [ino, fcnt, free].
2031 * This either works (return 0) or gets an EFSCORRUPTED error.
2032 */ 371 */
2033int /* error */ 372int
2034xfs_inobt_update( 373xfs_inobt_maxrecs(
2035 xfs_btree_cur_t *cur, /* btree cursor */ 374 struct xfs_mount *mp,
2036 xfs_agino_t ino, /* starting inode of chunk */ 375 int blocklen,
2037 __int32_t fcnt, /* free inode count */ 376 int leaf)
2038 xfs_inofree_t free) /* free inode mask */
2039{ 377{
2040 xfs_inobt_block_t *block; /* btree block to update */ 378 blocklen -= XFS_INOBT_BLOCK_LEN(mp);
2041 xfs_buf_t *bp; /* buffer containing btree block */
2042 int error; /* error return value */
2043 int ptr; /* current record number (updating) */
2044 xfs_inobt_rec_t *rp; /* pointer to updated record */
2045 379
2046 /* 380 if (leaf)
2047 * Pick up the current block. 381 return blocklen / sizeof(xfs_inobt_rec_t);
2048 */ 382 return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
2049 bp = cur->bc_bufs[0];
2050 block = XFS_BUF_TO_INOBT_BLOCK(bp);
2051#ifdef DEBUG
2052 if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2053 return error;
2054#endif
2055 /*
2056 * Get the address of the rec to be updated.
2057 */
2058 ptr = cur->bc_ptrs[0];
2059 rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2060 /*
2061 * Fill in the new contents and log them.
2062 */
2063 rp->ir_startino = cpu_to_be32(ino);
2064 rp->ir_freecount = cpu_to_be32(fcnt);
2065 rp->ir_free = cpu_to_be64(free);
2066 xfs_inobt_log_recs(cur, bp, ptr, ptr);
2067 /*
2068 * Updating first record in leaf. Pass new key value up to our parent.
2069 */
2070 if (ptr == 1) {
2071 xfs_inobt_key_t key; /* key containing [ino] */
2072
2073 key.ir_startino = cpu_to_be32(ino);
2074 if ((error = xfs_inobt_updkey(cur, &key, 1)))
2075 return error;
2076 }
2077 return 0;
2078} 383}
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 8efc4a5b8b92..37e5dd01a577 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -24,7 +24,6 @@
24 24
25struct xfs_buf; 25struct xfs_buf;
26struct xfs_btree_cur; 26struct xfs_btree_cur;
27struct xfs_btree_sblock;
28struct xfs_mount; 27struct xfs_mount;
29 28
30/* 29/*
@@ -70,11 +69,6 @@ typedef struct xfs_inobt_key {
70/* btree pointer type */ 69/* btree pointer type */
71typedef __be32 xfs_inobt_ptr_t; 70typedef __be32 xfs_inobt_ptr_t;
72 71
73/* btree block header type */
74typedef struct xfs_btree_sblock xfs_inobt_block_t;
75
76#define XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)XFS_BUF_PTR(bp))
77
78/* 72/*
79 * Bit manipulations for ir_free. 73 * Bit manipulations for ir_free.
80 */ 74 */
@@ -85,14 +79,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
85#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) 79#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i))
86 80
87/* 81/*
88 * Real block structures have a size equal to the disk block size.
89 */
90#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0])
91#define XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0])
92#define XFS_INOBT_IS_LAST_REC(cur) \
93 ((cur)->bc_ptrs[0] == be16_to_cpu(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs))
94
95/*
96 * Maximum number of inode btree levels. 82 * Maximum number of inode btree levels.
97 */ 83 */
98#define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels) 84#define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels)
@@ -104,75 +90,38 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
104#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 90#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
105 91
106/* 92/*
107 * Record, key, and pointer address macros for btree blocks. 93 * Btree block header size depends on a superblock flag.
108 */ 94 *
109#define XFS_INOBT_REC_ADDR(bb,i,cur) \ 95 * (not quite yet, but soon)
110 (XFS_BTREE_REC_ADDR(xfs_inobt, bb, i))
111
112#define XFS_INOBT_KEY_ADDR(bb,i,cur) \
113 (XFS_BTREE_KEY_ADDR(xfs_inobt, bb, i))
114
115#define XFS_INOBT_PTR_ADDR(bb,i,cur) \
116 (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \
117 i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
118
119/*
120 * Decrement cursor by one record at the level.
121 * For nonzero levels the leaf-ward information is untouched.
122 */
123extern int xfs_inobt_decrement(struct xfs_btree_cur *cur, int level, int *stat);
124
125/*
126 * Delete the record pointed to by cur.
127 * The cursor refers to the place where the record was (could be inserted)
128 * when the operation returns.
129 */
130extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat);
131
132/*
133 * Get the data from the pointed-to record.
134 */
135extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
136 __int32_t *fcnt, xfs_inofree_t *free, int *stat);
137
138/*
139 * Increment cursor by one record at the level.
140 * For nonzero levels the leaf-ward information is untouched.
141 */
142extern int xfs_inobt_increment(struct xfs_btree_cur *cur, int level, int *stat);
143
144/*
145 * Insert the current record at the point referenced by cur.
146 * The cursor may be inconsistent on return if splits have been done.
147 */
148extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat);
149
150/*
151 * Lookup the record equal to ino in the btree given by cur.
152 */
153extern int xfs_inobt_lookup_eq(struct xfs_btree_cur *cur, xfs_agino_t ino,
154 __int32_t fcnt, xfs_inofree_t free, int *stat);
155
156/*
157 * Lookup the first record greater than or equal to ino
158 * in the btree given by cur.
159 */
160extern int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
161 __int32_t fcnt, xfs_inofree_t free, int *stat);
162
163/*
164 * Lookup the first record less than or equal to ino
165 * in the btree given by cur.
166 */ 96 */
167extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, 97#define XFS_INOBT_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
168 __int32_t fcnt, xfs_inofree_t free, int *stat);
169 98
170/* 99/*
171 * Update the record referred to by cur, to the value given 100 * Record, key, and pointer address macros for btree blocks.
172 * by [ino, fcnt, free]. 101 *
173 * This either works (return 0) or gets an EFSCORRUPTED error. 102 * (note that some of these may appear unused, but they are used in userspace)
174 */ 103 */
175extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino, 104#define XFS_INOBT_REC_ADDR(mp, block, index) \
176 __int32_t fcnt, xfs_inofree_t free); 105 ((xfs_inobt_rec_t *) \
106 ((char *)(block) + \
107 XFS_INOBT_BLOCK_LEN(mp) + \
108 (((index) - 1) * sizeof(xfs_inobt_rec_t))))
109
110#define XFS_INOBT_KEY_ADDR(mp, block, index) \
111 ((xfs_inobt_key_t *) \
112 ((char *)(block) + \
113 XFS_INOBT_BLOCK_LEN(mp) + \
114 ((index) - 1) * sizeof(xfs_inobt_key_t)))
115
116#define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \
117 ((xfs_inobt_ptr_t *) \
118 ((char *)(block) + \
119 XFS_INOBT_BLOCK_LEN(mp) + \
120 (maxrecs) * sizeof(xfs_inobt_key_t) + \
121 ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
122
123extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
124 struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
125extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
177 126
178#endif /* __XFS_IALLOC_BTREE_H__ */ 127#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e229e9e001c2..bf4dc5eb4cfc 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,164 +38,122 @@
38#include "xfs_ialloc.h" 38#include "xfs_ialloc.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_utils.h" 40#include "xfs_utils.h"
41#include "xfs_trans_priv.h"
42#include "xfs_inode_item.h"
41 43
42/* 44/*
43 * Look up an inode by number in the given file system. 45 * Check the validity of the inode we just found it the cache
44 * The inode is looked up in the cache held in each AG.
45 * If the inode is found in the cache, attach it to the provided
46 * vnode.
47 *
48 * If it is not in core, read it in from the file system's device,
49 * add it to the cache and attach the provided vnode.
50 *
51 * The inode is locked according to the value of the lock_flags parameter.
52 * This flag parameter indicates how and if the inode's IO lock and inode lock
53 * should be taken.
54 *
55 * mp -- the mount point structure for the current file system. It points
56 * to the inode hash table.
57 * tp -- a pointer to the current transaction if there is one. This is
58 * simply passed through to the xfs_iread() call.
59 * ino -- the number of the inode desired. This is the unique identifier
60 * within the file system for the inode being requested.
61 * lock_flags -- flags indicating how to lock the inode. See the comment
62 * for xfs_ilock() for a list of valid values.
63 * bno -- the block number starting the buffer containing the inode,
64 * if known (as by bulkstat), else 0.
65 */ 46 */
66STATIC int 47static int
67xfs_iget_core( 48xfs_iget_cache_hit(
68 struct inode *inode, 49 struct xfs_perag *pag,
69 xfs_mount_t *mp, 50 struct xfs_inode *ip,
70 xfs_trans_t *tp, 51 int flags,
71 xfs_ino_t ino, 52 int lock_flags) __releases(pag->pag_ici_lock)
72 uint flags,
73 uint lock_flags,
74 xfs_inode_t **ipp,
75 xfs_daddr_t bno)
76{ 53{
77 struct inode *old_inode; 54 struct xfs_mount *mp = ip->i_mount;
78 xfs_inode_t *ip; 55 int error = EAGAIN;
79 xfs_inode_t *iq;
80 int error;
81 unsigned long first_index, mask;
82 xfs_perag_t *pag;
83 xfs_agino_t agino;
84 56
85 /* the radix tree exists only in inode capable AGs */ 57 /*
86 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) 58 * If INEW is set this inode is being set up
87 return EINVAL; 59 * If IRECLAIM is set this inode is being torn down
88 60 * Pause and try again.
89 /* get the perag structure and ensure that it's inode capable */ 61 */
90 pag = xfs_get_perag(mp, ino); 62 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
91 if (!pag->pagi_inodeok) 63 XFS_STATS_INC(xs_ig_frecycle);
92 return EINVAL; 64 goto out_error;
93 ASSERT(pag->pag_ici_init); 65 }
94 agino = XFS_INO_TO_AGINO(mp, ino);
95 66
96again: 67 /* If IRECLAIMABLE is set, we've torn down the vfs inode part */
97 read_lock(&pag->pag_ici_lock); 68 if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
98 ip = radix_tree_lookup(&pag->pag_ici_root, agino);
99 69
100 if (ip != NULL) {
101 /* 70 /*
102 * If INEW is set this inode is being set up 71 * If lookup is racing with unlink, then we should return an
103 * we need to pause and try again. 72 * error immediately so we don't remove it from the reclaim
73 * list and potentially leak the inode.
104 */ 74 */
105 if (xfs_iflags_test(ip, XFS_INEW)) { 75 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
106 read_unlock(&pag->pag_ici_lock); 76 error = ENOENT;
107 delay(1); 77 goto out_error;
108 XFS_STATS_INC(xs_ig_frecycle);
109
110 goto again;
111 } 78 }
112 79
113 old_inode = ip->i_vnode; 80 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
114 if (old_inode == NULL) {
115 /*
116 * If IRECLAIM is set this inode is
117 * on its way out of the system,
118 * we need to pause and try again.
119 */
120 if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
121 read_unlock(&pag->pag_ici_lock);
122 delay(1);
123 XFS_STATS_INC(xs_ig_frecycle);
124
125 goto again;
126 }
127 ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
128
129 /*
130 * If lookup is racing with unlink, then we
131 * should return an error immediately so we
132 * don't remove it from the reclaim list and
133 * potentially leak the inode.
134 */
135 if ((ip->i_d.di_mode == 0) &&
136 !(flags & XFS_IGET_CREATE)) {
137 read_unlock(&pag->pag_ici_lock);
138 xfs_put_perag(mp, pag);
139 return ENOENT;
140 }
141
142 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
143
144 XFS_STATS_INC(xs_ig_found);
145 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
146 read_unlock(&pag->pag_ici_lock);
147
148 XFS_MOUNT_ILOCK(mp);
149 list_del_init(&ip->i_reclaim);
150 XFS_MOUNT_IUNLOCK(mp);
151
152 goto finish_inode;
153
154 } else if (inode != old_inode) {
155 /* The inode is being torn down, pause and
156 * try again.
157 */
158 if (old_inode->i_state & (I_FREEING | I_CLEAR)) {
159 read_unlock(&pag->pag_ici_lock);
160 delay(1);
161 XFS_STATS_INC(xs_ig_frecycle);
162
163 goto again;
164 }
165/* Chances are the other vnode (the one in the inode) is being torn
166* down right now, and we landed on top of it. Question is, what do
167* we do? Unhook the old inode and hook up the new one?
168*/
169 cmn_err(CE_PANIC,
170 "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
171 old_inode, inode);
172 }
173 81
174 /* 82 /*
175 * Inode cache hit 83 * We need to re-initialise the VFS inode as it has been
84 * 'freed' by the VFS. Do this here so we can deal with
85 * errors cleanly, then tag it so it can be set up correctly
86 * later.
176 */ 87 */
177 read_unlock(&pag->pag_ici_lock); 88 if (!inode_init_always(mp->m_super, VFS_I(ip))) {
178 XFS_STATS_INC(xs_ig_found); 89 error = ENOMEM;
179 90 goto out_error;
180finish_inode:
181 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
182 xfs_put_perag(mp, pag);
183 return ENOENT;
184 } 91 }
185 92
186 if (lock_flags != 0) 93 /*
187 xfs_ilock(ip, lock_flags); 94 * We must set the XFS_INEW flag before clearing the
95 * XFS_IRECLAIMABLE flag so that if a racing lookup does
96 * not find the XFS_IRECLAIMABLE above but has the igrab()
97 * below succeed we can safely check XFS_INEW to detect
98 * that this inode is still being initialised.
99 */
100 xfs_iflags_set(ip, XFS_INEW);
101 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
102
103 /* clear the radix tree reclaim flag as well. */
104 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
105 } else if (!igrab(VFS_I(ip))) {
106 /* If the VFS inode is being torn down, pause and try again. */
107 XFS_STATS_INC(xs_ig_frecycle);
108 goto out_error;
109 } else if (xfs_iflags_test(ip, XFS_INEW)) {
110 /*
111 * We are racing with another cache hit that is
112 * currently recycling this inode out of the XFS_IRECLAIMABLE
113 * state. Wait for the initialisation to complete before
114 * continuing.
115 */
116 wait_on_inode(VFS_I(ip));
117 }
188 118
189 xfs_iflags_clear(ip, XFS_ISTALE); 119 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
190 xfs_itrace_exit_tag(ip, "xfs_iget.found"); 120 error = ENOENT;
191 goto return_ip; 121 iput(VFS_I(ip));
122 goto out_error;
192 } 123 }
193 124
194 /* 125 /* We've got a live one. */
195 * Inode cache miss 126 read_unlock(&pag->pag_ici_lock);
196 */ 127
128 if (lock_flags != 0)
129 xfs_ilock(ip, lock_flags);
130
131 xfs_iflags_clear(ip, XFS_ISTALE);
132 xfs_itrace_exit_tag(ip, "xfs_iget.found");
133 XFS_STATS_INC(xs_ig_found);
134 return 0;
135
136out_error:
197 read_unlock(&pag->pag_ici_lock); 137 read_unlock(&pag->pag_ici_lock);
198 XFS_STATS_INC(xs_ig_missed); 138 return error;
139}
140
141
142static int
143xfs_iget_cache_miss(
144 struct xfs_mount *mp,
145 struct xfs_perag *pag,
146 xfs_trans_t *tp,
147 xfs_ino_t ino,
148 struct xfs_inode **ipp,
149 xfs_daddr_t bno,
150 int flags,
151 int lock_flags) __releases(pag->pag_ici_lock)
152{
153 struct xfs_inode *ip;
154 int error;
155 unsigned long first_index, mask;
156 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
199 157
200 /* 158 /*
201 * Read the disk inode attributes into a new inode structure and get 159 * Read the disk inode attributes into a new inode structure and get
@@ -203,116 +161,85 @@ finish_inode:
203 */ 161 */
204 error = xfs_iread(mp, tp, ino, &ip, bno, 162 error = xfs_iread(mp, tp, ino, &ip, bno,
205 (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); 163 (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
206 if (error) { 164 if (error)
207 xfs_put_perag(mp, pag);
208 return error; 165 return error;
209 }
210 166
211 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 167 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
212 168
213 169 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
214 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 170 error = ENOENT;
215 "xfsino", ip->i_ino); 171 goto out_destroy;
216 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 172 }
217 init_waitqueue_head(&ip->i_ipin_wait);
218 atomic_set(&ip->i_pincount, 0);
219
220 /*
221 * Because we want to use a counting completion, complete
222 * the flush completion once to allow a single access to
223 * the flush completion without blocking.
224 */
225 init_completion(&ip->i_flush);
226 complete(&ip->i_flush);
227 173
228 if (lock_flags) 174 if (lock_flags)
229 xfs_ilock(ip, lock_flags); 175 xfs_ilock(ip, lock_flags);
230 176
231 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
232 xfs_idestroy(ip);
233 xfs_put_perag(mp, pag);
234 return ENOENT;
235 }
236
237 /* 177 /*
238 * Preload the radix tree so we can insert safely under the 178 * Preload the radix tree so we can insert safely under the
239 * write spinlock. 179 * write spinlock. Note that we cannot sleep inside the preload
180 * region.
240 */ 181 */
241 if (radix_tree_preload(GFP_KERNEL)) { 182 if (radix_tree_preload(GFP_KERNEL)) {
242 xfs_idestroy(ip); 183 error = EAGAIN;
243 delay(1); 184 goto out_unlock;
244 goto again;
245 } 185 }
186
246 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 187 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
247 first_index = agino & mask; 188 first_index = agino & mask;
248 write_lock(&pag->pag_ici_lock); 189 write_lock(&pag->pag_ici_lock);
249 /* 190
250 * insert the new inode 191 /* insert the new inode */
251 */
252 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 192 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
253 if (unlikely(error)) { 193 if (unlikely(error)) {
254 BUG_ON(error != -EEXIST); 194 WARN_ON(error != -EEXIST);
255 write_unlock(&pag->pag_ici_lock);
256 radix_tree_preload_end();
257 xfs_idestroy(ip);
258 XFS_STATS_INC(xs_ig_dup); 195 XFS_STATS_INC(xs_ig_dup);
259 goto again; 196 error = EAGAIN;
197 goto out_preload_end;
260 } 198 }
261 199
262 /* 200 /* These values _must_ be set before releasing the radix tree lock! */
263 * These values _must_ be set before releasing the radix tree lock!
264 */
265 ip->i_udquot = ip->i_gdquot = NULL; 201 ip->i_udquot = ip->i_gdquot = NULL;
266 xfs_iflags_set(ip, XFS_INEW); 202 xfs_iflags_set(ip, XFS_INEW);
267 203
268 write_unlock(&pag->pag_ici_lock); 204 write_unlock(&pag->pag_ici_lock);
269 radix_tree_preload_end(); 205 radix_tree_preload_end();
270
271 /*
272 * Link ip to its mount and thread it on the mount's inode list.
273 */
274 XFS_MOUNT_ILOCK(mp);
275 if ((iq = mp->m_inodes)) {
276 ASSERT(iq->i_mprev->i_mnext == iq);
277 ip->i_mprev = iq->i_mprev;
278 iq->i_mprev->i_mnext = ip;
279 iq->i_mprev = ip;
280 ip->i_mnext = iq;
281 } else {
282 ip->i_mnext = ip;
283 ip->i_mprev = ip;
284 }
285 mp->m_inodes = ip;
286
287 XFS_MOUNT_IUNLOCK(mp);
288 xfs_put_perag(mp, pag);
289
290 return_ip:
291 ASSERT(ip->i_df.if_ext_max ==
292 XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
293
294 xfs_iflags_set(ip, XFS_IMODIFIED);
295 *ipp = ip; 206 *ipp = ip;
296
297 /*
298 * Set up the Linux with the Linux inode.
299 */
300 ip->i_vnode = inode;
301 inode->i_private = ip;
302
303 /*
304 * If we have a real type for an on-disk inode, we can set ops(&unlock)
305 * now. If it's a new inode being created, xfs_ialloc will handle it.
306 */
307 if (ip->i_d.di_mode != 0)
308 xfs_setup_inode(ip);
309 return 0; 207 return 0;
310}
311 208
209out_preload_end:
210 write_unlock(&pag->pag_ici_lock);
211 radix_tree_preload_end();
212out_unlock:
213 if (lock_flags)
214 xfs_iunlock(ip, lock_flags);
215out_destroy:
216 xfs_destroy_inode(ip);
217 return error;
218}
312 219
313/* 220/*
314 * The 'normal' internal xfs_iget, if needed it will 221 * Look up an inode by number in the given file system.
315 * 'allocate', or 'get', the vnode. 222 * The inode is looked up in the cache held in each AG.
223 * If the inode is found in the cache, initialise the vfs inode
224 * if necessary.
225 *
226 * If it is not in core, read it in from the file system's device,
227 * add it to the cache and initialise the vfs inode.
228 *
229 * The inode is locked according to the value of the lock_flags parameter.
230 * This flag parameter indicates how and if the inode's IO lock and inode lock
231 * should be taken.
232 *
233 * mp -- the mount point structure for the current file system. It points
234 * to the inode hash table.
235 * tp -- a pointer to the current transaction if there is one. This is
236 * simply passed through to the xfs_iread() call.
237 * ino -- the number of the inode desired. This is the unique identifier
238 * within the file system for the inode being requested.
239 * lock_flags -- flags indicating how to lock the inode. See the comment
240 * for xfs_ilock() for a list of valid values.
241 * bno -- the block number starting the buffer containing the inode,
242 * if known (as by bulkstat), else 0.
316 */ 243 */
317int 244int
318xfs_iget( 245xfs_iget(
@@ -324,61 +251,65 @@ xfs_iget(
324 xfs_inode_t **ipp, 251 xfs_inode_t **ipp,
325 xfs_daddr_t bno) 252 xfs_daddr_t bno)
326{ 253{
327 struct inode *inode;
328 xfs_inode_t *ip; 254 xfs_inode_t *ip;
329 int error; 255 int error;
256 xfs_perag_t *pag;
257 xfs_agino_t agino;
330 258
331 XFS_STATS_INC(xs_ig_attempts); 259 /* the radix tree exists only in inode capable AGs */
332 260 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
333retry: 261 return EINVAL;
334 inode = iget_locked(mp->m_super, ino); 262
335 if (!inode) 263 /* get the perag structure and ensure that it's inode capable */
336 /* If we got no inode we are out of memory */ 264 pag = xfs_get_perag(mp, ino);
337 return ENOMEM; 265 if (!pag->pagi_inodeok)
338 266 return EINVAL;
339 if (inode->i_state & I_NEW) { 267 ASSERT(pag->pag_ici_init);
340 XFS_STATS_INC(vn_active); 268 agino = XFS_INO_TO_AGINO(mp, ino);
341 XFS_STATS_INC(vn_alloc); 269
342 270again:
343 error = xfs_iget_core(inode, mp, tp, ino, flags, 271 error = 0;
344 lock_flags, ipp, bno); 272 read_lock(&pag->pag_ici_lock);
345 if (error) { 273 ip = radix_tree_lookup(&pag->pag_ici_root, agino);
346 make_bad_inode(inode); 274
347 if (inode->i_state & I_NEW) 275 if (ip) {
348 unlock_new_inode(inode); 276 error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
349 iput(inode); 277 if (error)
350 } 278 goto out_error_or_again;
351 return error; 279 } else {
280 read_unlock(&pag->pag_ici_lock);
281 XFS_STATS_INC(xs_ig_missed);
282
283 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
284 flags, lock_flags);
285 if (error)
286 goto out_error_or_again;
352 } 287 }
288 xfs_put_perag(mp, pag);
289
290 xfs_iflags_set(ip, XFS_IMODIFIED);
291 *ipp = ip;
353 292
293 ASSERT(ip->i_df.if_ext_max ==
294 XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
354 /* 295 /*
355 * If the inode is not fully constructed due to 296 * If we have a real type for an on-disk inode, we can set ops(&unlock)
356 * filehandle mismatches wait for the inode to go 297 * now. If it's a new inode being created, xfs_ialloc will handle it.
357 * away and try again.
358 *
359 * iget_locked will call __wait_on_freeing_inode
360 * to wait for the inode to go away.
361 */ 298 */
362 if (is_bad_inode(inode)) { 299 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
363 iput(inode); 300 xfs_setup_inode(ip);
364 delay(1); 301 return 0;
365 goto retry;
366 }
367 302
368 ip = XFS_I(inode); 303out_error_or_again:
369 if (!ip) { 304 if (error == EAGAIN) {
370 iput(inode);
371 delay(1); 305 delay(1);
372 goto retry; 306 goto again;
373 } 307 }
374 308 xfs_put_perag(mp, pag);
375 if (lock_flags != 0) 309 return error;
376 xfs_ilock(ip, lock_flags);
377 XFS_STATS_INC(xs_ig_found);
378 *ipp = ip;
379 return 0;
380} 310}
381 311
312
382/* 313/*
383 * Look for the inode corresponding to the given ino in the hash table. 314 * Look for the inode corresponding to the given ino in the hash table.
384 * If it is there and its i_transp pointer matches tp, return it. 315 * If it is there and its i_transp pointer matches tp, return it.
@@ -462,14 +393,13 @@ xfs_ireclaim(xfs_inode_t *ip)
462 xfs_iextract(ip); 393 xfs_iextract(ip);
463 394
464 /* 395 /*
465 * Here we do a spurious inode lock in order to coordinate with 396 * Here we do a spurious inode lock in order to coordinate with inode
466 * xfs_sync(). This is because xfs_sync() references the inodes 397 * cache radix tree lookups. This is because the lookup can reference
467 * in the mount list without taking references on the corresponding 398 * the inodes in the cache without taking references. We make that OK
468 * vnodes. We make that OK here by ensuring that we wait until 399 * here by ensuring that we wait until the inode is unlocked after the
469 * the inode is unlocked in xfs_sync() before we go ahead and 400 * lookup before we go ahead and free it. We get both the ilock and
470 * free it. We get both the regular lock and the io lock because 401 * the iolock because the code may need to drop the ilock one but will
471 * the xfs_sync() code may need to drop the regular one but will 402 * still hold the iolock.
472 * still hold the io lock.
473 */ 403 */
474 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 404 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
475 405
@@ -480,14 +410,6 @@ xfs_ireclaim(xfs_inode_t *ip)
480 XFS_QM_DQDETACH(ip->i_mount, ip); 410 XFS_QM_DQDETACH(ip->i_mount, ip);
481 411
482 /* 412 /*
483 * Pull our behavior descriptor from the vnode chain.
484 */
485 if (ip->i_vnode) {
486 ip->i_vnode->i_private = NULL;
487 ip->i_vnode = NULL;
488 }
489
490 /*
491 * Free all memory associated with the inode. 413 * Free all memory associated with the inode.
492 */ 414 */
493 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 415 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -505,38 +427,13 @@ xfs_iextract(
505{ 427{
506 xfs_mount_t *mp = ip->i_mount; 428 xfs_mount_t *mp = ip->i_mount;
507 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 429 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
508 xfs_inode_t *iq;
509 430
510 write_lock(&pag->pag_ici_lock); 431 write_lock(&pag->pag_ici_lock);
511 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); 432 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
512 write_unlock(&pag->pag_ici_lock); 433 write_unlock(&pag->pag_ici_lock);
513 xfs_put_perag(mp, pag); 434 xfs_put_perag(mp, pag);
514 435
515 /*
516 * Remove from mount's inode list.
517 */
518 XFS_MOUNT_ILOCK(mp);
519 ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
520 iq = ip->i_mnext;
521 iq->i_mprev = ip->i_mprev;
522 ip->i_mprev->i_mnext = iq;
523
524 /*
525 * Fix up the head pointer if it points to the inode being deleted.
526 */
527 if (mp->m_inodes == ip) {
528 if (ip == iq) {
529 mp->m_inodes = NULL;
530 } else {
531 mp->m_inodes = iq;
532 }
533 }
534
535 /* Deal with the deleted inodes list */
536 list_del_init(&ip->i_reclaim);
537
538 mp->m_ireclaims++; 436 mp->m_ireclaims++;
539 XFS_MOUNT_IUNLOCK(mp);
540} 437}
541 438
542/* 439/*
@@ -737,7 +634,7 @@ xfs_iunlock(
737 * it is in the AIL and anyone is waiting on it. Don't do 634 * it is in the AIL and anyone is waiting on it. Don't do
738 * this if the caller has asked us not to. 635 * this if the caller has asked us not to.
739 */ 636 */
740 xfs_trans_unlocked_item(ip->i_mount, 637 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
741 (xfs_log_item_t*)(ip->i_itemp)); 638 (xfs_log_item_t*)(ip->i_itemp));
742 } 639 }
743 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 640 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h
index d36450003983..f9ce62890ea5 100644
--- a/fs/xfs/xfs_imap.h
+++ b/fs/xfs/xfs_imap.h
@@ -30,11 +30,9 @@ typedef struct xfs_imap {
30 ushort im_boffset; /* inode offset in block in bytes */ 30 ushort im_boffset; /* inode offset in block in bytes */
31} xfs_imap_t; 31} xfs_imap_t;
32 32
33#ifdef __KERNEL__
34struct xfs_mount; 33struct xfs_mount;
35struct xfs_trans; 34struct xfs_trans;
36int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 35int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
37 xfs_imap_t *, uint); 36 xfs_imap_t *, uint);
38#endif
39 37
40#endif /* __XFS_IMAP_H__ */ 38#endif /* __XFS_IMAP_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index dbd9cef852ec..cd522827f99e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -41,6 +41,7 @@
41#include "xfs_buf_item.h" 41#include "xfs_buf_item.h"
42#include "xfs_inode_item.h" 42#include "xfs_inode_item.h"
43#include "xfs_btree.h" 43#include "xfs_btree.h"
44#include "xfs_btree_trace.h"
44#include "xfs_alloc.h" 45#include "xfs_alloc.h"
45#include "xfs_ialloc.h" 46#include "xfs_ialloc.h"
46#include "xfs_bmap.h" 47#include "xfs_bmap.h"
@@ -221,25 +222,26 @@ xfs_imap_to_bp(
221 * Use xfs_imap() to determine the size and location of the 222 * Use xfs_imap() to determine the size and location of the
222 * buffer to read from disk. 223 * buffer to read from disk.
223 */ 224 */
224STATIC int 225int
225xfs_inotobp( 226xfs_inotobp(
226 xfs_mount_t *mp, 227 xfs_mount_t *mp,
227 xfs_trans_t *tp, 228 xfs_trans_t *tp,
228 xfs_ino_t ino, 229 xfs_ino_t ino,
229 xfs_dinode_t **dipp, 230 xfs_dinode_t **dipp,
230 xfs_buf_t **bpp, 231 xfs_buf_t **bpp,
231 int *offset) 232 int *offset,
233 uint imap_flags)
232{ 234{
233 xfs_imap_t imap; 235 xfs_imap_t imap;
234 xfs_buf_t *bp; 236 xfs_buf_t *bp;
235 int error; 237 int error;
236 238
237 imap.im_blkno = 0; 239 imap.im_blkno = 0;
238 error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); 240 error = xfs_imap(mp, tp, ino, &imap, imap_flags | XFS_IMAP_LOOKUP);
239 if (error) 241 if (error)
240 return error; 242 return error;
241 243
242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); 244 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
243 if (error) 245 if (error)
244 return error; 246 return error;
245 247
@@ -621,7 +623,7 @@ xfs_iformat_btree(
621 ifp = XFS_IFORK_PTR(ip, whichfork); 623 ifp = XFS_IFORK_PTR(ip, whichfork);
622 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 624 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
623 size = XFS_BMAP_BROOT_SPACE(dfp); 625 size = XFS_BMAP_BROOT_SPACE(dfp);
624 nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); 626 nrecs = be16_to_cpu(dfp->bb_numrecs);
625 627
626 /* 628 /*
627 * blow out if -- fork has less extents than can fit in 629 * blow out if -- fork has less extents than can fit in
@@ -649,8 +651,9 @@ xfs_iformat_btree(
649 * Copy and convert from the on-disk structure 651 * Copy and convert from the on-disk structure
650 * to the in-memory structure. 652 * to the in-memory structure.
651 */ 653 */
652 xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 654 xfs_bmdr_to_bmbt(ip->i_mount, dfp,
653 ifp->if_broot, size); 655 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
656 ifp->if_broot, size);
654 ifp->if_flags &= ~XFS_IFEXTENTS; 657 ifp->if_flags &= ~XFS_IFEXTENTS;
655 ifp->if_flags |= XFS_IFBROOT; 658 ifp->if_flags |= XFS_IFBROOT;
656 659
@@ -788,51 +791,56 @@ xfs_dic2xflags(
788} 791}
789 792
790/* 793/*
791 * Given a mount structure and an inode number, return a pointer 794 * Allocate and initialise an xfs_inode.
792 * to a newly allocated in-core inode corresponding to the given
793 * inode number.
794 *
795 * Initialize the inode's attributes and extent pointers if it
796 * already has them (it will not if the inode has no links).
797 */ 795 */
798int 796STATIC struct xfs_inode *
799xfs_iread( 797xfs_inode_alloc(
800 xfs_mount_t *mp, 798 struct xfs_mount *mp,
801 xfs_trans_t *tp, 799 xfs_ino_t ino)
802 xfs_ino_t ino,
803 xfs_inode_t **ipp,
804 xfs_daddr_t bno,
805 uint imap_flags)
806{ 800{
807 xfs_buf_t *bp; 801 struct xfs_inode *ip;
808 xfs_dinode_t *dip;
809 xfs_inode_t *ip;
810 int error;
811 802
812 ASSERT(xfs_inode_zone != NULL); 803 /*
804 * if this didn't occur in transactions, we could use
805 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
806 * code up to do this anyway.
807 */
808 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
809 if (!ip)
810 return NULL;
813 811
814 ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); 812 ASSERT(atomic_read(&ip->i_iocount) == 0);
815 ip->i_ino = ino; 813 ASSERT(atomic_read(&ip->i_pincount) == 0);
816 ip->i_mount = mp; 814 ASSERT(!spin_is_locked(&ip->i_flags_lock));
817 atomic_set(&ip->i_iocount, 0); 815 ASSERT(completion_done(&ip->i_flush));
818 spin_lock_init(&ip->i_flags_lock);
819 816
820 /* 817 /*
821 * Get pointer's to the on-disk inode and the buffer containing it. 818 * initialise the VFS inode here to get failures
822 * If the inode number refers to a block outside the file system 819 * out of the way early.
823 * then xfs_itobp() will return NULL. In this case we should
824 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
825 * know that this is a new incore inode.
826 */ 820 */
827 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); 821 if (!inode_init_always(mp->m_super, VFS_I(ip))) {
828 if (error) {
829 kmem_zone_free(xfs_inode_zone, ip); 822 kmem_zone_free(xfs_inode_zone, ip);
830 return error; 823 return NULL;
831 } 824 }
832 825
826 /* initialise the xfs inode */
827 ip->i_ino = ino;
828 ip->i_mount = mp;
829 ip->i_blkno = 0;
830 ip->i_len = 0;
831 ip->i_boffset =0;
832 ip->i_afp = NULL;
833 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
834 ip->i_flags = 0;
835 ip->i_update_core = 0;
836 ip->i_update_size = 0;
837 ip->i_delayed_blks = 0;
838 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
839 ip->i_size = 0;
840 ip->i_new_size = 0;
841
833 /* 842 /*
834 * Initialize inode's trace buffers. 843 * Initialize inode's trace buffers.
835 * Do this before xfs_iformat in case it adds entries.
836 */ 844 */
837#ifdef XFS_INODE_TRACE 845#ifdef XFS_INODE_TRACE
838 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); 846 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
@@ -840,7 +848,7 @@ xfs_iread(
840#ifdef XFS_BMAP_TRACE 848#ifdef XFS_BMAP_TRACE
841 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); 849 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
842#endif 850#endif
843#ifdef XFS_BMBT_TRACE 851#ifdef XFS_BTREE_TRACE
844 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); 852 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
845#endif 853#endif
846#ifdef XFS_RW_TRACE 854#ifdef XFS_RW_TRACE
@@ -853,13 +861,51 @@ xfs_iread(
853 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); 861 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
854#endif 862#endif
855 863
864 return ip;
865}
866
867/*
868 * Given a mount structure and an inode number, return a pointer
869 * to a newly allocated in-core inode corresponding to the given
870 * inode number.
871 *
872 * Initialize the inode's attributes and extent pointers if it
873 * already has them (it will not if the inode has no links).
874 */
875int
876xfs_iread(
877 xfs_mount_t *mp,
878 xfs_trans_t *tp,
879 xfs_ino_t ino,
880 xfs_inode_t **ipp,
881 xfs_daddr_t bno,
882 uint imap_flags)
883{
884 xfs_buf_t *bp;
885 xfs_dinode_t *dip;
886 xfs_inode_t *ip;
887 int error;
888
889 ip = xfs_inode_alloc(mp, ino);
890 if (!ip)
891 return ENOMEM;
892
893 /*
894 * Get pointer's to the on-disk inode and the buffer containing it.
895 * If the inode number refers to a block outside the file system
896 * then xfs_itobp() will return NULL. In this case we should
897 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
898 * know that this is a new incore inode.
899 */
900 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
901 if (error)
902 goto out_destroy_inode;
903
856 /* 904 /*
857 * If we got something that isn't an inode it means someone 905 * If we got something that isn't an inode it means someone
858 * (nfs or dmi) has a stale handle. 906 * (nfs or dmi) has a stale handle.
859 */ 907 */
860 if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { 908 if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) {
861 kmem_zone_free(xfs_inode_zone, ip);
862 xfs_trans_brelse(tp, bp);
863#ifdef DEBUG 909#ifdef DEBUG
864 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 910 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
865 "dip->di_core.di_magic (0x%x) != " 911 "dip->di_core.di_magic (0x%x) != "
@@ -867,7 +913,8 @@ xfs_iread(
867 be16_to_cpu(dip->di_core.di_magic), 913 be16_to_cpu(dip->di_core.di_magic),
868 XFS_DINODE_MAGIC); 914 XFS_DINODE_MAGIC);
869#endif /* DEBUG */ 915#endif /* DEBUG */
870 return XFS_ERROR(EINVAL); 916 error = XFS_ERROR(EINVAL);
917 goto out_brelse;
871 } 918 }
872 919
873 /* 920 /*
@@ -881,14 +928,12 @@ xfs_iread(
881 xfs_dinode_from_disk(&ip->i_d, &dip->di_core); 928 xfs_dinode_from_disk(&ip->i_d, &dip->di_core);
882 error = xfs_iformat(ip, dip); 929 error = xfs_iformat(ip, dip);
883 if (error) { 930 if (error) {
884 kmem_zone_free(xfs_inode_zone, ip);
885 xfs_trans_brelse(tp, bp);
886#ifdef DEBUG 931#ifdef DEBUG
887 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 932 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
888 "xfs_iformat() returned error %d", 933 "xfs_iformat() returned error %d",
889 error); 934 error);
890#endif /* DEBUG */ 935#endif /* DEBUG */
891 return error; 936 goto out_brelse;
892 } 937 }
893 } else { 938 } else {
894 ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic); 939 ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic);
@@ -911,8 +956,6 @@ xfs_iread(
911 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 956 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
912 } 957 }
913 958
914 INIT_LIST_HEAD(&ip->i_reclaim);
915
916 /* 959 /*
917 * The inode format changed when we moved the link count and 960 * The inode format changed when we moved the link count and
918 * made it 32 bits long. If this is an old format inode, 961 * made it 32 bits long. If this is an old format inode,
@@ -956,6 +999,12 @@ xfs_iread(
956 xfs_trans_brelse(tp, bp); 999 xfs_trans_brelse(tp, bp);
957 *ipp = ip; 1000 *ipp = ip;
958 return 0; 1001 return 0;
1002
1003 out_brelse:
1004 xfs_trans_brelse(tp, bp);
1005 out_destroy_inode:
1006 xfs_destroy_inode(ip);
1007 return error;
959} 1008}
960 1009
961/* 1010/*
@@ -1049,6 +1098,7 @@ xfs_ialloc(
1049 uint flags; 1098 uint flags;
1050 int error; 1099 int error;
1051 timespec_t tv; 1100 timespec_t tv;
1101 int filestreams = 0;
1052 1102
1053 /* 1103 /*
1054 * Call the space management code to pick 1104 * Call the space management code to pick
@@ -1056,9 +1106,8 @@ xfs_ialloc(
1056 */ 1106 */
1057 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 1107 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
1058 ialloc_context, call_again, &ino); 1108 ialloc_context, call_again, &ino);
1059 if (error != 0) { 1109 if (error)
1060 return error; 1110 return error;
1061 }
1062 if (*call_again || ino == NULLFSINO) { 1111 if (*call_again || ino == NULLFSINO) {
1063 *ipp = NULL; 1112 *ipp = NULL;
1064 return 0; 1113 return 0;
@@ -1072,9 +1121,8 @@ xfs_ialloc(
1072 */ 1121 */
1073 error = xfs_trans_iget(tp->t_mountp, tp, ino, 1122 error = xfs_trans_iget(tp->t_mountp, tp, ino,
1074 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1123 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
1075 if (error != 0) { 1124 if (error)
1076 return error; 1125 return error;
1077 }
1078 ASSERT(ip != NULL); 1126 ASSERT(ip != NULL);
1079 1127
1080 ip->i_d.di_mode = (__uint16_t)mode; 1128 ip->i_d.di_mode = (__uint16_t)mode;
@@ -1155,13 +1203,12 @@ xfs_ialloc(
1155 flags |= XFS_ILOG_DEV; 1203 flags |= XFS_ILOG_DEV;
1156 break; 1204 break;
1157 case S_IFREG: 1205 case S_IFREG:
1158 if (pip && xfs_inode_is_filestream(pip)) { 1206 /*
1159 error = xfs_filestream_associate(pip, ip); 1207 * we can't set up filestreams until after the VFS inode
1160 if (error < 0) 1208 * is set up properly.
1161 return -error; 1209 */
1162 if (!error) 1210 if (pip && xfs_inode_is_filestream(pip))
1163 xfs_iflags_set(ip, XFS_IFILESTREAM); 1211 filestreams = 1;
1164 }
1165 /* fall through */ 1212 /* fall through */
1166 case S_IFDIR: 1213 case S_IFDIR:
1167 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1214 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
@@ -1227,6 +1274,15 @@ xfs_ialloc(
1227 /* now that we have an i_mode we can setup inode ops and unlock */ 1274 /* now that we have an i_mode we can setup inode ops and unlock */
1228 xfs_setup_inode(ip); 1275 xfs_setup_inode(ip);
1229 1276
1277 /* now we have set up the vfs inode we can associate the filestream */
1278 if (filestreams) {
1279 error = xfs_filestream_associate(pip, ip);
1280 if (error < 0)
1281 return -error;
1282 if (!error)
1283 xfs_iflags_set(ip, XFS_IFILESTREAM);
1284 }
1285
1230 *ipp = ip; 1286 *ipp = ip;
1231 return 0; 1287 return 0;
1232} 1288}
@@ -1414,7 +1470,7 @@ xfs_itruncate_start(
1414 mp = ip->i_mount; 1470 mp = ip->i_mount;
1415 1471
1416 /* wait for the completion of any pending DIOs */ 1472 /* wait for the completion of any pending DIOs */
1417 if (new_size < ip->i_size) 1473 if (new_size == 0 || new_size < ip->i_size)
1418 vn_iowait(ip); 1474 vn_iowait(ip);
1419 1475
1420 /* 1476 /*
@@ -1992,7 +2048,7 @@ xfs_iunlink_remove(
1992 } 2048 }
1993 next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 2049 next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
1994 error = xfs_inotobp(mp, tp, next_ino, &last_dip, 2050 error = xfs_inotobp(mp, tp, next_ino, &last_dip,
1995 &last_ibp, &last_offset); 2051 &last_ibp, &last_offset, 0);
1996 if (error) { 2052 if (error) {
1997 cmn_err(CE_WARN, 2053 cmn_err(CE_WARN,
1998 "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", 2054 "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.",
@@ -2160,9 +2216,9 @@ xfs_ifree_cluster(
2160 iip = (xfs_inode_log_item_t *)lip; 2216 iip = (xfs_inode_log_item_t *)lip;
2161 ASSERT(iip->ili_logged == 1); 2217 ASSERT(iip->ili_logged == 1);
2162 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; 2218 lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
2163 spin_lock(&mp->m_ail_lock); 2219 xfs_trans_ail_copy_lsn(mp->m_ail,
2164 iip->ili_flush_lsn = iip->ili_item.li_lsn; 2220 &iip->ili_flush_lsn,
2165 spin_unlock(&mp->m_ail_lock); 2221 &iip->ili_item.li_lsn);
2166 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 2222 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
2167 pre_flushed++; 2223 pre_flushed++;
2168 } 2224 }
@@ -2183,9 +2239,8 @@ xfs_ifree_cluster(
2183 iip->ili_last_fields = iip->ili_format.ilf_fields; 2239 iip->ili_last_fields = iip->ili_format.ilf_fields;
2184 iip->ili_format.ilf_fields = 0; 2240 iip->ili_format.ilf_fields = 0;
2185 iip->ili_logged = 1; 2241 iip->ili_logged = 1;
2186 spin_lock(&mp->m_ail_lock); 2242 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2187 iip->ili_flush_lsn = iip->ili_item.li_lsn; 2243 &iip->ili_item.li_lsn);
2188 spin_unlock(&mp->m_ail_lock);
2189 2244
2190 xfs_buf_attach_iodone(bp, 2245 xfs_buf_attach_iodone(bp,
2191 (void(*)(xfs_buf_t*,xfs_log_item_t*)) 2246 (void(*)(xfs_buf_t*,xfs_log_item_t*))
@@ -2312,9 +2367,10 @@ xfs_iroot_realloc(
2312 int rec_diff, 2367 int rec_diff,
2313 int whichfork) 2368 int whichfork)
2314{ 2369{
2370 struct xfs_mount *mp = ip->i_mount;
2315 int cur_max; 2371 int cur_max;
2316 xfs_ifork_t *ifp; 2372 xfs_ifork_t *ifp;
2317 xfs_bmbt_block_t *new_broot; 2373 struct xfs_btree_block *new_broot;
2318 int new_max; 2374 int new_max;
2319 size_t new_size; 2375 size_t new_size;
2320 char *np; 2376 char *np;
@@ -2335,8 +2391,7 @@ xfs_iroot_realloc(
2335 */ 2391 */
2336 if (ifp->if_broot_bytes == 0) { 2392 if (ifp->if_broot_bytes == 0) {
2337 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 2393 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
2338 ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size, 2394 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
2339 KM_SLEEP);
2340 ifp->if_broot_bytes = (int)new_size; 2395 ifp->if_broot_bytes = (int)new_size;
2341 return; 2396 return;
2342 } 2397 }
@@ -2347,18 +2402,16 @@ xfs_iroot_realloc(
2347 * location. The records don't change location because 2402 * location. The records don't change location because
2348 * they are kept butted up against the btree block header. 2403 * they are kept butted up against the btree block header.
2349 */ 2404 */
2350 cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); 2405 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
2351 new_max = cur_max + rec_diff; 2406 new_max = cur_max + rec_diff;
2352 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2407 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
2353 ifp->if_broot = (xfs_bmbt_block_t *) 2408 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
2354 kmem_realloc(ifp->if_broot,
2355 new_size,
2356 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 2409 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
2357 KM_SLEEP); 2410 KM_SLEEP);
2358 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2411 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2359 ifp->if_broot_bytes); 2412 ifp->if_broot_bytes);
2360 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2413 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2361 (int)new_size); 2414 (int)new_size);
2362 ifp->if_broot_bytes = (int)new_size; 2415 ifp->if_broot_bytes = (int)new_size;
2363 ASSERT(ifp->if_broot_bytes <= 2416 ASSERT(ifp->if_broot_bytes <=
2364 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 2417 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
@@ -2372,7 +2425,7 @@ xfs_iroot_realloc(
2372 * records, just get rid of the root and clear the status bit. 2425 * records, just get rid of the root and clear the status bit.
2373 */ 2426 */
2374 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); 2427 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
2375 cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); 2428 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
2376 new_max = cur_max + rec_diff; 2429 new_max = cur_max + rec_diff;
2377 ASSERT(new_max >= 0); 2430 ASSERT(new_max >= 0);
2378 if (new_max > 0) 2431 if (new_max > 0)
@@ -2380,11 +2433,11 @@ xfs_iroot_realloc(
2380 else 2433 else
2381 new_size = 0; 2434 new_size = 0;
2382 if (new_size > 0) { 2435 if (new_size > 0) {
2383 new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP); 2436 new_broot = kmem_alloc(new_size, KM_SLEEP);
2384 /* 2437 /*
2385 * First copy over the btree block header. 2438 * First copy over the btree block header.
2386 */ 2439 */
2387 memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t)); 2440 memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
2388 } else { 2441 } else {
2389 new_broot = NULL; 2442 new_broot = NULL;
2390 ifp->if_flags &= ~XFS_IFBROOT; 2443 ifp->if_flags &= ~XFS_IFBROOT;
@@ -2397,18 +2450,16 @@ xfs_iroot_realloc(
2397 /* 2450 /*
2398 * First copy the records. 2451 * First copy the records.
2399 */ 2452 */
2400 op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1, 2453 op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
2401 ifp->if_broot_bytes); 2454 np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
2402 np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
2403 (int)new_size);
2404 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); 2455 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
2405 2456
2406 /* 2457 /*
2407 * Then copy the pointers. 2458 * Then copy the pointers.
2408 */ 2459 */
2409 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2460 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2410 ifp->if_broot_bytes); 2461 ifp->if_broot_bytes);
2411 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1, 2462 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
2412 (int)new_size); 2463 (int)new_size);
2413 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 2464 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
2414 } 2465 }
@@ -2617,6 +2668,10 @@ xfs_idestroy_fork(
2617 * It must free the inode itself and any buffers allocated for 2668 * It must free the inode itself and any buffers allocated for
2618 * if_extents/if_data and if_broot. It must also free the lock 2669 * if_extents/if_data and if_broot. It must also free the lock
2619 * associated with the inode. 2670 * associated with the inode.
2671 *
2672 * Note: because we don't initialise everything on reallocation out
2673 * of the zone, we must ensure we nullify everything correctly before
2674 * freeing the structure.
2620 */ 2675 */
2621void 2676void
2622xfs_idestroy( 2677xfs_idestroy(
@@ -2631,8 +2686,6 @@ xfs_idestroy(
2631 } 2686 }
2632 if (ip->i_afp) 2687 if (ip->i_afp)
2633 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 2688 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
2634 mrfree(&ip->i_lock);
2635 mrfree(&ip->i_iolock);
2636 2689
2637#ifdef XFS_INODE_TRACE 2690#ifdef XFS_INODE_TRACE
2638 ktrace_free(ip->i_trace); 2691 ktrace_free(ip->i_trace);
@@ -2640,7 +2693,7 @@ xfs_idestroy(
2640#ifdef XFS_BMAP_TRACE 2693#ifdef XFS_BMAP_TRACE
2641 ktrace_free(ip->i_xtrace); 2694 ktrace_free(ip->i_xtrace);
2642#endif 2695#endif
2643#ifdef XFS_BMBT_TRACE 2696#ifdef XFS_BTREE_TRACE
2644 ktrace_free(ip->i_btrace); 2697 ktrace_free(ip->i_btrace);
2645#endif 2698#endif
2646#ifdef XFS_RW_TRACE 2699#ifdef XFS_RW_TRACE
@@ -2658,20 +2711,26 @@ xfs_idestroy(
2658 * inode still in the AIL. If it is there, we should remove 2711 * inode still in the AIL. If it is there, we should remove
2659 * it to prevent a use-after-free from occurring. 2712 * it to prevent a use-after-free from occurring.
2660 */ 2713 */
2661 xfs_mount_t *mp = ip->i_mount;
2662 xfs_log_item_t *lip = &ip->i_itemp->ili_item; 2714 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
2715 struct xfs_ail *ailp = lip->li_ailp;
2663 2716
2664 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || 2717 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
2665 XFS_FORCED_SHUTDOWN(ip->i_mount)); 2718 XFS_FORCED_SHUTDOWN(ip->i_mount));
2666 if (lip->li_flags & XFS_LI_IN_AIL) { 2719 if (lip->li_flags & XFS_LI_IN_AIL) {
2667 spin_lock(&mp->m_ail_lock); 2720 spin_lock(&ailp->xa_lock);
2668 if (lip->li_flags & XFS_LI_IN_AIL) 2721 if (lip->li_flags & XFS_LI_IN_AIL)
2669 xfs_trans_delete_ail(mp, lip); 2722 xfs_trans_ail_delete(ailp, lip);
2670 else 2723 else
2671 spin_unlock(&mp->m_ail_lock); 2724 spin_unlock(&ailp->xa_lock);
2672 } 2725 }
2673 xfs_inode_item_destroy(ip); 2726 xfs_inode_item_destroy(ip);
2727 ip->i_itemp = NULL;
2674 } 2728 }
2729 /* asserts to verify all state is correct here */
2730 ASSERT(atomic_read(&ip->i_iocount) == 0);
2731 ASSERT(atomic_read(&ip->i_pincount) == 0);
2732 ASSERT(!spin_is_locked(&ip->i_flags_lock));
2733 ASSERT(completion_done(&ip->i_flush));
2675 kmem_zone_free(xfs_inode_zone, ip); 2734 kmem_zone_free(xfs_inode_zone, ip);
2676} 2735}
2677 2736
@@ -2880,7 +2939,7 @@ xfs_iflush_fork(
2880 ASSERT(ifp->if_broot_bytes <= 2939 ASSERT(ifp->if_broot_bytes <=
2881 (XFS_IFORK_SIZE(ip, whichfork) + 2940 (XFS_IFORK_SIZE(ip, whichfork) +
2882 XFS_BROOT_SIZE_ADJ)); 2941 XFS_BROOT_SIZE_ADJ));
2883 xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes, 2942 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
2884 (xfs_bmdr_block_t *)cp, 2943 (xfs_bmdr_block_t *)cp,
2885 XFS_DFORK_SIZE(dip, mp, whichfork)); 2944 XFS_DFORK_SIZE(dip, mp, whichfork));
2886 } 2945 }
@@ -3418,10 +3477,8 @@ xfs_iflush_int(
3418 iip->ili_format.ilf_fields = 0; 3477 iip->ili_format.ilf_fields = 0;
3419 iip->ili_logged = 1; 3478 iip->ili_logged = 1;
3420 3479
3421 ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ 3480 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
3422 spin_lock(&mp->m_ail_lock); 3481 &iip->ili_item.li_lsn);
3423 iip->ili_flush_lsn = iip->ili_item.li_lsn;
3424 spin_unlock(&mp->m_ail_lock);
3425 3482
3426 /* 3483 /*
3427 * Attach the function xfs_iflush_done to the inode's 3484 * Attach the function xfs_iflush_done to the inode's
@@ -3459,41 +3516,6 @@ corrupt_out:
3459} 3516}
3460 3517
3461 3518
3462/*
3463 * Flush all inactive inodes in mp.
3464 */
3465void
3466xfs_iflush_all(
3467 xfs_mount_t *mp)
3468{
3469 xfs_inode_t *ip;
3470
3471 again:
3472 XFS_MOUNT_ILOCK(mp);
3473 ip = mp->m_inodes;
3474 if (ip == NULL)
3475 goto out;
3476
3477 do {
3478 /* Make sure we skip markers inserted by sync */
3479 if (ip->i_mount == NULL) {
3480 ip = ip->i_mnext;
3481 continue;
3482 }
3483
3484 if (!VFS_I(ip)) {
3485 XFS_MOUNT_IUNLOCK(mp);
3486 xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
3487 goto again;
3488 }
3489
3490 ASSERT(vn_count(VFS_I(ip)) == 0);
3491
3492 ip = ip->i_mnext;
3493 } while (ip != mp->m_inodes);
3494 out:
3495 XFS_MOUNT_IUNLOCK(mp);
3496}
3497 3519
3498#ifdef XFS_ILOCK_TRACE 3520#ifdef XFS_ILOCK_TRACE
3499ktrace_t *xfs_ilock_trace_buf; 3521ktrace_t *xfs_ilock_trace_buf;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1420c49674d7..7f007ef4bbb3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -20,7 +20,7 @@
20 20
21struct xfs_dinode; 21struct xfs_dinode;
22struct xfs_dinode_core; 22struct xfs_dinode_core;
23 23struct xfs_inode;
24 24
25/* 25/*
26 * Fork identifiers. 26 * Fork identifiers.
@@ -63,7 +63,7 @@ typedef struct xfs_ext_irec {
63typedef struct xfs_ifork { 63typedef struct xfs_ifork {
64 int if_bytes; /* bytes in if_u1 */ 64 int if_bytes; /* bytes in if_u1 */
65 int if_real_bytes; /* bytes allocated in if_u1 */ 65 int if_real_bytes; /* bytes allocated in if_u1 */
66 xfs_bmbt_block_t *if_broot; /* file's incore btree root */ 66 struct xfs_btree_block *if_broot; /* file's incore btree root */
67 short if_broot_bytes; /* bytes allocated for root */ 67 short if_broot_bytes; /* bytes allocated for root */
68 unsigned char if_flags; /* per-fork flags */ 68 unsigned char if_flags; /* per-fork flags */
69 unsigned char if_ext_max; /* max # of extent records */ 69 unsigned char if_ext_max; /* max # of extent records */
@@ -84,54 +84,6 @@ typedef struct xfs_ifork {
84} xfs_ifork_t; 84} xfs_ifork_t;
85 85
86/* 86/*
87 * Flags for xfs_ichgtime().
88 */
89#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
90#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
91
92/*
93 * Per-fork incore inode flags.
94 */
95#define XFS_IFINLINE 0x01 /* Inline data is read in */
96#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
97#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
98#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
99
100/*
101 * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate().
102 */
103#define XFS_IMAP_LOOKUP 0x1
104#define XFS_IMAP_BULKSTAT 0x2
105
106#ifdef __KERNEL__
107struct bhv_desc;
108struct cred;
109struct ktrace;
110struct xfs_buf;
111struct xfs_bmap_free;
112struct xfs_bmbt_irec;
113struct xfs_bmbt_block;
114struct xfs_inode;
115struct xfs_inode_log_item;
116struct xfs_mount;
117struct xfs_trans;
118struct xfs_dquot;
119
120#if defined(XFS_ILOCK_TRACE)
121#define XFS_ILOCK_KTRACE_SIZE 32
122extern ktrace_t *xfs_ilock_trace_buf;
123extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
124#else
125#define xfs_ilock_trace(i,n,f,ra)
126#endif
127
128typedef struct dm_attrs_s {
129 __uint32_t da_dmevmask; /* DMIG event mask */
130 __uint16_t da_dmstate; /* DMIG state info */
131 __uint16_t da_pad; /* DMIG extra padding */
132} dm_attrs_t;
133
134/*
135 * This is the xfs in-core inode structure. 87 * This is the xfs in-core inode structure.
136 * Most of the on-disk inode is embedded in the i_d field. 88 * Most of the on-disk inode is embedded in the i_d field.
137 * 89 *
@@ -191,19 +143,98 @@ typedef struct xfs_icdinode {
191 __uint32_t di_gen; /* generation number */ 143 __uint32_t di_gen; /* generation number */
192} xfs_icdinode_t; 144} xfs_icdinode_t;
193 145
194typedef struct { 146/*
195 struct xfs_inode *ip_mnext; /* next inode in mount list */ 147 * Flags for xfs_ichgtime().
196 struct xfs_inode *ip_mprev; /* ptr to prev inode */ 148 */
197 struct xfs_mount *ip_mount; /* fs mount struct ptr */ 149#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
198} xfs_iptr_t; 150#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
151
152/*
153 * Per-fork incore inode flags.
154 */
155#define XFS_IFINLINE 0x01 /* Inline data is read in */
156#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
157#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
158#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
159
160/*
161 * Flags for xfs_inotobp, xfs_itobp(), xfs_imap() and xfs_dilocate().
162 */
163#define XFS_IMAP_LOOKUP 0x1
164#define XFS_IMAP_BULKSTAT 0x2
165
166/*
167 * Fork handling.
168 */
169
170#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0)
171#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3))
172
173#define XFS_IFORK_PTR(ip,w) \
174 ((w) == XFS_DATA_FORK ? \
175 &(ip)->i_df : \
176 (ip)->i_afp)
177#define XFS_IFORK_DSIZE(ip) \
178 (XFS_IFORK_Q(ip) ? \
179 XFS_IFORK_BOFF(ip) : \
180 XFS_LITINO((ip)->i_mount))
181#define XFS_IFORK_ASIZE(ip) \
182 (XFS_IFORK_Q(ip) ? \
183 XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
184 0)
185#define XFS_IFORK_SIZE(ip,w) \
186 ((w) == XFS_DATA_FORK ? \
187 XFS_IFORK_DSIZE(ip) : \
188 XFS_IFORK_ASIZE(ip))
189#define XFS_IFORK_FORMAT(ip,w) \
190 ((w) == XFS_DATA_FORK ? \
191 (ip)->i_d.di_format : \
192 (ip)->i_d.di_aformat)
193#define XFS_IFORK_FMT_SET(ip,w,n) \
194 ((w) == XFS_DATA_FORK ? \
195 ((ip)->i_d.di_format = (n)) : \
196 ((ip)->i_d.di_aformat = (n)))
197#define XFS_IFORK_NEXTENTS(ip,w) \
198 ((w) == XFS_DATA_FORK ? \
199 (ip)->i_d.di_nextents : \
200 (ip)->i_d.di_anextents)
201#define XFS_IFORK_NEXT_SET(ip,w,n) \
202 ((w) == XFS_DATA_FORK ? \
203 ((ip)->i_d.di_nextents = (n)) : \
204 ((ip)->i_d.di_anextents = (n)))
205
206
207
208#ifdef __KERNEL__
209
210struct bhv_desc;
211struct cred;
212struct ktrace;
213struct xfs_buf;
214struct xfs_bmap_free;
215struct xfs_bmbt_irec;
216struct xfs_inode_log_item;
217struct xfs_mount;
218struct xfs_trans;
219struct xfs_dquot;
220
221#if defined(XFS_ILOCK_TRACE)
222#define XFS_ILOCK_KTRACE_SIZE 32
223extern ktrace_t *xfs_ilock_trace_buf;
224extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
225#else
226#define xfs_ilock_trace(i,n,f,ra)
227#endif
228
229typedef struct dm_attrs_s {
230 __uint32_t da_dmevmask; /* DMIG event mask */
231 __uint16_t da_dmstate; /* DMIG state info */
232 __uint16_t da_pad; /* DMIG extra padding */
233} dm_attrs_t;
199 234
200typedef struct xfs_inode { 235typedef struct xfs_inode {
201 /* Inode linking and identification information. */ 236 /* Inode linking and identification information. */
202 struct xfs_inode *i_mnext; /* next inode in mount list */
203 struct xfs_inode *i_mprev; /* ptr to prev inode */
204 struct xfs_mount *i_mount; /* fs mount struct ptr */ 237 struct xfs_mount *i_mount; /* fs mount struct ptr */
205 struct list_head i_reclaim; /* reclaim list */
206 struct inode *i_vnode; /* vnode backpointer */
207 struct xfs_dquot *i_udquot; /* user dquot */ 238 struct xfs_dquot *i_udquot; /* user dquot */
208 struct xfs_dquot *i_gdquot; /* group dquot */ 239 struct xfs_dquot *i_gdquot; /* group dquot */
209 240
@@ -238,6 +269,10 @@ typedef struct xfs_inode {
238 xfs_fsize_t i_size; /* in-memory size */ 269 xfs_fsize_t i_size; /* in-memory size */
239 xfs_fsize_t i_new_size; /* size when write completes */ 270 xfs_fsize_t i_new_size; /* size when write completes */
240 atomic_t i_iocount; /* outstanding I/O count */ 271 atomic_t i_iocount; /* outstanding I/O count */
272
273 /* VFS inode */
274 struct inode i_vnode; /* embedded VFS inode */
275
241 /* Trace buffers per inode. */ 276 /* Trace buffers per inode. */
242#ifdef XFS_INODE_TRACE 277#ifdef XFS_INODE_TRACE
243 struct ktrace *i_trace; /* general inode trace */ 278 struct ktrace *i_trace; /* general inode trace */
@@ -245,7 +280,7 @@ typedef struct xfs_inode {
245#ifdef XFS_BMAP_TRACE 280#ifdef XFS_BMAP_TRACE
246 struct ktrace *i_xtrace; /* inode extent list trace */ 281 struct ktrace *i_xtrace; /* inode extent list trace */
247#endif 282#endif
248#ifdef XFS_BMBT_TRACE 283#ifdef XFS_BTREE_TRACE
249 struct ktrace *i_btrace; /* inode bmap btree trace */ 284 struct ktrace *i_btrace; /* inode bmap btree trace */
250#endif 285#endif
251#ifdef XFS_RW_TRACE 286#ifdef XFS_RW_TRACE
@@ -265,13 +300,30 @@ typedef struct xfs_inode {
265/* Convert from vfs inode to xfs inode */ 300/* Convert from vfs inode to xfs inode */
266static inline struct xfs_inode *XFS_I(struct inode *inode) 301static inline struct xfs_inode *XFS_I(struct inode *inode)
267{ 302{
268 return (struct xfs_inode *)inode->i_private; 303 return container_of(inode, struct xfs_inode, i_vnode);
269} 304}
270 305
271/* convert from xfs inode to vfs inode */ 306/* convert from xfs inode to vfs inode */
272static inline struct inode *VFS_I(struct xfs_inode *ip) 307static inline struct inode *VFS_I(struct xfs_inode *ip)
273{ 308{
274 return (struct inode *)ip->i_vnode; 309 return &ip->i_vnode;
310}
311
312/*
313 * Get rid of a partially initialized inode.
314 *
315 * We have to go through destroy_inode to make sure allocations
316 * from init_inode_always like the security data are undone.
317 *
318 * We mark the inode bad so that it takes the short cut in
319 * the reclaim path instead of going through the flush path
320 * which doesn't make sense for an inode that has never seen the
321 * light of day.
322 */
323static inline void xfs_destroy_inode(struct xfs_inode *ip)
324{
325 make_bad_inode(VFS_I(ip));
326 return destroy_inode(VFS_I(ip));
275} 327}
276 328
277/* 329/*
@@ -327,50 +379,26 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
327 spin_unlock(&ip->i_flags_lock); 379 spin_unlock(&ip->i_flags_lock);
328 return ret; 380 return ret;
329} 381}
330#endif /* __KERNEL__ */
331
332 382
333/* 383/*
334 * Fork handling. 384 * Manage the i_flush queue embedded in the inode. This completion
385 * queue synchronizes processes attempting to flush the in-core
386 * inode back to disk.
335 */ 387 */
388static inline void xfs_iflock(xfs_inode_t *ip)
389{
390 wait_for_completion(&ip->i_flush);
391}
336 392
337#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) 393static inline int xfs_iflock_nowait(xfs_inode_t *ip)
338#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) 394{
339 395 return try_wait_for_completion(&ip->i_flush);
340#define XFS_IFORK_PTR(ip,w) \ 396}
341 ((w) == XFS_DATA_FORK ? \
342 &(ip)->i_df : \
343 (ip)->i_afp)
344#define XFS_IFORK_DSIZE(ip) \
345 (XFS_IFORK_Q(ip) ? \
346 XFS_IFORK_BOFF(ip) : \
347 XFS_LITINO((ip)->i_mount))
348#define XFS_IFORK_ASIZE(ip) \
349 (XFS_IFORK_Q(ip) ? \
350 XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
351 0)
352#define XFS_IFORK_SIZE(ip,w) \
353 ((w) == XFS_DATA_FORK ? \
354 XFS_IFORK_DSIZE(ip) : \
355 XFS_IFORK_ASIZE(ip))
356#define XFS_IFORK_FORMAT(ip,w) \
357 ((w) == XFS_DATA_FORK ? \
358 (ip)->i_d.di_format : \
359 (ip)->i_d.di_aformat)
360#define XFS_IFORK_FMT_SET(ip,w,n) \
361 ((w) == XFS_DATA_FORK ? \
362 ((ip)->i_d.di_format = (n)) : \
363 ((ip)->i_d.di_aformat = (n)))
364#define XFS_IFORK_NEXTENTS(ip,w) \
365 ((w) == XFS_DATA_FORK ? \
366 (ip)->i_d.di_nextents : \
367 (ip)->i_d.di_anextents)
368#define XFS_IFORK_NEXT_SET(ip,w,n) \
369 ((w) == XFS_DATA_FORK ? \
370 ((ip)->i_d.di_nextents = (n)) : \
371 ((ip)->i_d.di_anextents = (n)))
372 397
373#ifdef __KERNEL__ 398static inline void xfs_ifunlock(xfs_inode_t *ip)
399{
400 complete(&ip->i_flush);
401}
374 402
375/* 403/*
376 * In-core inode flags. 404 * In-core inode flags.
@@ -484,25 +512,15 @@ int xfs_isilocked(xfs_inode_t *, uint);
484uint xfs_ilock_map_shared(xfs_inode_t *); 512uint xfs_ilock_map_shared(xfs_inode_t *);
485void xfs_iunlock_map_shared(xfs_inode_t *, uint); 513void xfs_iunlock_map_shared(xfs_inode_t *, uint);
486void xfs_ireclaim(xfs_inode_t *); 514void xfs_ireclaim(xfs_inode_t *);
487int xfs_finish_reclaim(xfs_inode_t *, int, int);
488int xfs_finish_reclaim_all(struct xfs_mount *, int);
489 515
490/* 516/*
491 * xfs_inode.c prototypes. 517 * xfs_inode.c prototypes.
492 */ 518 */
493int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
494 xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
495 xfs_daddr_t, uint, uint);
496int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 519int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
497 xfs_inode_t **, xfs_daddr_t, uint); 520 xfs_inode_t **, xfs_daddr_t, uint);
498int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
499int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 521int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
500 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, 522 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t,
501 int, struct xfs_buf **, boolean_t *, xfs_inode_t **); 523 int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
502void xfs_dinode_from_disk(struct xfs_icdinode *,
503 struct xfs_dinode_core *);
504void xfs_dinode_to_disk(struct xfs_dinode_core *,
505 struct xfs_icdinode *);
506 524
507uint xfs_ip2xflags(struct xfs_inode *); 525uint xfs_ip2xflags(struct xfs_inode *);
508uint xfs_dic2xflags(struct xfs_dinode *); 526uint xfs_dic2xflags(struct xfs_dinode *);
@@ -513,17 +531,12 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
513 xfs_fsize_t, int, int); 531 xfs_fsize_t, int, int);
514int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 532int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
515 533
516void xfs_idestroy_fork(xfs_inode_t *, int);
517void xfs_idestroy(xfs_inode_t *); 534void xfs_idestroy(xfs_inode_t *);
518void xfs_idata_realloc(xfs_inode_t *, int, int);
519void xfs_iextract(xfs_inode_t *); 535void xfs_iextract(xfs_inode_t *);
520void xfs_iext_realloc(xfs_inode_t *, int, int); 536void xfs_iext_realloc(xfs_inode_t *, int, int);
521void xfs_iroot_realloc(xfs_inode_t *, int, int);
522void xfs_ipin(xfs_inode_t *); 537void xfs_ipin(xfs_inode_t *);
523void xfs_iunpin(xfs_inode_t *); 538void xfs_iunpin(xfs_inode_t *);
524int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
525int xfs_iflush(xfs_inode_t *, uint); 539int xfs_iflush(xfs_inode_t *, uint);
526void xfs_iflush_all(struct xfs_mount *);
527void xfs_ichgtime(xfs_inode_t *, int); 540void xfs_ichgtime(xfs_inode_t *, int);
528xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 541xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
529void xfs_lock_inodes(xfs_inode_t **, int, uint); 542void xfs_lock_inodes(xfs_inode_t **, int, uint);
@@ -532,6 +545,24 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
532void xfs_synchronize_atime(xfs_inode_t *); 545void xfs_synchronize_atime(xfs_inode_t *);
533void xfs_mark_inode_dirty_sync(xfs_inode_t *); 546void xfs_mark_inode_dirty_sync(xfs_inode_t *);
534 547
548#endif /* __KERNEL__ */
549
550int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
551 xfs_ino_t, struct xfs_dinode **,
552 struct xfs_buf **, int *, uint);
553int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
554 struct xfs_inode *, struct xfs_dinode **,
555 struct xfs_buf **, xfs_daddr_t, uint, uint);
556void xfs_dinode_from_disk(struct xfs_icdinode *,
557 struct xfs_dinode_core *);
558void xfs_dinode_to_disk(struct xfs_dinode_core *,
559 struct xfs_icdinode *);
560void xfs_idestroy_fork(struct xfs_inode *, int);
561void xfs_idata_realloc(struct xfs_inode *, int, int);
562void xfs_iroot_realloc(struct xfs_inode *, int, int);
563int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
564int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
565
535xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); 566xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
536void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 567void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
537 xfs_bmbt_irec_t *); 568 xfs_bmbt_irec_t *);
@@ -561,7 +592,8 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
561#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 592#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
562 593
563#ifdef DEBUG 594#ifdef DEBUG
564void xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t); 595void xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
596 xfs_fsize_t);
565#else /* DEBUG */ 597#else /* DEBUG */
566#define xfs_isize_check(mp, ip, isize) 598#define xfs_isize_check(mp, ip, isize)
567#endif /* DEBUG */ 599#endif /* DEBUG */
@@ -576,26 +608,4 @@ extern struct kmem_zone *xfs_ifork_zone;
576extern struct kmem_zone *xfs_inode_zone; 608extern struct kmem_zone *xfs_inode_zone;
577extern struct kmem_zone *xfs_ili_zone; 609extern struct kmem_zone *xfs_ili_zone;
578 610
579/*
580 * Manage the i_flush queue embedded in the inode. This completion
581 * queue synchronizes processes attempting to flush the in-core
582 * inode back to disk.
583 */
584static inline void xfs_iflock(xfs_inode_t *ip)
585{
586 wait_for_completion(&ip->i_flush);
587}
588
589static inline int xfs_iflock_nowait(xfs_inode_t *ip)
590{
591 return try_wait_for_completion(&ip->i_flush);
592}
593
594static inline void xfs_ifunlock(xfs_inode_t *ip)
595{
596 complete(&ip->i_flush);
597}
598
599#endif /* __KERNEL__ */
600
601#endif /* __XFS_INODE_H__ */ 611#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 97c7452e2620..aa9bf05060c6 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -932,6 +932,7 @@ xfs_inode_item_init(
932 iip->ili_item.li_type = XFS_LI_INODE; 932 iip->ili_item.li_type = XFS_LI_INODE;
933 iip->ili_item.li_ops = &xfs_inode_item_ops; 933 iip->ili_item.li_ops = &xfs_inode_item_ops;
934 iip->ili_item.li_mountp = mp; 934 iip->ili_item.li_mountp = mp;
935 iip->ili_item.li_ailp = mp->m_ail;
935 iip->ili_inode = ip; 936 iip->ili_inode = ip;
936 937
937 /* 938 /*
@@ -976,9 +977,8 @@ xfs_iflush_done(
976 xfs_buf_t *bp, 977 xfs_buf_t *bp,
977 xfs_inode_log_item_t *iip) 978 xfs_inode_log_item_t *iip)
978{ 979{
979 xfs_inode_t *ip; 980 xfs_inode_t *ip = iip->ili_inode;
980 981 struct xfs_ail *ailp = iip->ili_item.li_ailp;
981 ip = iip->ili_inode;
982 982
983 /* 983 /*
984 * We only want to pull the item from the AIL if it is 984 * We only want to pull the item from the AIL if it is
@@ -991,15 +991,12 @@ xfs_iflush_done(
991 */ 991 */
992 if (iip->ili_logged && 992 if (iip->ili_logged &&
993 (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { 993 (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
994 spin_lock(&ip->i_mount->m_ail_lock); 994 spin_lock(&ailp->xa_lock);
995 if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { 995 if (iip->ili_item.li_lsn == iip->ili_flush_lsn) {
996 /* 996 /* xfs_trans_ail_delete() drops the AIL lock. */
997 * xfs_trans_delete_ail() drops the AIL lock. 997 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip);
998 */
999 xfs_trans_delete_ail(ip->i_mount,
1000 (xfs_log_item_t*)iip);
1001 } else { 998 } else {
1002 spin_unlock(&ip->i_mount->m_ail_lock); 999 spin_unlock(&ailp->xa_lock);
1003 } 1000 }
1004 } 1001 }
1005 1002
@@ -1031,21 +1028,20 @@ void
1031xfs_iflush_abort( 1028xfs_iflush_abort(
1032 xfs_inode_t *ip) 1029 xfs_inode_t *ip)
1033{ 1030{
1034 xfs_inode_log_item_t *iip; 1031 xfs_inode_log_item_t *iip = ip->i_itemp;
1035 xfs_mount_t *mp; 1032 xfs_mount_t *mp;
1036 1033
1037 iip = ip->i_itemp; 1034 iip = ip->i_itemp;
1038 mp = ip->i_mount; 1035 mp = ip->i_mount;
1039 if (iip) { 1036 if (iip) {
1037 struct xfs_ail *ailp = iip->ili_item.li_ailp;
1040 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 1038 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
1041 spin_lock(&mp->m_ail_lock); 1039 spin_lock(&ailp->xa_lock);
1042 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 1040 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
1043 /* 1041 /* xfs_trans_ail_delete() drops the AIL lock. */
1044 * xfs_trans_delete_ail() drops the AIL lock. 1042 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip);
1045 */
1046 xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip);
1047 } else 1043 } else
1048 spin_unlock(&mp->m_ail_lock); 1044 spin_unlock(&ailp->xa_lock);
1049 } 1045 }
1050 iip->ili_logged = 0; 1046 iip->ili_logged = 0;
1051 /* 1047 /*
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 40513077ab36..1ff04cc323ad 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -112,6 +112,24 @@ typedef struct xfs_inode_log_format_64 {
112#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED) 112#define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
113 113
114 114
115#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w)
116static inline int xfs_ilog_fbroot(int w)
117{
118 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
119}
120
121#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w)
122static inline int xfs_ilog_fext(int w)
123{
124 return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
125}
126
127#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w)
128static inline int xfs_ilog_fdata(int w)
129{
130 return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
131}
132
115#ifdef __KERNEL__ 133#ifdef __KERNEL__
116 134
117struct xfs_buf; 135struct xfs_buf;
@@ -148,26 +166,6 @@ typedef struct xfs_inode_log_item {
148} xfs_inode_log_item_t; 166} xfs_inode_log_item_t;
149 167
150 168
151#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w)
152static inline int xfs_ilog_fdata(int w)
153{
154 return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
155}
156
157#endif /* __KERNEL__ */
158
159#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w)
160static inline int xfs_ilog_fbroot(int w)
161{
162 return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
163}
164
165#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w)
166static inline int xfs_ilog_fext(int w)
167{
168 return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
169}
170
171static inline int xfs_inode_clean(xfs_inode_t *ip) 169static inline int xfs_inode_clean(xfs_inode_t *ip)
172{ 170{
173 return (!ip->i_itemp || 171 return (!ip->i_itemp ||
@@ -175,9 +173,6 @@ static inline int xfs_inode_clean(xfs_inode_t *ip)
175 !ip->i_update_core; 173 !ip->i_update_core;
176} 174}
177 175
178
179#ifdef __KERNEL__
180
181extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 176extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
182extern void xfs_inode_item_destroy(struct xfs_inode *); 177extern void xfs_inode_item_destroy(struct xfs_inode *);
183extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); 178extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index cf6754a3c5b3..35118032a5d6 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -359,7 +359,6 @@ xfs_bulkstat(
359 int ubused; /* bytes used by formatter */ 359 int ubused; /* bytes used by formatter */
360 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ 360 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
361 xfs_dinode_t *dip; /* ptr into bp for specific inode */ 361 xfs_dinode_t *dip; /* ptr into bp for specific inode */
362 xfs_inode_t *ip; /* ptr to in-core inode struct */
363 362
364 /* 363 /*
365 * Get the last inode value, see if there's nothing to do. 364 * Get the last inode value, see if there's nothing to do.
@@ -416,8 +415,7 @@ xfs_bulkstat(
416 /* 415 /*
417 * Allocate and initialize a btree cursor for ialloc btree. 416 * Allocate and initialize a btree cursor for ialloc btree.
418 */ 417 */
419 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO, 418 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
420 (xfs_inode_t *)0, 0);
421 irbp = irbuf; 419 irbp = irbuf;
422 irbufend = irbuf + nirbuf; 420 irbufend = irbuf + nirbuf;
423 end_of_ag = 0; 421 end_of_ag = 0;
@@ -472,7 +470,7 @@ xfs_bulkstat(
472 * In any case, increment to the next record. 470 * In any case, increment to the next record.
473 */ 471 */
474 if (!error) 472 if (!error)
475 error = xfs_inobt_increment(cur, 0, &tmp); 473 error = xfs_btree_increment(cur, 0, &tmp);
476 } else { 474 } else {
477 /* 475 /*
478 * Start of ag. Lookup the first inode chunk. 476 * Start of ag. Lookup the first inode chunk.
@@ -539,7 +537,7 @@ xfs_bulkstat(
539 * Set agino to after this chunk and bump the cursor. 537 * Set agino to after this chunk and bump the cursor.
540 */ 538 */
541 agino = gino + XFS_INODES_PER_CHUNK; 539 agino = gino + XFS_INODES_PER_CHUNK;
542 error = xfs_inobt_increment(cur, 0, &tmp); 540 error = xfs_btree_increment(cur, 0, &tmp);
543 cond_resched(); 541 cond_resched();
544 } 542 }
545 /* 543 /*
@@ -586,6 +584,8 @@ xfs_bulkstat(
586 584
587 if (flags & (BULKSTAT_FG_QUICK | 585 if (flags & (BULKSTAT_FG_QUICK |
588 BULKSTAT_FG_INLINE)) { 586 BULKSTAT_FG_INLINE)) {
587 int offset;
588
589 ino = XFS_AGINO_TO_INO(mp, agno, 589 ino = XFS_AGINO_TO_INO(mp, agno,
590 agino); 590 agino);
591 bno = XFS_AGB_TO_DADDR(mp, agno, 591 bno = XFS_AGB_TO_DADDR(mp, agno,
@@ -594,21 +594,15 @@ xfs_bulkstat(
594 /* 594 /*
595 * Get the inode cluster buffer 595 * Get the inode cluster buffer
596 */ 596 */
597 ASSERT(xfs_inode_zone != NULL);
598 ip = kmem_zone_zalloc(xfs_inode_zone,
599 KM_SLEEP);
600 ip->i_ino = ino;
601 ip->i_mount = mp;
602 spin_lock_init(&ip->i_flags_lock);
603 if (bp) 597 if (bp)
604 xfs_buf_relse(bp); 598 xfs_buf_relse(bp);
605 error = xfs_itobp(mp, NULL, ip, 599
606 &dip, &bp, bno, 600 error = xfs_inotobp(mp, NULL, ino, &dip,
607 XFS_IMAP_BULKSTAT, 601 &bp, &offset,
608 XFS_BUF_LOCK); 602 XFS_IMAP_BULKSTAT);
603
609 if (!error) 604 if (!error)
610 clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; 605 clustidx = offset / mp->m_sb.sb_inodesize;
611 kmem_zone_free(xfs_inode_zone, ip);
612 if (XFS_TEST_ERROR(error != 0, 606 if (XFS_TEST_ERROR(error != 0,
613 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, 607 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
614 XFS_RANDOM_BULKSTAT_READ_CHUNK)) { 608 XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
@@ -842,8 +836,7 @@ xfs_inumbers(
842 agino = 0; 836 agino = 0;
843 continue; 837 continue;
844 } 838 }
845 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, 839 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
846 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
847 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); 840 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
848 if (error) { 841 if (error) {
849 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 842 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -887,7 +880,7 @@ xfs_inumbers(
887 bufidx = 0; 880 bufidx = 0;
888 } 881 }
889 if (left) { 882 if (left) {
890 error = xfs_inobt_increment(cur, 0, &tmp); 883 error = xfs_btree_increment(cur, 0, &tmp);
891 if (error) { 884 if (error) {
892 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 885 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
893 cur = NULL; 886 cur = NULL;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 0b02c6443551..51840170b16c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -567,12 +567,12 @@ xfs_log_mount(
567 /* 567 /*
568 * Initialize the AIL now we have a log. 568 * Initialize the AIL now we have a log.
569 */ 569 */
570 spin_lock_init(&mp->m_ail_lock);
571 error = xfs_trans_ail_init(mp); 570 error = xfs_trans_ail_init(mp);
572 if (error) { 571 if (error) {
573 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); 572 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error);
574 goto error; 573 goto error;
575 } 574 }
575 mp->m_log->l_ailp = mp->m_ail;
576 576
577 /* 577 /*
578 * skip log recovery on a norecovery mount. pretend it all 578 * skip log recovery on a norecovery mount. pretend it all
@@ -900,7 +900,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
900int 900int
901xfs_log_need_covered(xfs_mount_t *mp) 901xfs_log_need_covered(xfs_mount_t *mp)
902{ 902{
903 int needed = 0, gen; 903 int needed = 0;
904 xlog_t *log = mp->m_log; 904 xlog_t *log = mp->m_log;
905 905
906 if (!xfs_fs_writable(mp)) 906 if (!xfs_fs_writable(mp))
@@ -909,7 +909,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
909 spin_lock(&log->l_icloglock); 909 spin_lock(&log->l_icloglock);
910 if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || 910 if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
911 (log->l_covered_state == XLOG_STATE_COVER_NEED2)) 911 (log->l_covered_state == XLOG_STATE_COVER_NEED2))
912 && !xfs_trans_first_ail(mp, &gen) 912 && !xfs_trans_ail_tail(log->l_ailp)
913 && xlog_iclogs_empty(log)) { 913 && xlog_iclogs_empty(log)) {
914 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 914 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
915 log->l_covered_state = XLOG_STATE_COVER_DONE; 915 log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -946,7 +946,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
946 xfs_lsn_t tail_lsn; 946 xfs_lsn_t tail_lsn;
947 xlog_t *log = mp->m_log; 947 xlog_t *log = mp->m_log;
948 948
949 tail_lsn = xfs_trans_tail_ail(mp); 949 tail_lsn = xfs_trans_ail_tail(mp->m_ail);
950 spin_lock(&log->l_grant_lock); 950 spin_lock(&log->l_grant_lock);
951 if (tail_lsn != 0) { 951 if (tail_lsn != 0) {
952 log->l_tail_lsn = tail_lsn; 952 log->l_tail_lsn = tail_lsn;
@@ -1413,7 +1413,7 @@ xlog_grant_push_ail(xfs_mount_t *mp,
1413 */ 1413 */
1414 if (threshold_lsn && 1414 if (threshold_lsn &&
1415 !XLOG_FORCED_SHUTDOWN(log)) 1415 !XLOG_FORCED_SHUTDOWN(log))
1416 xfs_trans_push_ail(mp, threshold_lsn); 1416 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1417} /* xlog_grant_push_ail */ 1417} /* xlog_grant_push_ail */
1418 1418
1419 1419
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e7d8f84443fa..de7ef6ca9206 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -404,6 +404,7 @@ typedef struct xlog_in_core {
404typedef struct log { 404typedef struct log {
405 /* The following fields don't need locking */ 405 /* The following fields don't need locking */
406 struct xfs_mount *l_mp; /* mount point */ 406 struct xfs_mount *l_mp; /* mount point */
407 struct xfs_ail *l_ailp; /* AIL log is working with */
407 struct xfs_buf *l_xbuf; /* extra buffer for log 408 struct xfs_buf *l_xbuf; /* extra buffer for log
408 * wrapping */ 409 * wrapping */
409 struct xfs_buftarg *l_targ; /* buftarg of log */ 410 struct xfs_buftarg *l_targ; /* buftarg of log */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 82d46ce69d5f..b411d4947318 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -54,10 +54,8 @@ STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
54 xlog_recover_item_t *item); 54 xlog_recover_item_t *item);
55#if defined(DEBUG) 55#if defined(DEBUG)
56STATIC void xlog_recover_check_summary(xlog_t *); 56STATIC void xlog_recover_check_summary(xlog_t *);
57STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int);
58#else 57#else
59#define xlog_recover_check_summary(log) 58#define xlog_recover_check_summary(log)
60#define xlog_recover_check_ail(mp, lip, gen)
61#endif 59#endif
62 60
63 61
@@ -1419,7 +1417,13 @@ xlog_recover_add_to_trans(
1419 return 0; 1417 return 0;
1420 item = trans->r_itemq; 1418 item = trans->r_itemq;
1421 if (item == NULL) { 1419 if (item == NULL) {
1422 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); 1420 /* we need to catch log corruptions here */
1421 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1422 xlog_warn("XFS: xlog_recover_add_to_trans: "
1423 "bad header magic number");
1424 ASSERT(0);
1425 return XFS_ERROR(EIO);
1426 }
1423 if (len == sizeof(xfs_trans_header_t)) 1427 if (len == sizeof(xfs_trans_header_t))
1424 xlog_recover_add_item(&trans->r_itemq); 1428 xlog_recover_add_item(&trans->r_itemq);
1425 memcpy(&trans->r_theader, dp, len); /* d, s, l */ 1429 memcpy(&trans->r_theader, dp, len); /* d, s, l */
@@ -2452,8 +2456,8 @@ xlog_recover_do_inode_trans(
2452 break; 2456 break;
2453 2457
2454 case XFS_ILOG_DBROOT: 2458 case XFS_ILOG_DBROOT:
2455 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, 2459 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
2456 &(dip->di_u.di_bmbt), 2460 &dip->di_u.di_bmbt,
2457 XFS_DFORK_DSIZE(dip, mp)); 2461 XFS_DFORK_DSIZE(dip, mp));
2458 break; 2462 break;
2459 2463
@@ -2490,8 +2494,8 @@ xlog_recover_do_inode_trans(
2490 2494
2491 case XFS_ILOG_ABROOT: 2495 case XFS_ILOG_ABROOT:
2492 dest = XFS_DFORK_APTR(dip); 2496 dest = XFS_DFORK_APTR(dip);
2493 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, 2497 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
2494 (xfs_bmdr_block_t*)dest, 2498 len, (xfs_bmdr_block_t*)dest,
2495 XFS_DFORK_ASIZE(dip, mp)); 2499 XFS_DFORK_ASIZE(dip, mp));
2496 break; 2500 break;
2497 2501
@@ -2683,11 +2687,11 @@ xlog_recover_do_efi_trans(
2683 efip->efi_next_extent = efi_formatp->efi_nextents; 2687 efip->efi_next_extent = efi_formatp->efi_nextents;
2684 efip->efi_flags |= XFS_EFI_COMMITTED; 2688 efip->efi_flags |= XFS_EFI_COMMITTED;
2685 2689
2686 spin_lock(&mp->m_ail_lock); 2690 spin_lock(&log->l_ailp->xa_lock);
2687 /* 2691 /*
2688 * xfs_trans_update_ail() drops the AIL lock. 2692 * xfs_trans_ail_update() drops the AIL lock.
2689 */ 2693 */
2690 xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn); 2694 xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
2691 return 0; 2695 return 0;
2692} 2696}
2693 2697
@@ -2706,12 +2710,12 @@ xlog_recover_do_efd_trans(
2706 xlog_recover_item_t *item, 2710 xlog_recover_item_t *item,
2707 int pass) 2711 int pass)
2708{ 2712{
2709 xfs_mount_t *mp;
2710 xfs_efd_log_format_t *efd_formatp; 2713 xfs_efd_log_format_t *efd_formatp;
2711 xfs_efi_log_item_t *efip = NULL; 2714 xfs_efi_log_item_t *efip = NULL;
2712 xfs_log_item_t *lip; 2715 xfs_log_item_t *lip;
2713 int gen;
2714 __uint64_t efi_id; 2716 __uint64_t efi_id;
2717 struct xfs_ail_cursor cur;
2718 struct xfs_ail *ailp = log->l_ailp;
2715 2719
2716 if (pass == XLOG_RECOVER_PASS1) { 2720 if (pass == XLOG_RECOVER_PASS1) {
2717 return; 2721 return;
@@ -2728,25 +2732,26 @@ xlog_recover_do_efd_trans(
2728 * Search for the efi with the id in the efd format structure 2732 * Search for the efi with the id in the efd format structure
2729 * in the AIL. 2733 * in the AIL.
2730 */ 2734 */
2731 mp = log->l_mp; 2735 spin_lock(&ailp->xa_lock);
2732 spin_lock(&mp->m_ail_lock); 2736 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
2733 lip = xfs_trans_first_ail(mp, &gen);
2734 while (lip != NULL) { 2737 while (lip != NULL) {
2735 if (lip->li_type == XFS_LI_EFI) { 2738 if (lip->li_type == XFS_LI_EFI) {
2736 efip = (xfs_efi_log_item_t *)lip; 2739 efip = (xfs_efi_log_item_t *)lip;
2737 if (efip->efi_format.efi_id == efi_id) { 2740 if (efip->efi_format.efi_id == efi_id) {
2738 /* 2741 /*
2739 * xfs_trans_delete_ail() drops the 2742 * xfs_trans_ail_delete() drops the
2740 * AIL lock. 2743 * AIL lock.
2741 */ 2744 */
2742 xfs_trans_delete_ail(mp, lip); 2745 xfs_trans_ail_delete(ailp, lip);
2743 xfs_efi_item_free(efip); 2746 xfs_efi_item_free(efip);
2744 return; 2747 spin_lock(&ailp->xa_lock);
2748 break;
2745 } 2749 }
2746 } 2750 }
2747 lip = xfs_trans_next_ail(mp, lip, &gen, NULL); 2751 lip = xfs_trans_ail_cursor_next(ailp, &cur);
2748 } 2752 }
2749 spin_unlock(&mp->m_ail_lock); 2753 xfs_trans_ail_cursor_done(ailp, &cur);
2754 spin_unlock(&ailp->xa_lock);
2750} 2755}
2751 2756
2752/* 2757/*
@@ -3030,33 +3035,6 @@ abort_error:
3030} 3035}
3031 3036
3032/* 3037/*
3033 * Verify that once we've encountered something other than an EFI
3034 * in the AIL that there are no more EFIs in the AIL.
3035 */
3036#if defined(DEBUG)
3037STATIC void
3038xlog_recover_check_ail(
3039 xfs_mount_t *mp,
3040 xfs_log_item_t *lip,
3041 int gen)
3042{
3043 int orig_gen = gen;
3044
3045 do {
3046 ASSERT(lip->li_type != XFS_LI_EFI);
3047 lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
3048 /*
3049 * The check will be bogus if we restart from the
3050 * beginning of the AIL, so ASSERT that we don't.
3051 * We never should since we're holding the AIL lock
3052 * the entire time.
3053 */
3054 ASSERT(gen == orig_gen);
3055 } while (lip != NULL);
3056}
3057#endif /* DEBUG */
3058
3059/*
3060 * When this is called, all of the EFIs which did not have 3038 * When this is called, all of the EFIs which did not have
3061 * corresponding EFDs should be in the AIL. What we do now 3039 * corresponding EFDs should be in the AIL. What we do now
3062 * is free the extents associated with each one. 3040 * is free the extents associated with each one.
@@ -3080,20 +3058,23 @@ xlog_recover_process_efis(
3080{ 3058{
3081 xfs_log_item_t *lip; 3059 xfs_log_item_t *lip;
3082 xfs_efi_log_item_t *efip; 3060 xfs_efi_log_item_t *efip;
3083 int gen;
3084 xfs_mount_t *mp;
3085 int error = 0; 3061 int error = 0;
3062 struct xfs_ail_cursor cur;
3063 struct xfs_ail *ailp;
3086 3064
3087 mp = log->l_mp; 3065 ailp = log->l_ailp;
3088 spin_lock(&mp->m_ail_lock); 3066 spin_lock(&ailp->xa_lock);
3089 3067 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
3090 lip = xfs_trans_first_ail(mp, &gen);
3091 while (lip != NULL) { 3068 while (lip != NULL) {
3092 /* 3069 /*
3093 * We're done when we see something other than an EFI. 3070 * We're done when we see something other than an EFI.
3071 * There should be no EFIs left in the AIL now.
3094 */ 3072 */
3095 if (lip->li_type != XFS_LI_EFI) { 3073 if (lip->li_type != XFS_LI_EFI) {
3096 xlog_recover_check_ail(mp, lip, gen); 3074#ifdef DEBUG
3075 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
3076 ASSERT(lip->li_type != XFS_LI_EFI);
3077#endif
3097 break; 3078 break;
3098 } 3079 }
3099 3080
@@ -3102,18 +3083,20 @@ xlog_recover_process_efis(
3102 */ 3083 */
3103 efip = (xfs_efi_log_item_t *)lip; 3084 efip = (xfs_efi_log_item_t *)lip;
3104 if (efip->efi_flags & XFS_EFI_RECOVERED) { 3085 if (efip->efi_flags & XFS_EFI_RECOVERED) {
3105 lip = xfs_trans_next_ail(mp, lip, &gen, NULL); 3086 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3106 continue; 3087 continue;
3107 } 3088 }
3108 3089
3109 spin_unlock(&mp->m_ail_lock); 3090 spin_unlock(&ailp->xa_lock);
3110 error = xlog_recover_process_efi(mp, efip); 3091 error = xlog_recover_process_efi(log->l_mp, efip);
3092 spin_lock(&ailp->xa_lock);
3111 if (error) 3093 if (error)
3112 return error; 3094 goto out;
3113 spin_lock(&mp->m_ail_lock); 3095 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3114 lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
3115 } 3096 }
3116 spin_unlock(&mp->m_ail_lock); 3097out:
3098 xfs_trans_ail_cursor_done(ailp, &cur);
3099 spin_unlock(&ailp->xa_lock);
3117 return error; 3100 return error;
3118} 3101}
3119 3102
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a4503f5e9497..177976dfea04 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -567,8 +567,6 @@ xfs_readsb(xfs_mount_t *mp, int flags)
567STATIC void 567STATIC void
568xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) 568xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
569{ 569{
570 int i;
571
572 mp->m_agfrotor = mp->m_agirotor = 0; 570 mp->m_agfrotor = mp->m_agirotor = 0;
573 spin_lock_init(&mp->m_agirotor_lock); 571 spin_lock_init(&mp->m_agirotor_lock);
574 mp->m_maxagi = mp->m_sb.sb_agcount; 572 mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -582,7 +580,6 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
582 mp->m_blockmask = sbp->sb_blocksize - 1; 580 mp->m_blockmask = sbp->sb_blocksize - 1;
583 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 581 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
584 mp->m_blockwmask = mp->m_blockwsize - 1; 582 mp->m_blockwmask = mp->m_blockwsize - 1;
585 INIT_LIST_HEAD(&mp->m_del_inodes);
586 583
587 /* 584 /*
588 * Setup for attributes, in case they get created. 585 * Setup for attributes, in case they get created.
@@ -605,24 +602,20 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
605 } 602 }
606 ASSERT(mp->m_attroffset < XFS_LITINO(mp)); 603 ASSERT(mp->m_attroffset < XFS_LITINO(mp));
607 604
608 for (i = 0; i < 2; i++) { 605 mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
609 mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 606 mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
610 xfs_alloc, i == 0); 607 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
611 mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 608 mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
612 xfs_alloc, i == 0); 609
613 } 610 mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
614 for (i = 0; i < 2; i++) { 611 mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
615 mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 612 mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
616 xfs_bmbt, i == 0); 613 mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
617 mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 614
618 xfs_bmbt, i == 0); 615 mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
619 } 616 mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
620 for (i = 0; i < 2; i++) { 617 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
621 mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 618 mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
622 xfs_inobt, i == 0);
623 mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
624 xfs_inobt, i == 0);
625 }
626 619
627 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 620 mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
628 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 621 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
@@ -1241,10 +1234,13 @@ xfs_unmountfs(
1241 * need to force the log first. 1234 * need to force the log first.
1242 */ 1235 */
1243 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1236 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1244 xfs_iflush_all(mp); 1237 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
1245 1238
1246 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1239 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
1247 1240
1241 if (mp->m_quotainfo)
1242 XFS_QM_DONE(mp);
1243
1248 /* 1244 /*
1249 * Flush out the log synchronously so that we know for sure 1245 * Flush out the log synchronously so that we know for sure
1250 * that nothing is pinned. This is important because bflush() 1246 * that nothing is pinned. This is important because bflush()
@@ -1285,11 +1281,6 @@ xfs_unmountfs(
1285 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1281 xfs_unmountfs_wait(mp); /* wait for async bufs */
1286 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1282 xfs_log_unmount(mp); /* Done! No more fs ops. */
1287 1283
1288 /*
1289 * All inodes from this mount point should be freed.
1290 */
1291 ASSERT(mp->m_inodes == NULL);
1292
1293 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1284 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1294 uuid_table_remove(&mp->m_sb.sb_uuid); 1285 uuid_table_remove(&mp->m_sb.sb_uuid);
1295 1286
@@ -1297,8 +1288,6 @@ xfs_unmountfs(
1297 xfs_errortag_clearall(mp, 0); 1288 xfs_errortag_clearall(mp, 0);
1298#endif 1289#endif
1299 xfs_free_perag(mp); 1290 xfs_free_perag(mp);
1300 if (mp->m_quotainfo)
1301 XFS_QM_DONE(mp);
1302} 1291}
1303 1292
1304STATIC void 1293STATIC void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f3c1024b1241..e3f618c84e47 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,6 +18,7 @@
18#ifndef __XFS_MOUNT_H__ 18#ifndef __XFS_MOUNT_H__
19#define __XFS_MOUNT_H__ 19#define __XFS_MOUNT_H__
20 20
21#include "xfs_sync.h"
21 22
22typedef struct xfs_trans_reservations { 23typedef struct xfs_trans_reservations {
23 uint tr_write; /* extent alloc trans */ 24 uint tr_write; /* extent alloc trans */
@@ -44,14 +45,14 @@ typedef struct xfs_trans_reservations {
44} xfs_trans_reservations_t; 45} xfs_trans_reservations_t;
45 46
46#ifndef __KERNEL__ 47#ifndef __KERNEL__
47/* 48
48 * Moved here from xfs_ag.h to avoid reordering header files
49 */
50#define XFS_DADDR_TO_AGNO(mp,d) \ 49#define XFS_DADDR_TO_AGNO(mp,d) \
51 ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) 50 ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
52#define XFS_DADDR_TO_AGBNO(mp,d) \ 51#define XFS_DADDR_TO_AGBNO(mp,d) \
53 ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks)) 52 ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
54#else 53
54#else /* __KERNEL__ */
55
55struct cred; 56struct cred;
56struct log; 57struct log;
57struct xfs_mount_args; 58struct xfs_mount_args;
@@ -62,6 +63,7 @@ struct xfs_extdelta;
62struct xfs_swapext; 63struct xfs_swapext;
63struct xfs_mru_cache; 64struct xfs_mru_cache;
64struct xfs_nameops; 65struct xfs_nameops;
66struct xfs_ail;
65 67
66/* 68/*
67 * Prototypes and functions for the Data Migration subsystem. 69 * Prototypes and functions for the Data Migration subsystem.
@@ -223,18 +225,10 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
223#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) 225#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
224#endif 226#endif
225 227
226typedef struct xfs_ail {
227 struct list_head xa_ail;
228 uint xa_gen;
229 struct task_struct *xa_task;
230 xfs_lsn_t xa_target;
231} xfs_ail_t;
232
233typedef struct xfs_mount { 228typedef struct xfs_mount {
234 struct super_block *m_super; 229 struct super_block *m_super;
235 xfs_tid_t m_tid; /* next unused tid for fs */ 230 xfs_tid_t m_tid; /* next unused tid for fs */
236 spinlock_t m_ail_lock; /* fs AIL mutex */ 231 struct xfs_ail *m_ail; /* fs active log item list */
237 xfs_ail_t m_ail; /* fs active log item list */
238 xfs_sb_t m_sb; /* copy of fs superblock */ 232 xfs_sb_t m_sb; /* copy of fs superblock */
239 spinlock_t m_sb_lock; /* sb counter lock */ 233 spinlock_t m_sb_lock; /* sb counter lock */
240 struct xfs_buf *m_sb_bp; /* buffer for superblock */ 234 struct xfs_buf *m_sb_bp; /* buffer for superblock */
@@ -247,9 +241,6 @@ typedef struct xfs_mount {
247 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ 241 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
248 spinlock_t m_agirotor_lock;/* .. and lock protecting it */ 242 spinlock_t m_agirotor_lock;/* .. and lock protecting it */
249 xfs_agnumber_t m_maxagi; /* highest inode alloc group */ 243 xfs_agnumber_t m_maxagi; /* highest inode alloc group */
250 struct xfs_inode *m_inodes; /* active inode list */
251 struct list_head m_del_inodes; /* inodes to reclaim */
252 mutex_t m_ilock; /* inode list mutex */
253 uint m_ireclaims; /* count of calls to reclaim*/ 244 uint m_ireclaims; /* count of calls to reclaim*/
254 uint m_readio_log; /* min read size log bytes */ 245 uint m_readio_log; /* min read size log bytes */
255 uint m_readio_blocks; /* min read size blocks */ 246 uint m_readio_blocks; /* min read size blocks */
@@ -267,7 +258,6 @@ typedef struct xfs_mount {
267 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ 258 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
268 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ 259 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
269 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ 260 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
270 __uint8_t m_dircook_elog; /* log d-cookie entry bits */
271 __uint8_t m_blkbit_log; /* blocklog + NBBY */ 261 __uint8_t m_blkbit_log; /* blocklog + NBBY */
272 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 262 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
273 __uint8_t m_agno_log; /* log #ag's */ 263 __uint8_t m_agno_log; /* log #ag's */
@@ -276,12 +266,12 @@ typedef struct xfs_mount {
276 uint m_blockmask; /* sb_blocksize-1 */ 266 uint m_blockmask; /* sb_blocksize-1 */
277 uint m_blockwsize; /* sb_blocksize in words */ 267 uint m_blockwsize; /* sb_blocksize in words */
278 uint m_blockwmask; /* blockwsize-1 */ 268 uint m_blockwmask; /* blockwsize-1 */
279 uint m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */ 269 uint m_alloc_mxr[2]; /* max alloc btree records */
280 uint m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */ 270 uint m_alloc_mnr[2]; /* min alloc btree records */
281 uint m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */ 271 uint m_bmap_dmxr[2]; /* max bmap btree records */
282 uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */ 272 uint m_bmap_dmnr[2]; /* min bmap btree records */
283 uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */ 273 uint m_inobt_mxr[2]; /* max inobt btree records */
284 uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */ 274 uint m_inobt_mnr[2]; /* min inobt btree records */
285 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 275 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
286 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 276 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
287 uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ 277 uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */
@@ -313,8 +303,7 @@ typedef struct xfs_mount {
313 int m_attr_magicpct;/* 37% of the blocksize */ 303 int m_attr_magicpct;/* 37% of the blocksize */
314 int m_dir_magicpct; /* 37% of the dir blocksize */ 304 int m_dir_magicpct; /* 37% of the dir blocksize */
315 __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ 305 __uint8_t m_mk_sharedro; /* mark shared ro on unmount */
316 __uint8_t m_inode_quiesce;/* call quiesce on new inodes. 306 __uint8_t m_inode_quiesce;/* call quiesce on new inodes. */
317 field governed by m_ilock */
318 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 307 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
319 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ 308 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
320 int m_dirblksize; /* directory block sz--bytes */ 309 int m_dirblksize; /* directory block sz--bytes */
@@ -508,7 +497,6 @@ typedef struct xfs_mod_sb {
508#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) 497#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
509#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) 498#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
510 499
511extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
512extern int xfs_log_sbcount(xfs_mount_t *, uint); 500extern int xfs_log_sbcount(xfs_mount_t *, uint);
513extern int xfs_mountfs(xfs_mount_t *mp); 501extern int xfs_mountfs(xfs_mount_t *mp);
514extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 502extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
@@ -525,20 +513,20 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
525extern int xfs_readsb(xfs_mount_t *, int); 513extern int xfs_readsb(xfs_mount_t *, int);
526extern void xfs_freesb(xfs_mount_t *); 514extern void xfs_freesb(xfs_mount_t *);
527extern int xfs_fs_writable(xfs_mount_t *); 515extern int xfs_fs_writable(xfs_mount_t *);
528extern int xfs_syncsub(xfs_mount_t *, int, int *);
529extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
530extern xfs_agnumber_t xfs_initialize_perag(xfs_mount_t *, xfs_agnumber_t);
531extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
532extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
533extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); 516extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
534 517
535extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); 518extern int xfs_dmops_get(struct xfs_mount *);
536extern void xfs_dmops_put(struct xfs_mount *); 519extern void xfs_dmops_put(struct xfs_mount *);
537extern int xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *); 520extern int xfs_qmops_get(struct xfs_mount *);
538extern void xfs_qmops_put(struct xfs_mount *); 521extern void xfs_qmops_put(struct xfs_mount *);
539 522
540extern struct xfs_dmops xfs_dmcore_xfs; 523extern struct xfs_dmops xfs_dmcore_xfs;
541 524
542#endif /* __KERNEL__ */ 525#endif /* __KERNEL__ */
543 526
527extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
528extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t);
529extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
530extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
531
544#endif /* __XFS_MOUNT_H__ */ 532#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index a294e58db8dd..27f80581520a 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -28,7 +28,6 @@
28#include "xfs_mount.h" 28#include "xfs_mount.h"
29#include "xfs_quota.h" 29#include "xfs_quota.h"
30#include "xfs_error.h" 30#include "xfs_error.h"
31#include "xfs_clnt.h"
32 31
33 32
34STATIC struct xfs_dquot * 33STATIC struct xfs_dquot *
@@ -131,9 +130,9 @@ static struct xfs_qmops xfs_qmcore_stub = {
131}; 130};
132 131
133int 132int
134xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 133xfs_qmops_get(struct xfs_mount *mp)
135{ 134{
136 if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) { 135 if (XFS_IS_QUOTA_RUNNING(mp)) {
137#ifdef CONFIG_XFS_QUOTA 136#ifdef CONFIG_XFS_QUOTA
138 mp->m_qm_ops = &xfs_qmcore_xfs; 137 mp->m_qm_ops = &xfs_qmcore_xfs;
139#else 138#else
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 4e1c22a23be5..ad137efc8702 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1383,11 +1383,12 @@ xfs_trans_chunk_committed(
1383 xfs_log_item_desc_t *lidp; 1383 xfs_log_item_desc_t *lidp;
1384 xfs_log_item_t *lip; 1384 xfs_log_item_t *lip;
1385 xfs_lsn_t item_lsn; 1385 xfs_lsn_t item_lsn;
1386 struct xfs_mount *mp;
1387 int i; 1386 int i;
1388 1387
1389 lidp = licp->lic_descs; 1388 lidp = licp->lic_descs;
1390 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1389 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1390 struct xfs_ail *ailp;
1391
1391 if (xfs_lic_isfree(licp, i)) { 1392 if (xfs_lic_isfree(licp, i)) {
1392 continue; 1393 continue;
1393 } 1394 }
@@ -1424,19 +1425,19 @@ xfs_trans_chunk_committed(
1424 * This would cause the earlier transaction to fail 1425 * This would cause the earlier transaction to fail
1425 * the test below. 1426 * the test below.
1426 */ 1427 */
1427 mp = lip->li_mountp; 1428 ailp = lip->li_ailp;
1428 spin_lock(&mp->m_ail_lock); 1429 spin_lock(&ailp->xa_lock);
1429 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { 1430 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1430 /* 1431 /*
1431 * This will set the item's lsn to item_lsn 1432 * This will set the item's lsn to item_lsn
1432 * and update the position of the item in 1433 * and update the position of the item in
1433 * the AIL. 1434 * the AIL.
1434 * 1435 *
1435 * xfs_trans_update_ail() drops the AIL lock. 1436 * xfs_trans_ail_update() drops the AIL lock.
1436 */ 1437 */
1437 xfs_trans_update_ail(mp, lip, item_lsn); 1438 xfs_trans_ail_update(ailp, lip, item_lsn);
1438 } else { 1439 } else {
1439 spin_unlock(&mp->m_ail_lock); 1440 spin_unlock(&ailp->xa_lock);
1440 } 1441 }
1441 1442
1442 /* 1443 /*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 74c80bd2b0ec..d6fe4a88d79f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -18,6 +18,8 @@
18#ifndef __XFS_TRANS_H__ 18#ifndef __XFS_TRANS_H__
19#define __XFS_TRANS_H__ 19#define __XFS_TRANS_H__
20 20
21struct xfs_log_item;
22
21/* 23/*
22 * This is the structure written in the log at the head of 24 * This is the structure written in the log at the head of
23 * every transaction. It identifies the type and id of the 25 * every transaction. It identifies the type and id of the
@@ -98,76 +100,6 @@ typedef struct xfs_trans_header {
98#define XFS_TRANS_TYPE_MAX 41 100#define XFS_TRANS_TYPE_MAX 41
99/* new transaction types need to be reflected in xfs_logprint(8) */ 101/* new transaction types need to be reflected in xfs_logprint(8) */
100 102
101
102#ifdef __KERNEL__
103struct xfs_buf;
104struct xfs_buftarg;
105struct xfs_efd_log_item;
106struct xfs_efi_log_item;
107struct xfs_inode;
108struct xfs_item_ops;
109struct xfs_log_iovec;
110struct xfs_log_item;
111struct xfs_log_item_desc;
112struct xfs_mount;
113struct xfs_trans;
114struct xfs_dquot_acct;
115
116typedef struct xfs_log_item {
117 struct list_head li_ail; /* AIL pointers */
118 xfs_lsn_t li_lsn; /* last on-disk lsn */
119 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
120 struct xfs_mount *li_mountp; /* ptr to fs mount */
121 uint li_type; /* item type */
122 uint li_flags; /* misc flags */
123 struct xfs_log_item *li_bio_list; /* buffer item list */
124 void (*li_cb)(struct xfs_buf *,
125 struct xfs_log_item *);
126 /* buffer item iodone */
127 /* callback func */
128 struct xfs_item_ops *li_ops; /* function list */
129} xfs_log_item_t;
130
131#define XFS_LI_IN_AIL 0x1
132#define XFS_LI_ABORTED 0x2
133
134typedef struct xfs_item_ops {
135 uint (*iop_size)(xfs_log_item_t *);
136 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
137 void (*iop_pin)(xfs_log_item_t *);
138 void (*iop_unpin)(xfs_log_item_t *, int);
139 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
140 uint (*iop_trylock)(xfs_log_item_t *);
141 void (*iop_unlock)(xfs_log_item_t *);
142 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
143 void (*iop_push)(xfs_log_item_t *);
144 void (*iop_pushbuf)(xfs_log_item_t *);
145 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
146} xfs_item_ops_t;
147
148#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
149#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
150#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
151#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags)
152#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
153#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
154#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
155#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
156#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
157#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
158#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
159
160/*
161 * Return values for the IOP_TRYLOCK() routines.
162 */
163#define XFS_ITEM_SUCCESS 0
164#define XFS_ITEM_PINNED 1
165#define XFS_ITEM_LOCKED 2
166#define XFS_ITEM_FLUSHING 3
167#define XFS_ITEM_PUSHBUF 4
168
169#endif /* __KERNEL__ */
170
171/* 103/*
172 * This structure is used to track log items associated with 104 * This structure is used to track log items associated with
173 * a transaction. It points to the log item and keeps some 105 * a transaction. It points to the log item and keeps some
@@ -176,7 +108,7 @@ typedef struct xfs_item_ops {
176 * once we get to commit processing (see xfs_trans_commit()). 108 * once we get to commit processing (see xfs_trans_commit()).
177 */ 109 */
178typedef struct xfs_log_item_desc { 110typedef struct xfs_log_item_desc {
179 xfs_log_item_t *lid_item; 111 struct xfs_log_item *lid_item;
180 ushort lid_size; 112 ushort lid_size;
181 unsigned char lid_flags; 113 unsigned char lid_flags;
182 unsigned char lid_index; 114 unsigned char lid_index;
@@ -276,94 +208,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
276 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs)); 208 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
277} 209}
278 210
279#ifdef __KERNEL__
280/*
281 * This structure is used to maintain a list of block ranges that have been
282 * freed in the transaction. The ranges are listed in the perag[] busy list
283 * between when they're freed and the transaction is committed to disk.
284 */
285
286typedef struct xfs_log_busy_slot {
287 xfs_agnumber_t lbc_ag;
288 ushort lbc_idx; /* index in perag.busy[] */
289} xfs_log_busy_slot_t;
290
291#define XFS_LBC_NUM_SLOTS 31
292typedef struct xfs_log_busy_chunk {
293 struct xfs_log_busy_chunk *lbc_next;
294 uint lbc_free; /* free slots bitmask */
295 ushort lbc_unused; /* first unused */
296 xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
297} xfs_log_busy_chunk_t;
298
299#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
300#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
301
302#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
303#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
304#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
305#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
306#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
307
308/*
309 * This is the type of function which can be given to xfs_trans_callback()
310 * to be called upon the transaction's commit to disk.
311 */
312typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
313
314/*
315 * This is the structure maintained for every active transaction.
316 */
317typedef struct xfs_trans {
318 unsigned int t_magic; /* magic number */
319 xfs_log_callback_t t_logcb; /* log callback struct */
320 unsigned int t_type; /* transaction type */
321 unsigned int t_log_res; /* amt of log space resvd */
322 unsigned int t_log_count; /* count for perm log res */
323 unsigned int t_blk_res; /* # of blocks resvd */
324 unsigned int t_blk_res_used; /* # of resvd blocks used */
325 unsigned int t_rtx_res; /* # of rt extents resvd */
326 unsigned int t_rtx_res_used; /* # of resvd rt extents used */
327 xfs_log_ticket_t t_ticket; /* log mgr ticket */
328 xfs_lsn_t t_lsn; /* log seq num of start of
329 * transaction. */
330 xfs_lsn_t t_commit_lsn; /* log seq num of end of
331 * transaction. */
332 struct xfs_mount *t_mountp; /* ptr to fs mount struct */
333 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
334 xfs_trans_callback_t t_callback; /* transaction callback */
335 void *t_callarg; /* callback arg */
336 unsigned int t_flags; /* misc flags */
337 int64_t t_icount_delta; /* superblock icount change */
338 int64_t t_ifree_delta; /* superblock ifree change */
339 int64_t t_fdblocks_delta; /* superblock fdblocks chg */
340 int64_t t_res_fdblocks_delta; /* on-disk only chg */
341 int64_t t_frextents_delta;/* superblock freextents chg*/
342 int64_t t_res_frextents_delta; /* on-disk only chg */
343#ifdef DEBUG
344 int64_t t_ag_freeblks_delta; /* debugging counter */
345 int64_t t_ag_flist_delta; /* debugging counter */
346 int64_t t_ag_btree_delta; /* debugging counter */
347#endif
348 int64_t t_dblocks_delta;/* superblock dblocks change */
349 int64_t t_agcount_delta;/* superblock agcount change */
350 int64_t t_imaxpct_delta;/* superblock imaxpct change */
351 int64_t t_rextsize_delta;/* superblock rextsize chg */
352 int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
353 int64_t t_rblocks_delta;/* superblock rblocks change */
354 int64_t t_rextents_delta;/* superblocks rextents chg */
355 int64_t t_rextslog_delta;/* superblocks rextslog chg */
356 unsigned int t_items_free; /* log item descs free */
357 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
358 xfs_trans_header_t t_header; /* header for in-log trans */
359 unsigned int t_busy_free; /* busy descs free */
360 xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
361 unsigned long t_pflags; /* saved process flags state */
362} xfs_trans_t;
363
364#endif /* __KERNEL__ */
365
366
367#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ 211#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */
368/* 212/*
369 * Values for t_flags. 213 * Values for t_flags.
@@ -906,6 +750,157 @@ typedef struct xfs_trans {
906#define XFS_DQUOT_REF 1 750#define XFS_DQUOT_REF 1
907 751
908#ifdef __KERNEL__ 752#ifdef __KERNEL__
753
754struct xfs_buf;
755struct xfs_buftarg;
756struct xfs_efd_log_item;
757struct xfs_efi_log_item;
758struct xfs_inode;
759struct xfs_item_ops;
760struct xfs_log_iovec;
761struct xfs_log_item_desc;
762struct xfs_mount;
763struct xfs_trans;
764struct xfs_dquot_acct;
765
766typedef struct xfs_log_item {
767 struct list_head li_ail; /* AIL pointers */
768 xfs_lsn_t li_lsn; /* last on-disk lsn */
769 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
770 struct xfs_mount *li_mountp; /* ptr to fs mount */
771 struct xfs_ail *li_ailp; /* ptr to AIL */
772 uint li_type; /* item type */
773 uint li_flags; /* misc flags */
774 struct xfs_log_item *li_bio_list; /* buffer item list */
775 void (*li_cb)(struct xfs_buf *,
776 struct xfs_log_item *);
777 /* buffer item iodone */
778 /* callback func */
779 struct xfs_item_ops *li_ops; /* function list */
780} xfs_log_item_t;
781
782#define XFS_LI_IN_AIL 0x1
783#define XFS_LI_ABORTED 0x2
784
785typedef struct xfs_item_ops {
786 uint (*iop_size)(xfs_log_item_t *);
787 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
788 void (*iop_pin)(xfs_log_item_t *);
789 void (*iop_unpin)(xfs_log_item_t *, int);
790 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
791 uint (*iop_trylock)(xfs_log_item_t *);
792 void (*iop_unlock)(xfs_log_item_t *);
793 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
794 void (*iop_push)(xfs_log_item_t *);
795 void (*iop_pushbuf)(xfs_log_item_t *);
796 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
797} xfs_item_ops_t;
798
799#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
800#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
801#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
802#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags)
803#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
804#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
805#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
806#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
807#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
808#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
809#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
810
811/*
812 * Return values for the IOP_TRYLOCK() routines.
813 */
814#define XFS_ITEM_SUCCESS 0
815#define XFS_ITEM_PINNED 1
816#define XFS_ITEM_LOCKED 2
817#define XFS_ITEM_FLUSHING 3
818#define XFS_ITEM_PUSHBUF 4
819
820/*
821 * This structure is used to maintain a list of block ranges that have been
822 * freed in the transaction. The ranges are listed in the perag[] busy list
823 * between when they're freed and the transaction is committed to disk.
824 */
825
826typedef struct xfs_log_busy_slot {
827 xfs_agnumber_t lbc_ag;
828 ushort lbc_idx; /* index in perag.busy[] */
829} xfs_log_busy_slot_t;
830
831#define XFS_LBC_NUM_SLOTS 31
832typedef struct xfs_log_busy_chunk {
833 struct xfs_log_busy_chunk *lbc_next;
834 uint lbc_free; /* free slots bitmask */
835 ushort lbc_unused; /* first unused */
836 xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
837} xfs_log_busy_chunk_t;
838
839#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
840#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
841
842#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
843#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
844#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
845#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
846#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
847
848/*
849 * This is the type of function which can be given to xfs_trans_callback()
850 * to be called upon the transaction's commit to disk.
851 */
852typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
853
854/*
855 * This is the structure maintained for every active transaction.
856 */
857typedef struct xfs_trans {
858 unsigned int t_magic; /* magic number */
859 xfs_log_callback_t t_logcb; /* log callback struct */
860 unsigned int t_type; /* transaction type */
861 unsigned int t_log_res; /* amt of log space resvd */
862 unsigned int t_log_count; /* count for perm log res */
863 unsigned int t_blk_res; /* # of blocks resvd */
864 unsigned int t_blk_res_used; /* # of resvd blocks used */
865 unsigned int t_rtx_res; /* # of rt extents resvd */
866 unsigned int t_rtx_res_used; /* # of resvd rt extents used */
867 xfs_log_ticket_t t_ticket; /* log mgr ticket */
868 xfs_lsn_t t_lsn; /* log seq num of start of
869 * transaction. */
870 xfs_lsn_t t_commit_lsn; /* log seq num of end of
871 * transaction. */
872 struct xfs_mount *t_mountp; /* ptr to fs mount struct */
873 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
874 xfs_trans_callback_t t_callback; /* transaction callback */
875 void *t_callarg; /* callback arg */
876 unsigned int t_flags; /* misc flags */
877 int64_t t_icount_delta; /* superblock icount change */
878 int64_t t_ifree_delta; /* superblock ifree change */
879 int64_t t_fdblocks_delta; /* superblock fdblocks chg */
880 int64_t t_res_fdblocks_delta; /* on-disk only chg */
881 int64_t t_frextents_delta;/* superblock freextents chg*/
882 int64_t t_res_frextents_delta; /* on-disk only chg */
883#ifdef DEBUG
884 int64_t t_ag_freeblks_delta; /* debugging counter */
885 int64_t t_ag_flist_delta; /* debugging counter */
886 int64_t t_ag_btree_delta; /* debugging counter */
887#endif
888 int64_t t_dblocks_delta;/* superblock dblocks change */
889 int64_t t_agcount_delta;/* superblock agcount change */
890 int64_t t_imaxpct_delta;/* superblock imaxpct change */
891 int64_t t_rextsize_delta;/* superblock rextsize chg */
892 int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
893 int64_t t_rblocks_delta;/* superblock rblocks change */
894 int64_t t_rextents_delta;/* superblocks rextents chg */
895 int64_t t_rextslog_delta;/* superblocks rextslog chg */
896 unsigned int t_items_free; /* log item descs free */
897 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
898 xfs_trans_header_t t_header; /* header for in-log trans */
899 unsigned int t_busy_free; /* busy descs free */
900 xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
901 unsigned long t_pflags; /* saved process flags state */
902} xfs_trans_t;
903
909/* 904/*
910 * XFS transaction mechanism exported interfaces that are 905 * XFS transaction mechanism exported interfaces that are
911 * actually macros. 906 * actually macros.
@@ -928,7 +923,6 @@ typedef struct xfs_trans {
928/* 923/*
929 * XFS transaction mechanism exported interfaces. 924 * XFS transaction mechanism exported interfaces.
930 */ 925 */
931void xfs_trans_init(struct xfs_mount *);
932xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); 926xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
933xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); 927xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint);
934xfs_trans_t *xfs_trans_dup(xfs_trans_t *); 928xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
@@ -975,13 +969,8 @@ int _xfs_trans_commit(xfs_trans_t *,
975 int *); 969 int *);
976#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) 970#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
977void xfs_trans_cancel(xfs_trans_t *, int); 971void xfs_trans_cancel(xfs_trans_t *, int);
978int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
979int xfs_trans_ail_init(struct xfs_mount *); 972int xfs_trans_ail_init(struct xfs_mount *);
980void xfs_trans_ail_destroy(struct xfs_mount *); 973void xfs_trans_ail_destroy(struct xfs_mount *);
981void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
982xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *);
983void xfs_trans_unlocked_item(struct xfs_mount *,
984 xfs_log_item_t *);
985xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, 974xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
986 xfs_agnumber_t ag, 975 xfs_agnumber_t ag,
987 xfs_extlen_t idx); 976 xfs_extlen_t idx);
@@ -990,4 +979,7 @@ extern kmem_zone_t *xfs_trans_zone;
990 979
991#endif /* __KERNEL__ */ 980#endif /* __KERNEL__ */
992 981
982void xfs_trans_init(struct xfs_mount *);
983int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
984
993#endif /* __XFS_TRANS_H__ */ 985#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 1f77c00af566..2d47f10f8bed 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2008 Dave Chinner
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -28,13 +29,13 @@
28#include "xfs_trans_priv.h" 29#include "xfs_trans_priv.h"
29#include "xfs_error.h" 30#include "xfs_error.h"
30 31
31STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *); 32STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *);
32STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *); 33STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *); 34STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *); 35STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 36
36#ifdef DEBUG 37#ifdef DEBUG
37STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *); 38STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *);
38#else 39#else
39#define xfs_ail_check(a,l) 40#define xfs_ail_check(a,l)
40#endif /* DEBUG */ 41#endif /* DEBUG */
@@ -50,20 +51,20 @@ STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *);
50 * lsn of the last item in the AIL. 51 * lsn of the last item in the AIL.
51 */ 52 */
52xfs_lsn_t 53xfs_lsn_t
53xfs_trans_tail_ail( 54xfs_trans_ail_tail(
54 xfs_mount_t *mp) 55 struct xfs_ail *ailp)
55{ 56{
56 xfs_lsn_t lsn; 57 xfs_lsn_t lsn;
57 xfs_log_item_t *lip; 58 xfs_log_item_t *lip;
58 59
59 spin_lock(&mp->m_ail_lock); 60 spin_lock(&ailp->xa_lock);
60 lip = xfs_ail_min(&mp->m_ail); 61 lip = xfs_ail_min(ailp);
61 if (lip == NULL) { 62 if (lip == NULL) {
62 lsn = (xfs_lsn_t)0; 63 lsn = (xfs_lsn_t)0;
63 } else { 64 } else {
64 lsn = lip->li_lsn; 65 lsn = lip->li_lsn;
65 } 66 }
66 spin_unlock(&mp->m_ail_lock); 67 spin_unlock(&ailp->xa_lock);
67 68
68 return lsn; 69 return lsn;
69} 70}
@@ -85,16 +86,125 @@ xfs_trans_tail_ail(
85 * any of the objects, so the lock is not needed. 86 * any of the objects, so the lock is not needed.
86 */ 87 */
87void 88void
88xfs_trans_push_ail( 89xfs_trans_ail_push(
89 xfs_mount_t *mp, 90 struct xfs_ail *ailp,
90 xfs_lsn_t threshold_lsn) 91 xfs_lsn_t threshold_lsn)
91{ 92{
92 xfs_log_item_t *lip; 93 xfs_log_item_t *lip;
94
95 lip = xfs_ail_min(ailp);
96 if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
97 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0)
98 xfsaild_wakeup(ailp, threshold_lsn);
99 }
100}
101
102/*
103 * AIL traversal cursor initialisation.
104 *
105 * The cursor keeps track of where our current traversal is up
106 * to by tracking the next item in the list for us. However, for
107 * this to be safe, removing an object from the AIL needs to invalidate
108 * any cursor that points to it. Hence the traversal cursor needs to
109 * be linked to the struct xfs_ail so that deletion can search all the
110 * active cursors for invalidation.
111 *
112 * We don't link the push cursor because it is embedded in the struct
113 * xfs_ail and hence easily findable.
114 */
115STATIC void
116xfs_trans_ail_cursor_init(
117 struct xfs_ail *ailp,
118 struct xfs_ail_cursor *cur)
119{
120 cur->item = NULL;
121 if (cur == &ailp->xa_cursors)
122 return;
123
124 cur->next = ailp->xa_cursors.next;
125 ailp->xa_cursors.next = cur;
126}
127
128/*
129 * Set the cursor to the next item, because when we look
130 * up the cursor the current item may have been freed.
131 */
132STATIC void
133xfs_trans_ail_cursor_set(
134 struct xfs_ail *ailp,
135 struct xfs_ail_cursor *cur,
136 struct xfs_log_item *lip)
137{
138 if (lip)
139 cur->item = xfs_ail_next(ailp, lip);
140}
141
142/*
143 * Get the next item in the traversal and advance the cursor.
144 * If the cursor was invalidated (indicated by a lip of 1),
145 * restart the traversal.
146 */
147struct xfs_log_item *
148xfs_trans_ail_cursor_next(
149 struct xfs_ail *ailp,
150 struct xfs_ail_cursor *cur)
151{
152 struct xfs_log_item *lip = cur->item;
153
154 if ((__psint_t)lip & 1)
155 lip = xfs_ail_min(ailp);
156 xfs_trans_ail_cursor_set(ailp, cur, lip);
157 return lip;
158}
159
160/*
161 * Now that the traversal is complete, we need to remove the cursor
162 * from the list of traversing cursors. Avoid removing the embedded
163 * push cursor, but use the fact it is always present to make the
164 * list deletion simple.
165 */
166void
167xfs_trans_ail_cursor_done(
168 struct xfs_ail *ailp,
169 struct xfs_ail_cursor *done)
170{
171 struct xfs_ail_cursor *prev = NULL;
172 struct xfs_ail_cursor *cur;
173
174 done->item = NULL;
175 if (done == &ailp->xa_cursors)
176 return;
177 prev = &ailp->xa_cursors;
178 for (cur = prev->next; cur; prev = cur, cur = prev->next) {
179 if (cur == done) {
180 prev->next = cur->next;
181 break;
182 }
183 }
184 ASSERT(cur);
185}
186
187/*
188 * Invalidate any cursor that is pointing to this item. This is
189 * called when an item is removed from the AIL. Any cursor pointing
190 * to this object is now invalid and the traversal needs to be
191 * terminated so it doesn't reference a freed object. We set the
192 * cursor item to a value of 1 so we can distinguish between an
193 * invalidation and the end of the list when getting the next item
194 * from the cursor.
195 */
196STATIC void
197xfs_trans_ail_cursor_clear(
198 struct xfs_ail *ailp,
199 struct xfs_log_item *lip)
200{
201 struct xfs_ail_cursor *cur;
93 202
94 lip = xfs_ail_min(&mp->m_ail); 203 /* need to search all cursors */
95 if (lip && !XFS_FORCED_SHUTDOWN(mp)) { 204 for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
96 if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) 205 if (cur->item == lip)
97 xfsaild_wakeup(mp, threshold_lsn); 206 cur->item = (struct xfs_log_item *)
207 ((__psint_t)cur->item | 1);
98 } 208 }
99} 209}
100 210
@@ -103,25 +213,27 @@ xfs_trans_push_ail(
103 * Return the current tree generation number for use 213 * Return the current tree generation number for use
104 * in calls to xfs_trans_next_ail(). 214 * in calls to xfs_trans_next_ail().
105 */ 215 */
106STATIC xfs_log_item_t * 216xfs_log_item_t *
107xfs_trans_first_push_ail( 217xfs_trans_ail_cursor_first(
108 xfs_mount_t *mp, 218 struct xfs_ail *ailp,
109 int *gen, 219 struct xfs_ail_cursor *cur,
110 xfs_lsn_t lsn) 220 xfs_lsn_t lsn)
111{ 221{
112 xfs_log_item_t *lip; 222 xfs_log_item_t *lip;
113 223
114 lip = xfs_ail_min(&mp->m_ail); 224 xfs_trans_ail_cursor_init(ailp, cur);
115 *gen = (int)mp->m_ail.xa_gen; 225 lip = xfs_ail_min(ailp);
116 if (lsn == 0) 226 if (lsn == 0)
117 return lip; 227 goto out;
118 228
119 list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) { 229 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
120 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) 230 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
121 return lip; 231 goto out;
122 } 232 }
123 233 lip = NULL;
124 return NULL; 234out:
235 xfs_trans_ail_cursor_set(ailp, cur, lip);
236 return lip;
125} 237}
126 238
127/* 239/*
@@ -129,29 +241,29 @@ xfs_trans_first_push_ail(
129 */ 241 */
130long 242long
131xfsaild_push( 243xfsaild_push(
132 xfs_mount_t *mp, 244 struct xfs_ail *ailp,
133 xfs_lsn_t *last_lsn) 245 xfs_lsn_t *last_lsn)
134{ 246{
135 long tout = 1000; /* milliseconds */ 247 long tout = 1000; /* milliseconds */
136 xfs_lsn_t last_pushed_lsn = *last_lsn; 248 xfs_lsn_t last_pushed_lsn = *last_lsn;
137 xfs_lsn_t target = mp->m_ail.xa_target; 249 xfs_lsn_t target = ailp->xa_target;
138 xfs_lsn_t lsn; 250 xfs_lsn_t lsn;
139 xfs_log_item_t *lip; 251 xfs_log_item_t *lip;
140 int gen;
141 int restarts;
142 int flush_log, count, stuck; 252 int flush_log, count, stuck;
253 xfs_mount_t *mp = ailp->xa_mount;
254 struct xfs_ail_cursor *cur = &ailp->xa_cursors;
143 255
144#define XFS_TRANS_PUSH_AIL_RESTARTS 10 256 spin_lock(&ailp->xa_lock);
145 257 xfs_trans_ail_cursor_init(ailp, cur);
146 spin_lock(&mp->m_ail_lock); 258 lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn);
147 lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn);
148 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 259 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
149 /* 260 /*
150 * AIL is empty or our push has reached the end. 261 * AIL is empty or our push has reached the end.
151 */ 262 */
152 spin_unlock(&mp->m_ail_lock); 263 xfs_trans_ail_cursor_done(ailp, cur);
264 spin_unlock(&ailp->xa_lock);
153 last_pushed_lsn = 0; 265 last_pushed_lsn = 0;
154 goto out; 266 return tout;
155 } 267 }
156 268
157 XFS_STATS_INC(xs_push_ail); 269 XFS_STATS_INC(xs_push_ail);
@@ -169,7 +281,7 @@ xfsaild_push(
169 */ 281 */
170 tout = 10; 282 tout = 10;
171 lsn = lip->li_lsn; 283 lsn = lip->li_lsn;
172 flush_log = stuck = count = restarts = 0; 284 flush_log = stuck = count = 0;
173 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { 285 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
174 int lock_result; 286 int lock_result;
175 /* 287 /*
@@ -184,7 +296,7 @@ xfsaild_push(
184 * skip to the next item in the list. 296 * skip to the next item in the list.
185 */ 297 */
186 lock_result = IOP_TRYLOCK(lip); 298 lock_result = IOP_TRYLOCK(lip);
187 spin_unlock(&mp->m_ail_lock); 299 spin_unlock(&ailp->xa_lock);
188 switch (lock_result) { 300 switch (lock_result) {
189 case XFS_ITEM_SUCCESS: 301 case XFS_ITEM_SUCCESS:
190 XFS_STATS_INC(xs_push_ail_success); 302 XFS_STATS_INC(xs_push_ail_success);
@@ -221,7 +333,7 @@ xfsaild_push(
221 break; 333 break;
222 } 334 }
223 335
224 spin_lock(&mp->m_ail_lock); 336 spin_lock(&ailp->xa_lock);
225 /* should we bother continuing? */ 337 /* should we bother continuing? */
226 if (XFS_FORCED_SHUTDOWN(mp)) 338 if (XFS_FORCED_SHUTDOWN(mp))
227 break; 339 break;
@@ -244,14 +356,13 @@ xfsaild_push(
244 if (stuck > 100) 356 if (stuck > 100)
245 break; 357 break;
246 358
247 lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); 359 lip = xfs_trans_ail_cursor_next(ailp, cur);
248 if (lip == NULL) 360 if (lip == NULL)
249 break; 361 break;
250 if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS)
251 break;
252 lsn = lip->li_lsn; 362 lsn = lip->li_lsn;
253 } 363 }
254 spin_unlock(&mp->m_ail_lock); 364 xfs_trans_ail_cursor_done(ailp, cur);
365 spin_unlock(&ailp->xa_lock);
255 366
256 if (flush_log) { 367 if (flush_log) {
257 /* 368 /*
@@ -274,8 +385,7 @@ xfsaild_push(
274 */ 385 */
275 tout += 20; 386 tout += 20;
276 last_pushed_lsn = 0; 387 last_pushed_lsn = 0;
277 } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) || 388 } else if ((stuck * 100) / count > 90) {
278 ((stuck * 100) / count > 90)) {
279 /* 389 /*
280 * Either there is a lot of contention on the AIL or we 390 * Either there is a lot of contention on the AIL or we
281 * are stuck due to operations in progress. "Stuck" in this 391 * are stuck due to operations in progress. "Stuck" in this
@@ -287,7 +397,6 @@ xfsaild_push(
287 */ 397 */
288 tout += 10; 398 tout += 10;
289 } 399 }
290out:
291 *last_lsn = last_pushed_lsn; 400 *last_lsn = last_pushed_lsn;
292 return tout; 401 return tout;
293} /* xfsaild_push */ 402} /* xfsaild_push */
@@ -303,7 +412,7 @@ out:
303 */ 412 */
304void 413void
305xfs_trans_unlocked_item( 414xfs_trans_unlocked_item(
306 xfs_mount_t *mp, 415 struct xfs_ail *ailp,
307 xfs_log_item_t *lip) 416 xfs_log_item_t *lip)
308{ 417{
309 xfs_log_item_t *min_lip; 418 xfs_log_item_t *min_lip;
@@ -315,7 +424,7 @@ xfs_trans_unlocked_item(
315 * over some potentially valid data. 424 * over some potentially valid data.
316 */ 425 */
317 if (!(lip->li_flags & XFS_LI_IN_AIL) || 426 if (!(lip->li_flags & XFS_LI_IN_AIL) ||
318 XFS_FORCED_SHUTDOWN(mp)) { 427 XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
319 return; 428 return;
320 } 429 }
321 430
@@ -331,10 +440,10 @@ xfs_trans_unlocked_item(
331 * the call to xfs_log_move_tail() doesn't do anything if there's 440 * the call to xfs_log_move_tail() doesn't do anything if there's
332 * not enough free space to wake people up so we're safe calling it. 441 * not enough free space to wake people up so we're safe calling it.
333 */ 442 */
334 min_lip = xfs_ail_min(&mp->m_ail); 443 min_lip = xfs_ail_min(ailp);
335 444
336 if (min_lip == lip) 445 if (min_lip == lip)
337 xfs_log_move_tail(mp, 1); 446 xfs_log_move_tail(ailp->xa_mount, 1);
338} /* xfs_trans_unlocked_item */ 447} /* xfs_trans_unlocked_item */
339 448
340 449
@@ -347,41 +456,37 @@ xfs_trans_unlocked_item(
347 * we move in the AIL is the minimum one, update the tail lsn in the 456 * we move in the AIL is the minimum one, update the tail lsn in the
348 * log manager. 457 * log manager.
349 * 458 *
350 * Increment the AIL's generation count to indicate that the tree
351 * has changed.
352 *
353 * This function must be called with the AIL lock held. The lock 459 * This function must be called with the AIL lock held. The lock
354 * is dropped before returning. 460 * is dropped before returning.
355 */ 461 */
356void 462void
357xfs_trans_update_ail( 463xfs_trans_ail_update(
358 xfs_mount_t *mp, 464 struct xfs_ail *ailp,
359 xfs_log_item_t *lip, 465 xfs_log_item_t *lip,
360 xfs_lsn_t lsn) __releases(mp->m_ail_lock) 466 xfs_lsn_t lsn) __releases(ailp->xa_lock)
361{ 467{
362 xfs_log_item_t *dlip=NULL; 468 xfs_log_item_t *dlip = NULL;
363 xfs_log_item_t *mlip; /* ptr to minimum lip */ 469 xfs_log_item_t *mlip; /* ptr to minimum lip */
364 470
365 mlip = xfs_ail_min(&mp->m_ail); 471 mlip = xfs_ail_min(ailp);
366 472
367 if (lip->li_flags & XFS_LI_IN_AIL) { 473 if (lip->li_flags & XFS_LI_IN_AIL) {
368 dlip = xfs_ail_delete(&mp->m_ail, lip); 474 dlip = xfs_ail_delete(ailp, lip);
369 ASSERT(dlip == lip); 475 ASSERT(dlip == lip);
476 xfs_trans_ail_cursor_clear(ailp, dlip);
370 } else { 477 } else {
371 lip->li_flags |= XFS_LI_IN_AIL; 478 lip->li_flags |= XFS_LI_IN_AIL;
372 } 479 }
373 480
374 lip->li_lsn = lsn; 481 lip->li_lsn = lsn;
375 482 xfs_ail_insert(ailp, lip);
376 xfs_ail_insert(&mp->m_ail, lip);
377 mp->m_ail.xa_gen++;
378 483
379 if (mlip == dlip) { 484 if (mlip == dlip) {
380 mlip = xfs_ail_min(&mp->m_ail); 485 mlip = xfs_ail_min(ailp);
381 spin_unlock(&mp->m_ail_lock); 486 spin_unlock(&ailp->xa_lock);
382 xfs_log_move_tail(mp, mlip->li_lsn); 487 xfs_log_move_tail(ailp->xa_mount, mlip->li_lsn);
383 } else { 488 } else {
384 spin_unlock(&mp->m_ail_lock); 489 spin_unlock(&ailp->xa_lock);
385 } 490 }
386 491
387 492
@@ -403,29 +508,30 @@ xfs_trans_update_ail(
403 * is dropped before returning. 508 * is dropped before returning.
404 */ 509 */
405void 510void
406xfs_trans_delete_ail( 511xfs_trans_ail_delete(
407 xfs_mount_t *mp, 512 struct xfs_ail *ailp,
408 xfs_log_item_t *lip) __releases(mp->m_ail_lock) 513 xfs_log_item_t *lip) __releases(ailp->xa_lock)
409{ 514{
410 xfs_log_item_t *dlip; 515 xfs_log_item_t *dlip;
411 xfs_log_item_t *mlip; 516 xfs_log_item_t *mlip;
412 517
413 if (lip->li_flags & XFS_LI_IN_AIL) { 518 if (lip->li_flags & XFS_LI_IN_AIL) {
414 mlip = xfs_ail_min(&mp->m_ail); 519 mlip = xfs_ail_min(ailp);
415 dlip = xfs_ail_delete(&mp->m_ail, lip); 520 dlip = xfs_ail_delete(ailp, lip);
416 ASSERT(dlip == lip); 521 ASSERT(dlip == lip);
522 xfs_trans_ail_cursor_clear(ailp, dlip);
417 523
418 524
419 lip->li_flags &= ~XFS_LI_IN_AIL; 525 lip->li_flags &= ~XFS_LI_IN_AIL;
420 lip->li_lsn = 0; 526 lip->li_lsn = 0;
421 mp->m_ail.xa_gen++;
422 527
423 if (mlip == dlip) { 528 if (mlip == dlip) {
424 mlip = xfs_ail_min(&mp->m_ail); 529 mlip = xfs_ail_min(ailp);
425 spin_unlock(&mp->m_ail_lock); 530 spin_unlock(&ailp->xa_lock);
426 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); 531 xfs_log_move_tail(ailp->xa_mount,
532 (mlip ? mlip->li_lsn : 0));
427 } else { 533 } else {
428 spin_unlock(&mp->m_ail_lock); 534 spin_unlock(&ailp->xa_lock);
429 } 535 }
430 } 536 }
431 else { 537 else {
@@ -433,13 +539,13 @@ xfs_trans_delete_ail(
433 * If the file system is not being shutdown, we are in 539 * If the file system is not being shutdown, we are in
434 * serious trouble if we get to this stage. 540 * serious trouble if we get to this stage.
435 */ 541 */
436 if (XFS_FORCED_SHUTDOWN(mp)) 542 struct xfs_mount *mp = ailp->xa_mount;
437 spin_unlock(&mp->m_ail_lock); 543
438 else { 544 spin_unlock(&ailp->xa_lock);
545 if (!XFS_FORCED_SHUTDOWN(mp)) {
439 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 546 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
440 "%s: attempting to delete a log item that is not in the AIL", 547 "%s: attempting to delete a log item that is not in the AIL",
441 __func__); 548 __func__);
442 spin_unlock(&mp->m_ail_lock);
443 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 549 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
444 } 550 }
445 } 551 }
@@ -448,56 +554,6 @@ xfs_trans_delete_ail(
448 554
449 555
450/* 556/*
451 * Return the item in the AIL with the smallest lsn.
452 * Return the current tree generation number for use
453 * in calls to xfs_trans_next_ail().
454 */
455xfs_log_item_t *
456xfs_trans_first_ail(
457 xfs_mount_t *mp,
458 int *gen)
459{
460 xfs_log_item_t *lip;
461
462 lip = xfs_ail_min(&mp->m_ail);
463 *gen = (int)mp->m_ail.xa_gen;
464
465 return lip;
466}
467
468/*
469 * If the generation count of the tree has not changed since the
470 * caller last took something from the AIL, then return the elmt
471 * in the tree which follows the one given. If the count has changed,
472 * then return the minimum elmt of the AIL and bump the restarts counter
473 * if one is given.
474 */
475xfs_log_item_t *
476xfs_trans_next_ail(
477 xfs_mount_t *mp,
478 xfs_log_item_t *lip,
479 int *gen,
480 int *restarts)
481{
482 xfs_log_item_t *nlip;
483
484 ASSERT(mp && lip && gen);
485 if (mp->m_ail.xa_gen == *gen) {
486 nlip = xfs_ail_next(&mp->m_ail, lip);
487 } else {
488 nlip = xfs_ail_min(&mp->m_ail);
489 *gen = (int)mp->m_ail.xa_gen;
490 if (restarts != NULL) {
491 XFS_STATS_INC(xs_push_ail_restarts);
492 (*restarts)++;
493 }
494 }
495
496 return (nlip);
497}
498
499
500/*
501 * The active item list (AIL) is a doubly linked list of log 557 * The active item list (AIL) is a doubly linked list of log
502 * items sorted by ascending lsn. The base of the list is 558 * items sorted by ascending lsn. The base of the list is
503 * a forw/back pointer pair embedded in the xfs mount structure. 559 * a forw/back pointer pair embedded in the xfs mount structure.
@@ -515,15 +571,35 @@ int
515xfs_trans_ail_init( 571xfs_trans_ail_init(
516 xfs_mount_t *mp) 572 xfs_mount_t *mp)
517{ 573{
518 INIT_LIST_HEAD(&mp->m_ail.xa_ail); 574 struct xfs_ail *ailp;
519 return xfsaild_start(mp); 575 int error;
576
577 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
578 if (!ailp)
579 return ENOMEM;
580
581 ailp->xa_mount = mp;
582 INIT_LIST_HEAD(&ailp->xa_ail);
583 spin_lock_init(&ailp->xa_lock);
584 error = xfsaild_start(ailp);
585 if (error)
586 goto out_free_ailp;
587 mp->m_ail = ailp;
588 return 0;
589
590out_free_ailp:
591 kmem_free(ailp);
592 return error;
520} 593}
521 594
522void 595void
523xfs_trans_ail_destroy( 596xfs_trans_ail_destroy(
524 xfs_mount_t *mp) 597 xfs_mount_t *mp)
525{ 598{
526 xfsaild_stop(mp); 599 struct xfs_ail *ailp = mp->m_ail;
600
601 xfsaild_stop(ailp);
602 kmem_free(ailp);
527} 603}
528 604
529/* 605/*
@@ -534,7 +610,7 @@ xfs_trans_ail_destroy(
534 */ 610 */
535STATIC void 611STATIC void
536xfs_ail_insert( 612xfs_ail_insert(
537 xfs_ail_t *ailp, 613 struct xfs_ail *ailp,
538 xfs_log_item_t *lip) 614 xfs_log_item_t *lip)
539/* ARGSUSED */ 615/* ARGSUSED */
540{ 616{
@@ -568,7 +644,7 @@ xfs_ail_insert(
568/*ARGSUSED*/ 644/*ARGSUSED*/
569STATIC xfs_log_item_t * 645STATIC xfs_log_item_t *
570xfs_ail_delete( 646xfs_ail_delete(
571 xfs_ail_t *ailp, 647 struct xfs_ail *ailp,
572 xfs_log_item_t *lip) 648 xfs_log_item_t *lip)
573/* ARGSUSED */ 649/* ARGSUSED */
574{ 650{
@@ -585,7 +661,7 @@ xfs_ail_delete(
585 */ 661 */
586STATIC xfs_log_item_t * 662STATIC xfs_log_item_t *
587xfs_ail_min( 663xfs_ail_min(
588 xfs_ail_t *ailp) 664 struct xfs_ail *ailp)
589/* ARGSUSED */ 665/* ARGSUSED */
590{ 666{
591 if (list_empty(&ailp->xa_ail)) 667 if (list_empty(&ailp->xa_ail))
@@ -601,7 +677,7 @@ xfs_ail_min(
601 */ 677 */
602STATIC xfs_log_item_t * 678STATIC xfs_log_item_t *
603xfs_ail_next( 679xfs_ail_next(
604 xfs_ail_t *ailp, 680 struct xfs_ail *ailp,
605 xfs_log_item_t *lip) 681 xfs_log_item_t *lip)
606/* ARGSUSED */ 682/* ARGSUSED */
607{ 683{
@@ -617,7 +693,7 @@ xfs_ail_next(
617 */ 693 */
618STATIC void 694STATIC void
619xfs_ail_check( 695xfs_ail_check(
620 xfs_ail_t *ailp, 696 struct xfs_ail *ailp,
621 xfs_log_item_t *lip) 697 xfs_log_item_t *lip)
622{ 698{
623 xfs_log_item_t *prev_lip; 699 xfs_log_item_t *prev_lip;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 4e855b5ced66..8ee2f8c8b0a6 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -527,9 +527,8 @@ xfs_trans_brelse(xfs_trans_t *tp,
527 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 527 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
528 if (lip->li_type == XFS_LI_BUF) { 528 if (lip->li_type == XFS_LI_BUF) {
529 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); 529 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
530 xfs_trans_unlocked_item( 530 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
531 bip->bli_item.li_mountp, 531 lip);
532 lip);
533 } 532 }
534 } 533 }
535 xfs_buf_relse(bp); 534 xfs_buf_relse(bp);
@@ -626,7 +625,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
626 * tell the AIL that the buffer is being unlocked. 625 * tell the AIL that the buffer is being unlocked.
627 */ 626 */
628 if (bip != NULL) { 627 if (bip != NULL) {
629 xfs_trans_unlocked_item(bip->bli_item.li_mountp, 628 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
630 (xfs_log_item_t*)bip); 629 (xfs_log_item_t*)bip);
631 } 630 }
632 631
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 3c666e8317f8..e110bf57d7f4 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -22,6 +22,14 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_trans_priv.h" 24#include "xfs_trans_priv.h"
25/* XXX: from here down needed until struct xfs_trans has its own ailp */
26#include "xfs_bit.h"
27#include "xfs_buf_item.h"
28#include "xfs_sb.h"
29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h"
25 33
26STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *, 34STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
27 int, int, xfs_lsn_t); 35 int, int, xfs_lsn_t);
@@ -79,6 +87,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
79 lidp->lid_size = 0; 87 lidp->lid_size = 0;
80 lip->li_desc = lidp; 88 lip->li_desc = lidp;
81 lip->li_mountp = tp->t_mountp; 89 lip->li_mountp = tp->t_mountp;
90 lip->li_ailp = tp->t_mountp->m_ail;
82 return lidp; 91 return lidp;
83 } 92 }
84 93
@@ -120,6 +129,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
120 lidp->lid_size = 0; 129 lidp->lid_size = 0;
121 lip->li_desc = lidp; 130 lip->li_desc = lidp;
122 lip->li_mountp = tp->t_mountp; 131 lip->li_mountp = tp->t_mountp;
132 lip->li_ailp = tp->t_mountp->m_ail;
123 return lidp; 133 return lidp;
124} 134}
125 135
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 3c748c456ed4..73e2ad397432 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -44,25 +44,93 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
44 xfs_extlen_t idx); 44 xfs_extlen_t idx);
45 45
46/* 46/*
47 * From xfs_trans_ail.c 47 * AIL traversal cursor.
48 *
49 * Rather than using a generation number for detecting changes in the ail, use
50 * a cursor that is protected by the ail lock. The aild cursor exists in the
51 * struct xfs_ail, but other traversals can declare it on the stack and link it
52 * to the ail list.
53 *
54 * When an object is deleted from or moved in the AIL, the cursor list is
55 * searched to see if the object is a designated cursor item. If it is, it is
56 * deleted from the cursor so that the next time the cursor is used traversal
57 * will return to the start.
58 *
59 * This means a traversal colliding with a removal will cause a restart of the
60 * list scan, rather than any insertion or deletion anywhere in the list. The
61 * low bit of the item pointer is set if the cursor has been invalidated so
62 * that we can tell the difference between invalidation and reaching the end
63 * of the list to trigger traversal restarts.
48 */ 64 */
49void xfs_trans_update_ail(struct xfs_mount *mp, 65struct xfs_ail_cursor {
50 struct xfs_log_item *lip, xfs_lsn_t lsn) 66 struct xfs_ail_cursor *next;
51 __releases(mp->m_ail_lock); 67 struct xfs_log_item *item;
52void xfs_trans_delete_ail(struct xfs_mount *mp, 68};
53 struct xfs_log_item *lip)
54 __releases(mp->m_ail_lock);
55struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *);
56struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *,
57 struct xfs_log_item *, int *, int *);
58 69
70/*
71 * Private AIL structures.
72 *
73 * Eventually we need to drive the locking in here as well.
74 */
75struct xfs_ail {
76 struct xfs_mount *xa_mount;
77 struct list_head xa_ail;
78 uint xa_gen;
79 struct task_struct *xa_task;
80 xfs_lsn_t xa_target;
81 struct xfs_ail_cursor xa_cursors;
82 spinlock_t xa_lock;
83};
59 84
60/* 85/*
61 * AIL push thread support 86 * From xfs_trans_ail.c
62 */ 87 */
63long xfsaild_push(struct xfs_mount *, xfs_lsn_t *); 88void xfs_trans_ail_update(struct xfs_ail *ailp,
64void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t); 89 struct xfs_log_item *lip, xfs_lsn_t lsn)
65int xfsaild_start(struct xfs_mount *); 90 __releases(ailp->xa_lock);
66void xfsaild_stop(struct xfs_mount *); 91void xfs_trans_ail_delete(struct xfs_ail *ailp,
92 struct xfs_log_item *lip)
93 __releases(ailp->xa_lock);
94void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
95void xfs_trans_unlocked_item(struct xfs_ail *,
96 xfs_log_item_t *);
97
98xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp);
99
100struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
101 struct xfs_ail_cursor *cur,
102 xfs_lsn_t lsn);
103struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
104 struct xfs_ail_cursor *cur);
105void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
106 struct xfs_ail_cursor *cur);
107
108long xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
109void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
110int xfsaild_start(struct xfs_ail *);
111void xfsaild_stop(struct xfs_ail *);
67 112
113#if BITS_PER_LONG != 64
114static inline void
115xfs_trans_ail_copy_lsn(
116 struct xfs_ail *ailp,
117 xfs_lsn_t *dst,
118 xfs_lsn_t *src)
119{
120 ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */
121 spin_lock(&ailp->xa_lock);
122 *dst = *src;
123 spin_unlock(&ailp->xa_lock);
124}
125#else
126static inline void
127xfs_trans_ail_copy_lsn(
128 struct xfs_ail *ailp,
129 xfs_lsn_t *dst,
130 xfs_lsn_t *src)
131{
132 ASSERT(sizeof(xfs_lsn_t) == 8);
133 *dst = *src;
134}
135#endif
68#endif /* __XFS_TRANS_PRIV_H__ */ 136#endif /* __XFS_TRANS_PRIV_H__ */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 439dd3939dda..305d9f3948e0 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -49,71 +49,15 @@
49#include "xfs_extfree_item.h" 49#include "xfs_extfree_item.h"
50#include "xfs_acl.h" 50#include "xfs_acl.h"
51#include "xfs_attr.h" 51#include "xfs_attr.h"
52#include "xfs_clnt.h"
53#include "xfs_mru_cache.h" 52#include "xfs_mru_cache.h"
54#include "xfs_filestream.h" 53#include "xfs_filestream.h"
55#include "xfs_fsops.h" 54#include "xfs_fsops.h"
56#include "xfs_vnodeops.h" 55#include "xfs_vnodeops.h"
57#include "xfs_vfsops.h" 56#include "xfs_vfsops.h"
58#include "xfs_utils.h" 57#include "xfs_utils.h"
58#include "xfs_sync.h"
59 59
60 60
61STATIC void
62xfs_quiesce_fs(
63 xfs_mount_t *mp)
64{
65 int count = 0, pincount;
66
67 xfs_flush_buftarg(mp->m_ddev_targp, 0);
68 xfs_finish_reclaim_all(mp, 0);
69
70 /* This loop must run at least twice.
71 * The first instance of the loop will flush
72 * most meta data but that will generate more
73 * meta data (typically directory updates).
74 * Which then must be flushed and logged before
75 * we can write the unmount record.
76 */
77 do {
78 xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
79 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
80 if (!pincount) {
81 delay(50);
82 count++;
83 }
84 } while (count < 2);
85}
86
87/*
88 * Second stage of a quiesce. The data is already synced, now we have to take
89 * care of the metadata. New transactions are already blocked, so we need to
90 * wait for any remaining transactions to drain out before proceding.
91 */
92void
93xfs_attr_quiesce(
94 xfs_mount_t *mp)
95{
96 int error = 0;
97
98 /* wait for all modifications to complete */
99 while (atomic_read(&mp->m_active_trans) > 0)
100 delay(100);
101
102 /* flush inodes and push all remaining buffers out to disk */
103 xfs_quiesce_fs(mp);
104
105 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
106
107 /* Push the superblock and write an unmount record */
108 error = xfs_log_sbcount(mp, 1);
109 if (error)
110 xfs_fs_cmn_err(CE_WARN, mp,
111 "xfs_attr_quiesce: failed to log sb changes. "
112 "Frozen image may not be consistent.");
113 xfs_log_unmount_write(mp);
114 xfs_unmountfs_writesb(mp);
115}
116
117/* 61/*
118 * xfs_unmount_flush implements a set of flush operation on special 62 * xfs_unmount_flush implements a set of flush operation on special
119 * inodes, which are needed as a separate set of operations so that 63 * inodes, which are needed as a separate set of operations so that
@@ -196,562 +140,3 @@ fscorrupt_out2:
196 return XFS_ERROR(EFSCORRUPTED); 140 return XFS_ERROR(EFSCORRUPTED);
197} 141}
198 142
199/*
200 * xfs_sync flushes any pending I/O to file system vfsp.
201 *
202 * This routine is called by vfs_sync() to make sure that things make it
203 * out to disk eventually, on sync() system calls to flush out everything,
204 * and when the file system is unmounted. For the vfs_sync() case, all
205 * we really need to do is sync out the log to make all of our meta-data
206 * updates permanent (except for timestamps). For calls from pflushd(),
207 * dirty pages are kept moving by calling pdflush() on the inodes
208 * containing them. We also flush the inodes that we can lock without
209 * sleeping and the superblock if we can lock it without sleeping from
210 * vfs_sync() so that items at the tail of the log are always moving out.
211 *
212 * Flags:
213 * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want
214 * to sleep if we can help it. All we really need
215 * to do is ensure that the log is synced at least
216 * periodically. We also push the inodes and
217 * superblock if we can lock them without sleeping
218 * and they are not pinned.
219 * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not
220 * set, then we really want to lock each inode and flush
221 * it.
222 * SYNC_WAIT - All the flushes that take place in this call should
223 * be synchronous.
224 * SYNC_DELWRI - This tells us to push dirty pages associated with
225 * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to
226 * determine if they should be flushed sync, async, or
227 * delwri.
228 * SYNC_CLOSE - This flag is passed when the system is being
229 * unmounted. We should sync and invalidate everything.
230 * SYNC_FSDATA - This indicates that the caller would like to make
231 * sure the superblock is safe on disk. We can ensure
232 * this by simply making sure the log gets flushed
233 * if SYNC_BDFLUSH is set, and by actually writing it
234 * out otherwise.
235 * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete
236 * before we return (including direct I/O). Forms the drain
237 * side of the write barrier needed to safely quiesce the
238 * filesystem.
239 *
240 */
241int
242xfs_sync(
243 xfs_mount_t *mp,
244 int flags)
245{
246 int error;
247
248 /*
249 * Get the Quota Manager to flush the dquots.
250 *
251 * If XFS quota support is not enabled or this filesystem
252 * instance does not use quotas XFS_QM_DQSYNC will always
253 * return zero.
254 */
255 error = XFS_QM_DQSYNC(mp, flags);
256 if (error) {
257 /*
258 * If we got an IO error, we will be shutting down.
259 * So, there's nothing more for us to do here.
260 */
261 ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp));
262 if (XFS_FORCED_SHUTDOWN(mp))
263 return XFS_ERROR(error);
264 }
265
266 if (flags & SYNC_IOWAIT)
267 xfs_filestream_flush(mp);
268
269 return xfs_syncsub(mp, flags, NULL);
270}
271
272/*
273 * xfs sync routine for internal use
274 *
275 * This routine supports all of the flags defined for the generic vfs_sync
276 * interface as explained above under xfs_sync.
277 *
278 */
279int
280xfs_sync_inodes(
281 xfs_mount_t *mp,
282 int flags,
283 int *bypassed)
284{
285 xfs_inode_t *ip = NULL;
286 struct inode *vp = NULL;
287 int error;
288 int last_error;
289 uint64_t fflag;
290 uint lock_flags;
291 uint base_lock_flags;
292 boolean_t mount_locked;
293 boolean_t vnode_refed;
294 int preempt;
295 xfs_iptr_t *ipointer;
296#ifdef DEBUG
297 boolean_t ipointer_in = B_FALSE;
298
299#define IPOINTER_SET ipointer_in = B_TRUE
300#define IPOINTER_CLR ipointer_in = B_FALSE
301#else
302#define IPOINTER_SET
303#define IPOINTER_CLR
304#endif
305
306
307/* Insert a marker record into the inode list after inode ip. The list
308 * must be locked when this is called. After the call the list will no
309 * longer be locked.
310 */
311#define IPOINTER_INSERT(ip, mp) { \
312 ASSERT(ipointer_in == B_FALSE); \
313 ipointer->ip_mnext = ip->i_mnext; \
314 ipointer->ip_mprev = ip; \
315 ip->i_mnext = (xfs_inode_t *)ipointer; \
316 ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
317 preempt = 0; \
318 XFS_MOUNT_IUNLOCK(mp); \
319 mount_locked = B_FALSE; \
320 IPOINTER_SET; \
321 }
322
323/* Remove the marker from the inode list. If the marker was the only item
324 * in the list then there are no remaining inodes and we should zero out
325 * the whole list. If we are the current head of the list then move the head
326 * past us.
327 */
328#define IPOINTER_REMOVE(ip, mp) { \
329 ASSERT(ipointer_in == B_TRUE); \
330 if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
331 ip = ipointer->ip_mnext; \
332 ip->i_mprev = ipointer->ip_mprev; \
333 ipointer->ip_mprev->i_mnext = ip; \
334 if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
335 mp->m_inodes = ip; \
336 } \
337 } else { \
338 ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
339 mp->m_inodes = NULL; \
340 ip = NULL; \
341 } \
342 IPOINTER_CLR; \
343 }
344
345#define XFS_PREEMPT_MASK 0x7f
346
347 ASSERT(!(flags & SYNC_BDFLUSH));
348
349 if (bypassed)
350 *bypassed = 0;
351 if (mp->m_flags & XFS_MOUNT_RDONLY)
352 return 0;
353 error = 0;
354 last_error = 0;
355 preempt = 0;
356
357 /* Allocate a reference marker */
358 ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
359
360 fflag = XFS_B_ASYNC; /* default is don't wait */
361 if (flags & SYNC_DELWRI)
362 fflag = XFS_B_DELWRI;
363 if (flags & SYNC_WAIT)
364 fflag = 0; /* synchronous overrides all */
365
366 base_lock_flags = XFS_ILOCK_SHARED;
367 if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
368 /*
369 * We need the I/O lock if we're going to call any of
370 * the flush/inval routines.
371 */
372 base_lock_flags |= XFS_IOLOCK_SHARED;
373 }
374
375 XFS_MOUNT_ILOCK(mp);
376
377 ip = mp->m_inodes;
378
379 mount_locked = B_TRUE;
380 vnode_refed = B_FALSE;
381
382 IPOINTER_CLR;
383
384 do {
385 ASSERT(ipointer_in == B_FALSE);
386 ASSERT(vnode_refed == B_FALSE);
387
388 lock_flags = base_lock_flags;
389
390 /*
391 * There were no inodes in the list, just break out
392 * of the loop.
393 */
394 if (ip == NULL) {
395 break;
396 }
397
398 /*
399 * We found another sync thread marker - skip it
400 */
401 if (ip->i_mount == NULL) {
402 ip = ip->i_mnext;
403 continue;
404 }
405
406 vp = VFS_I(ip);
407
408 /*
409 * If the vnode is gone then this is being torn down,
410 * call reclaim if it is flushed, else let regular flush
411 * code deal with it later in the loop.
412 */
413
414 if (vp == NULL) {
415 /* Skip ones already in reclaim */
416 if (ip->i_flags & XFS_IRECLAIM) {
417 ip = ip->i_mnext;
418 continue;
419 }
420 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
421 ip = ip->i_mnext;
422 } else if ((xfs_ipincount(ip) == 0) &&
423 xfs_iflock_nowait(ip)) {
424 IPOINTER_INSERT(ip, mp);
425
426 xfs_finish_reclaim(ip, 1,
427 XFS_IFLUSH_DELWRI_ELSE_ASYNC);
428
429 XFS_MOUNT_ILOCK(mp);
430 mount_locked = B_TRUE;
431 IPOINTER_REMOVE(ip, mp);
432 } else {
433 xfs_iunlock(ip, XFS_ILOCK_EXCL);
434 ip = ip->i_mnext;
435 }
436 continue;
437 }
438
439 if (VN_BAD(vp)) {
440 ip = ip->i_mnext;
441 continue;
442 }
443
444 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
445 XFS_MOUNT_IUNLOCK(mp);
446 kmem_free(ipointer);
447 return 0;
448 }
449
450 /*
451 * Try to lock without sleeping. We're out of order with
452 * the inode list lock here, so if we fail we need to drop
453 * the mount lock and try again. If we're called from
454 * bdflush() here, then don't bother.
455 *
456 * The inode lock here actually coordinates with the
457 * almost spurious inode lock in xfs_ireclaim() to prevent
458 * the vnode we handle here without a reference from
459 * being freed while we reference it. If we lock the inode
460 * while it's on the mount list here, then the spurious inode
461 * lock in xfs_ireclaim() after the inode is pulled from
462 * the mount list will sleep until we release it here.
463 * This keeps the vnode from being freed while we reference
464 * it.
465 */
466 if (xfs_ilock_nowait(ip, lock_flags) == 0) {
467 if (vp == NULL) {
468 ip = ip->i_mnext;
469 continue;
470 }
471
472 vp = vn_grab(vp);
473 if (vp == NULL) {
474 ip = ip->i_mnext;
475 continue;
476 }
477
478 IPOINTER_INSERT(ip, mp);
479 xfs_ilock(ip, lock_flags);
480
481 ASSERT(vp == VFS_I(ip));
482 ASSERT(ip->i_mount == mp);
483
484 vnode_refed = B_TRUE;
485 }
486
487 /* From here on in the loop we may have a marker record
488 * in the inode list.
489 */
490
491 /*
492 * If we have to flush data or wait for I/O completion
493 * we need to drop the ilock that we currently hold.
494 * If we need to drop the lock, insert a marker if we
495 * have not already done so.
496 */
497 if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
498 ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
499 if (mount_locked) {
500 IPOINTER_INSERT(ip, mp);
501 }
502 xfs_iunlock(ip, XFS_ILOCK_SHARED);
503
504 if (flags & SYNC_CLOSE) {
505 /* Shutdown case. Flush and invalidate. */
506 if (XFS_FORCED_SHUTDOWN(mp))
507 xfs_tosspages(ip, 0, -1,
508 FI_REMAPF);
509 else
510 error = xfs_flushinval_pages(ip,
511 0, -1, FI_REMAPF);
512 } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
513 error = xfs_flush_pages(ip, 0,
514 -1, fflag, FI_NONE);
515 }
516
517 /*
518 * When freezing, we need to wait ensure all I/O (including direct
519 * I/O) is complete to ensure no further data modification can take
520 * place after this point
521 */
522 if (flags & SYNC_IOWAIT)
523 vn_iowait(ip);
524
525 xfs_ilock(ip, XFS_ILOCK_SHARED);
526 }
527
528 if ((flags & SYNC_ATTR) &&
529 (ip->i_update_core ||
530 (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) {
531 if (mount_locked)
532 IPOINTER_INSERT(ip, mp);
533
534 if (flags & SYNC_WAIT) {
535 xfs_iflock(ip);
536 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
537
538 /*
539 * If we can't acquire the flush lock, then the inode
540 * is already being flushed so don't bother waiting.
541 *
542 * If we can lock it then do a delwri flush so we can
543 * combine multiple inode flushes in each disk write.
544 */
545 } else if (xfs_iflock_nowait(ip)) {
546 error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
547 } else if (bypassed) {
548 (*bypassed)++;
549 }
550 }
551
552 if (lock_flags != 0) {
553 xfs_iunlock(ip, lock_flags);
554 }
555
556 if (vnode_refed) {
557 /*
558 * If we had to take a reference on the vnode
559 * above, then wait until after we've unlocked
560 * the inode to release the reference. This is
561 * because we can be already holding the inode
562 * lock when IRELE() calls xfs_inactive().
563 *
564 * Make sure to drop the mount lock before calling
565 * IRELE() so that we don't trip over ourselves if
566 * we have to go for the mount lock again in the
567 * inactive code.
568 */
569 if (mount_locked) {
570 IPOINTER_INSERT(ip, mp);
571 }
572
573 IRELE(ip);
574
575 vnode_refed = B_FALSE;
576 }
577
578 if (error) {
579 last_error = error;
580 }
581
582 /*
583 * bail out if the filesystem is corrupted.
584 */
585 if (error == EFSCORRUPTED) {
586 if (!mount_locked) {
587 XFS_MOUNT_ILOCK(mp);
588 IPOINTER_REMOVE(ip, mp);
589 }
590 XFS_MOUNT_IUNLOCK(mp);
591 ASSERT(ipointer_in == B_FALSE);
592 kmem_free(ipointer);
593 return XFS_ERROR(error);
594 }
595
596 /* Let other threads have a chance at the mount lock
597 * if we have looped many times without dropping the
598 * lock.
599 */
600 if ((++preempt & XFS_PREEMPT_MASK) == 0) {
601 if (mount_locked) {
602 IPOINTER_INSERT(ip, mp);
603 }
604 }
605
606 if (mount_locked == B_FALSE) {
607 XFS_MOUNT_ILOCK(mp);
608 mount_locked = B_TRUE;
609 IPOINTER_REMOVE(ip, mp);
610 continue;
611 }
612
613 ASSERT(ipointer_in == B_FALSE);
614 ip = ip->i_mnext;
615
616 } while (ip != mp->m_inodes);
617
618 XFS_MOUNT_IUNLOCK(mp);
619
620 ASSERT(ipointer_in == B_FALSE);
621
622 kmem_free(ipointer);
623 return XFS_ERROR(last_error);
624}
625
626/*
627 * xfs sync routine for internal use
628 *
629 * This routine supports all of the flags defined for the generic vfs_sync
630 * interface as explained above under xfs_sync.
631 *
632 */
633int
634xfs_syncsub(
635 xfs_mount_t *mp,
636 int flags,
637 int *bypassed)
638{
639 int error = 0;
640 int last_error = 0;
641 uint log_flags = XFS_LOG_FORCE;
642 xfs_buf_t *bp;
643 xfs_buf_log_item_t *bip;
644
645 /*
646 * Sync out the log. This ensures that the log is periodically
647 * flushed even if there is not enough activity to fill it up.
648 */
649 if (flags & SYNC_WAIT)
650 log_flags |= XFS_LOG_SYNC;
651
652 xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
653
654 if (flags & (SYNC_ATTR|SYNC_DELWRI)) {
655 if (flags & SYNC_BDFLUSH)
656 xfs_finish_reclaim_all(mp, 1);
657 else
658 error = xfs_sync_inodes(mp, flags, bypassed);
659 }
660
661 /*
662 * Flushing out dirty data above probably generated more
663 * log activity, so if this isn't vfs_sync() then flush
664 * the log again.
665 */
666 if (flags & SYNC_DELWRI) {
667 xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
668 }
669
670 if (flags & SYNC_FSDATA) {
671 /*
672 * If this is vfs_sync() then only sync the superblock
673 * if we can lock it without sleeping and it is not pinned.
674 */
675 if (flags & SYNC_BDFLUSH) {
676 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
677 if (bp != NULL) {
678 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
679 if ((bip != NULL) &&
680 xfs_buf_item_dirty(bip)) {
681 if (!(XFS_BUF_ISPINNED(bp))) {
682 XFS_BUF_ASYNC(bp);
683 error = xfs_bwrite(mp, bp);
684 } else {
685 xfs_buf_relse(bp);
686 }
687 } else {
688 xfs_buf_relse(bp);
689 }
690 }
691 } else {
692 bp = xfs_getsb(mp, 0);
693 /*
694 * If the buffer is pinned then push on the log so
695 * we won't get stuck waiting in the write for
696 * someone, maybe ourselves, to flush the log.
697 * Even though we just pushed the log above, we
698 * did not have the superblock buffer locked at
699 * that point so it can become pinned in between
700 * there and here.
701 */
702 if (XFS_BUF_ISPINNED(bp))
703 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
704 if (flags & SYNC_WAIT)
705 XFS_BUF_UNASYNC(bp);
706 else
707 XFS_BUF_ASYNC(bp);
708 error = xfs_bwrite(mp, bp);
709 }
710 if (error) {
711 last_error = error;
712 }
713 }
714
715 /*
716 * Now check to see if the log needs a "dummy" transaction.
717 */
718 if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) {
719 xfs_trans_t *tp;
720 xfs_inode_t *ip;
721
722 /*
723 * Put a dummy transaction in the log to tell
724 * recovery that all others are OK.
725 */
726 tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
727 if ((error = xfs_trans_reserve(tp, 0,
728 XFS_ICHANGE_LOG_RES(mp),
729 0, 0, 0))) {
730 xfs_trans_cancel(tp, 0);
731 return error;
732 }
733
734 ip = mp->m_rootip;
735 xfs_ilock(ip, XFS_ILOCK_EXCL);
736
737 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
738 xfs_trans_ihold(tp, ip);
739 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
740 error = xfs_trans_commit(tp, 0);
741 xfs_iunlock(ip, XFS_ILOCK_EXCL);
742 xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
743 }
744
745 /*
746 * When shutting down, we need to insure that the AIL is pushed
747 * to disk or the filesystem can appear corrupt from the PROM.
748 */
749 if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) {
750 XFS_bflush(mp->m_ddev_targp);
751 if (mp->m_rtdev_targp) {
752 XFS_bflush(mp->m_rtdev_targp);
753 }
754 }
755
756 return XFS_ERROR(last_error);
757}
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index a74b05087da4..6b8e0b52b95e 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,9 +8,7 @@ struct kstatfs;
8struct xfs_mount; 8struct xfs_mount;
9struct xfs_mount_args; 9struct xfs_mount_args;
10 10
11int xfs_sync(struct xfs_mount *mp, int flags);
12void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, 11void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
13 int lnnum); 12 int lnnum);
14void xfs_attr_quiesce(struct xfs_mount *mp);
15 13
16#endif /* _XFS_VFSOPS_H */ 14#endif /* _XFS_VFSOPS_H */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8b6812f66a15..c45ea278ef41 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -79,8 +79,7 @@ int
79xfs_setattr( 79xfs_setattr(
80 struct xfs_inode *ip, 80 struct xfs_inode *ip,
81 struct iattr *iattr, 81 struct iattr *iattr,
82 int flags, 82 int flags)
83 cred_t *credp)
84{ 83{
85 xfs_mount_t *mp = ip->i_mount; 84 xfs_mount_t *mp = ip->i_mount;
86 struct inode *inode = VFS_I(ip); 85 struct inode *inode = VFS_I(ip);
@@ -233,10 +232,6 @@ xfs_setattr(
233 232
234 /* 233 /*
235 * Change file ownership. Must be the owner or privileged. 234 * Change file ownership. Must be the owner or privileged.
236 * If the system was configured with the "restricted_chown"
237 * option, the owner is not permitted to give away the file,
238 * and can change the group id only to a group of which he
239 * or she is a member.
240 */ 235 */
241 if (mask & (ATTR_UID|ATTR_GID)) { 236 if (mask & (ATTR_UID|ATTR_GID)) {
242 /* 237 /*
@@ -260,9 +255,8 @@ xfs_setattr(
260 * shall be equal to either the group ID or one of the 255 * shall be equal to either the group ID or one of the
261 * supplementary group IDs of the calling process. 256 * supplementary group IDs of the calling process.
262 */ 257 */
263 if (restricted_chown && 258 if ((iuid != uid ||
264 (iuid != uid || (igid != gid && 259 (igid != gid && !in_group_p((gid_t)gid))) &&
265 !in_group_p((gid_t)gid))) &&
266 !capable(CAP_CHOWN)) { 260 !capable(CAP_CHOWN)) {
267 code = XFS_ERROR(EPERM); 261 code = XFS_ERROR(EPERM);
268 goto error_return; 262 goto error_return;
@@ -456,10 +450,6 @@ xfs_setattr(
456 450
457 /* 451 /*
458 * Change file ownership. Must be the owner or privileged. 452 * Change file ownership. Must be the owner or privileged.
459 * If the system was configured with the "restricted_chown"
460 * option, the owner is not permitted to give away the file,
461 * and can change the group id only to a group of which he
462 * or she is a member.
463 */ 453 */
464 if (mask & (ATTR_UID|ATTR_GID)) { 454 if (mask & (ATTR_UID|ATTR_GID)) {
465 /* 455 /*
@@ -2009,7 +1999,7 @@ xfs_remove(
2009 goto out_bmap_cancel; 1999 goto out_bmap_cancel;
2010 2000
2011 /* 2001 /*
2012 * Drop the link from dp to ip. 2002 * Drop the "." link from ip to self.
2013 */ 2003 */
2014 error = xfs_droplink(tp, ip); 2004 error = xfs_droplink(tp, ip);
2015 if (error) 2005 if (error)
@@ -2024,7 +2014,7 @@ xfs_remove(
2024 } 2014 }
2025 2015
2026 /* 2016 /*
2027 * Drop the "." link from ip to self. 2017 * Drop the link from dp to ip.
2028 */ 2018 */
2029 error = xfs_droplink(tp, ip); 2019 error = xfs_droplink(tp, ip);
2030 if (error) 2020 if (error)
@@ -2833,122 +2823,10 @@ xfs_reclaim(
2833 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 2823 if (!ip->i_update_core && (ip->i_itemp == NULL)) {
2834 xfs_ilock(ip, XFS_ILOCK_EXCL); 2824 xfs_ilock(ip, XFS_ILOCK_EXCL);
2835 xfs_iflock(ip); 2825 xfs_iflock(ip);
2836 return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); 2826 xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2837 } else { 2827 return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
2838 xfs_mount_t *mp = ip->i_mount;
2839
2840 /* Protect sync and unpin from us */
2841 XFS_MOUNT_ILOCK(mp);
2842 spin_lock(&ip->i_flags_lock);
2843 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2844 VFS_I(ip)->i_private = NULL;
2845 ip->i_vnode = NULL;
2846 spin_unlock(&ip->i_flags_lock);
2847 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
2848 XFS_MOUNT_IUNLOCK(mp);
2849 } 2828 }
2850 return 0; 2829 xfs_inode_set_reclaim_tag(ip);
2851}
2852
2853int
2854xfs_finish_reclaim(
2855 xfs_inode_t *ip,
2856 int locked,
2857 int sync_mode)
2858{
2859 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
2860 struct inode *vp = VFS_I(ip);
2861
2862 if (vp && VN_BAD(vp))
2863 goto reclaim;
2864
2865 /* The hash lock here protects a thread in xfs_iget_core from
2866 * racing with us on linking the inode back with a vnode.
2867 * Once we have the XFS_IRECLAIM flag set it will not touch
2868 * us.
2869 */
2870 write_lock(&pag->pag_ici_lock);
2871 spin_lock(&ip->i_flags_lock);
2872 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
2873 (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
2874 spin_unlock(&ip->i_flags_lock);
2875 write_unlock(&pag->pag_ici_lock);
2876 if (locked) {
2877 xfs_ifunlock(ip);
2878 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2879 }
2880 return 1;
2881 }
2882 __xfs_iflags_set(ip, XFS_IRECLAIM);
2883 spin_unlock(&ip->i_flags_lock);
2884 write_unlock(&pag->pag_ici_lock);
2885 xfs_put_perag(ip->i_mount, pag);
2886
2887 /*
2888 * If the inode is still dirty, then flush it out. If the inode
2889 * is not in the AIL, then it will be OK to flush it delwri as
2890 * long as xfs_iflush() does not keep any references to the inode.
2891 * We leave that decision up to xfs_iflush() since it has the
2892 * knowledge of whether it's OK to simply do a delwri flush of
2893 * the inode or whether we need to wait until the inode is
2894 * pulled from the AIL.
2895 * We get the flush lock regardless, though, just to make sure
2896 * we don't free it while it is being flushed.
2897 */
2898 if (!locked) {
2899 xfs_ilock(ip, XFS_ILOCK_EXCL);
2900 xfs_iflock(ip);
2901 }
2902
2903 /*
2904 * In the case of a forced shutdown we rely on xfs_iflush() to
2905 * wait for the inode to be unpinned before returning an error.
2906 */
2907 if (xfs_iflush(ip, sync_mode) == 0) {
2908 /* synchronize with xfs_iflush_done */
2909 xfs_iflock(ip);
2910 xfs_ifunlock(ip);
2911 }
2912
2913 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2914
2915 reclaim:
2916 xfs_ireclaim(ip);
2917 return 0;
2918}
2919
2920int
2921xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
2922{
2923 int purged;
2924 xfs_inode_t *ip, *n;
2925 int done = 0;
2926
2927 while (!done) {
2928 purged = 0;
2929 XFS_MOUNT_ILOCK(mp);
2930 list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
2931 if (noblock) {
2932 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
2933 continue;
2934 if (xfs_ipincount(ip) ||
2935 !xfs_iflock_nowait(ip)) {
2936 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2937 continue;
2938 }
2939 }
2940 XFS_MOUNT_IUNLOCK(mp);
2941 if (xfs_finish_reclaim(ip, noblock,
2942 XFS_IFLUSH_DELWRI_ELSE_ASYNC))
2943 delay(1);
2944 purged = 1;
2945 break;
2946 }
2947
2948 done = !purged;
2949 }
2950
2951 XFS_MOUNT_IUNLOCK(mp);
2952 return 0; 2830 return 0;
2953} 2831}
2954 2832
@@ -3474,7 +3352,6 @@ xfs_change_file_space(
3474 int cmd, 3352 int cmd,
3475 xfs_flock64_t *bf, 3353 xfs_flock64_t *bf,
3476 xfs_off_t offset, 3354 xfs_off_t offset,
3477 cred_t *credp,
3478 int attr_flags) 3355 int attr_flags)
3479{ 3356{
3480 xfs_mount_t *mp = ip->i_mount; 3357 xfs_mount_t *mp = ip->i_mount;
@@ -3562,7 +3439,7 @@ xfs_change_file_space(
3562 iattr.ia_valid = ATTR_SIZE; 3439 iattr.ia_valid = ATTR_SIZE;
3563 iattr.ia_size = startoffset; 3440 iattr.ia_size = startoffset;
3564 3441
3565 error = xfs_setattr(ip, &iattr, attr_flags, credp); 3442 error = xfs_setattr(ip, &iattr, attr_flags);
3566 3443
3567 if (error) 3444 if (error)
3568 return error; 3445 return error;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index e932a96bec54..b1ae8e3f4043 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -15,8 +15,7 @@ struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_open(struct xfs_inode *ip);
18int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
19 struct cred *credp);
20#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ 19#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
21#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ 20#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
22#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ 21#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
@@ -44,8 +43,7 @@ int xfs_inode_flush(struct xfs_inode *ip, int flags);
44int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 43int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
45int xfs_reclaim(struct xfs_inode *ip); 44int xfs_reclaim(struct xfs_inode *ip);
46int xfs_change_file_space(struct xfs_inode *ip, int cmd, 45int xfs_change_file_space(struct xfs_inode *ip, int cmd,
47 xfs_flock64_t *bf, xfs_off_t offset, 46 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
48 struct cred *credp, int attr_flags);
49int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 47int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
50 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 48 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
51 struct xfs_name *target_name, struct xfs_inode *target_ip); 49 struct xfs_name *target_name, struct xfs_inode *target_ip);